RQ #4: Is engagement in STEM activities across several weeks associated with changes in: individual (sustained) interest in STEM and future goals and aspirations related to STEM?

This is what we know:

Predicted engagement (via a BLUP) strongly predicts changes in interest and future goals and plans
Males are associated with changes in future goals and plans
Overall program rating is associated with changes in interest (but may not be very robust)

This is what we still need to figure out:

How will these findings hold up when we use the predictions for relevance and challenge (the BLUPs) in different ways (in a bivariate model that accounts for the standard error)

How this document is organized

Two different outcomes, Interest and future goals, controlling on interest and future goals and gender, with three models each:
- Basic model (described above)
- Addition of categorical overall program rating
- Addition of continuous overall program rating

library(tidyverse)
library(lme4)
library(corrr)
library(jmRtools)
library(sjPlot)
library(magrittr)

esm <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-esm.csv")
pre_survey_data_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pre-survey.csv")
post_survey_data_partially_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-post-survey.csv")
video <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-video.csv")
pqa <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pqa.csv")
attendance <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-attendance.csv")
class_data <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-class-video.csv")
demographics <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-demographics.csv")
pm <- read_csv("/Volumes/SCHMIDTLAB/PSE/Data/STEM-IE/STEM-IE-program-match.csv")
act <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-program-by-activity.csv")
loc <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-program-by-location.csv")

attendance <- rename(attendance, participant_ID = ParticipantID)
attendance <- mutate(attendance, prop_attend = DaysAttended / DaysScheduled, 
                     participant_ID = as.integer(participant_ID))
attendance <- select(attendance, participant_ID, prop_attend)

demographics <- filter(demographics, participant_ID!= 7187)
demographics <- left_join(demographics, attendance)

esm$overall_engagement <- jmRtools::composite_mean_maker(esm, hard_working, concentrating, enjoy, interest)

df <- left_join(esm, pre_survey_data_processed, by = "participant_ID") # df & post-survey
df <- left_join(df, video, by = c("program_ID", "response_date", "sociedad_class", "signal_number")) # df & video
df <- left_join(df, demographics, by = c("participant_ID", "program_ID")) # df and demographics

pqa <- mutate(pqa, 
              active = active_part_1 + active_part_2,
              ho_thinking = ho_thinking_1 + ho_thinking_2 + ho_thinking_3,
              belonging = belonging_1 + belonging_2,
              agency = agency_1 + agency_2 + agency_3 + agency_4,
              youth_development_overall = active_part_1 + active_part_2 + ho_thinking_1 + ho_thinking_2 + ho_thinking_3 + belonging_1 + belonging_2 + agency_1 + agency_2 + agency_3 + agency_4,
              making_observations = stem_sb_8,
              data_modeling = stem_sb_2 + stem_sb_3 + stem_sb_9,
              interpreting_communicating = stem_sb_6,
              generating_data = stem_sb_4,
              asking_questions = stem_sb_1)

# pqa <- rename(pqa, sixth_math_sociedad = sixth_math)
# pqa <- rename(pqa, seventh_math_sociedad = seventh_math)
# pqa <- rename(pqa, eighth_math_sociedad = eighth_math)
# pqa <- rename(pqa, dance_sociedad = dance)
# pqa <- rename(pqa, robotics_sociedad = robotics)

pqa$sociedad_class <- ifelse(pqa$eighth_math == 1, "8th Math",
                             ifelse(pqa$seventh_math == 1, "7th Math",
                                    ifelse(pqa$sixth_math == 1, "6th Math",
                                           ifelse(pqa$robotics == 1, "Robotics",
                                                  ifelse(pqa$dance == 1, "Dance", NA)))))

pqa <- rename(pqa, 
              program_ID = SiteIDNumeric,
              response_date = resp_date,
              signal_number = signal)

pqa$program_ID <- as.character(pqa$program_ID)

df <- left_join(df, pqa, by = c("response_date", "program_ID", "signal_number", "sociedad_class"))

x <- lmer(challenge ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

chall_df <- lmer(challenge ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_challenge = `(Intercept)`) %>%
    mutate(pred_challenge = pred_challenge + x[1, 1]) %>% 
    tbl_df()

x <- lmer(learning ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

learning_df <- lmer(learning ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_learning = `(Intercept)`) %>% 
    mutate(pred_learning = pred_learning + x[1, 1]) %>% 
    tbl_df()

x <- lmer(positive_affect ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

positive_affect_df <- lmer(positive_affect ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_positive_affect = `(Intercept)`) %>% 
    mutate(pred_positive_affect = pred_positive_affect + x[1, 1]) %>% 
    tbl_df()

x <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

relevance_df <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_relevance = `(Intercept)`) %>% 
    mutate(pred_relevance = pred_relevance + x[1, 1]) %>% 
    tbl_df()

x <- lmer(interest ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

interest_df <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_interest = `(Intercept)`) %>% 
    mutate(pred_interest = pred_interest + x[1, 1]) %>% 
    tbl_df()

x <- lmer(overall_engagement ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

engagement_df <- lmer(overall_engagement ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_overall_engagement = `(Intercept)`) %>% 
    mutate(pred_overall_engagement = pred_overall_engagement + x[1, 1]) %>% 
    tbl_df()

pred_var_df <- chall_df %>% 
    left_join(learning_df, by = "participant_ID") %>% 
    left_join(positive_affect_df, by = "participant_ID") %>% 
    left_join(relevance_df, by = "participant_ID") %>% 
    left_join(interest_df, by = "participant_ID") %>% 
    left_join(engagement_df, by = "participant_ID")

pre_survey_data_processed$participant_ID <- as.character(pre_survey_data_processed$participant_ID)
post_survey_data_partially_processed$participant_ID <- as.character(post_survey_data_partially_processed$participant_ID)
demographics$participant_ID <- as.character(demographics$participant_ID)

fix_missing <- function(x) {
    x[is.na(x)] <- 0
    x
}

names(act)[2:7] <- paste0("ACT_", names(act)[2:7])
act <- mutate_if(act, is.double, fix_missing)
names(loc)[2:7] <- paste0("LOC_", names(loc)[2:7])
loc <- mutate_if(loc, is.double, fix_missing)

mod_df <- left_join(pred_var_df, pre_survey_data_processed, by = "participant_ID") %>% 
    left_join(post_survey_data_partially_processed, by = "participant_ID") %>% 
    left_join(demographics, by = "participant_ID") %>% 
    left_join(pm, by = "program_ID")

mod_df <- mod_df %>% 
    left_join(act) %>%
    left_join(loc) %>% 
    rename(lab = `ACT_Lab Activity`,
           create = `ACT_Creating Product`,
           not_focused = `ACT_Not Focused`,
           basic = `ACT_Basic Skills Activity`,
           psl = `ACT_Program Staff Led`,
           fts = `ACT_Field Trip Speaker`,
           class_space = `LOC_Classroom Space`,
           in_comm = `LOC_Outdoors - Community`) %>% 
    mutate(doing = lab + create)

mod_df$post_future_goals_plans <- jmRtools::composite_mean_maker(mod_df,
                                                                 post_future_job_become_STEM,
                                                                 post_future_job_use_science_math,
                                                                 post_future_job_work_science_computer)

mod_df$pre_future_goals_plans <- jmRtools::composite_mean_maker(mod_df,
                                                                pre_future_job_become_STEM,
                                                                pre_future_job_use_science_math,
                                                                pre_future_job_work_science_computer)

mod_df <- mutate(mod_df,
                 program_name = ifelse(program_name == "Providence -  RWP Zoo", "Providence - RWP Zoo", program_name),
                 overall_program_rating = case_when(
                     program_name == "Boston - Dorchester House" ~ "inconsistent-quality",
                     program_name == "Boston - Mathpower" ~ "inconsistent-quality",
                     program_name == "Boston - Sociedad Latina" ~ "inconsistent-quality",
                     program_name == "Boston - Thompson Island" ~ "consistent-quality",
                     program_name == "Providence - RWP Zoo" ~ "inconsistent-quality",
                     program_name == "Providence - Biomes" ~ "consistent-quality",
                     program_name == "Providence - Crazy Machines" ~ "consistent-quality",
                     program_name == "Providence - Down City Design" ~ "inconsistent-quality",
                     program_name == "Providence - Explore the Bay" ~ "consistent-quality",
                     TRUE ~ "missing"
                 ),
                 overall_program_continuous_rating = case_when(
                     program_name == "Boston - Dorchester House" ~ 151.2782,
                     program_name == "Boston - Mathpower" ~ 112.4517,
                     program_name == "Boston - Sociedad Latina" ~ 133.9675,
                     program_name == "Boston - Thompson Island" ~ 94.2545,
                     program_name == "Providence - RWP Zoo" ~ 136.8659,
                     program_name == "Providence - Biomes" ~ 92.6211,
                     program_name == "Providence - Crazy Machines" ~ 99.3313,
                     program_name == "Providence - Down City Design" ~ 117.7022,
                     program_name == "Providence - Explore the Bay" ~ 88.5104,
                     TRUE ~ -99
                 )
)

mod_df$overall_program_rating <- fct_relevel(mod_df$overall_program_rating,
                                             "inconsistent-quality")

Correlations

mod_df %>% 
    select(overall_pre_interest, overall_post_interest, pre_future_goals_plans, post_future_goals_plans, pred_challenge, pred_relevance, overall_program_continuous_rating) %>% 
    correlate() %>% 
    shave() %>% 
    fashion()

##                             rowname overall_pre_interest
## 1              overall_pre_interest                     
## 2             overall_post_interest                  .59
## 3            pre_future_goals_plans                  .39
## 4           post_future_goals_plans                  .20
## 5                    pred_challenge                  .00
## 6                    pred_relevance                  .11
## 7 overall_program_continuous_rating                 -.12
##   overall_post_interest pre_future_goals_plans post_future_goals_plans
## 1                                                                     
## 2                                                                     
## 3                   .33                                               
## 4                   .47                    .47                        
## 5                  -.06                    .05                     .08
## 6                   .17                    .28                     .30
## 7                  -.13                   -.16                    -.00
##   pred_challenge pred_relevance overall_program_continuous_rating
## 1                                                                
## 2                                                                
## 3                                                                
## 4                                                                
## 5                                                                
## 6            .58                                                 
## 7            .04            .10

1. Changes in engagement

1A: Basic model

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement +
              
              overall_pre_interest + 
              
              gender + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	overall_post_interest
	B	std. Error	p
Fixed Parts
(Intercept)	0.10	0.37	.784
pred_overall_engagement	0.46	0.11	<.001
overall_pre_interest	0.53	0.07	<.001
gender (M)	0.11	0.12	.345
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.051
Observations	141
R² / Ω₀²	.465 / .465

With overall program rating added

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement + 
              
              overall_pre_interest + 
              
              gender + 
              
              overall_program_rating + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	overall_post_interest
	B	std. Error	p
Fixed Parts
(Intercept)	-0.03	0.36	.925
pred_overall_engagement	0.45	0.11	<.001
overall_pre_interest	0.54	0.07	<.001
gender (M)	0.12	0.12	.299
overall_program_rating (consistent-quality)	0.26	0.13	.045
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.010
Observations	141
R² / Ω₀²	.454 / .454

With overall program rating (continuous) added

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement + 
               
              overall_pre_interest + 
               
              gender + 
               
              scale(overall_program_continuous_rating) + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	overall_post_interest
	B	std. Error	p
Fixed Parts
(Intercept)	0.11	0.37	.758
pred_overall_engagement	0.46	0.11	<.001
overall_pre_interest	0.53	0.07	<.001
gender (M)	0.12	0.12	.319
scale(overall_program_continuous_rating)	-0.06	0.08	.443
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.054
Observations	141
R² / Ω₀²	.467 / .466

2. Changes in engagement

2A: Basic model

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement +
              
              pre_future_goals_plans +
              
              gender + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	post_future_goals_plans
	B	std. Error	p
Fixed Parts
(Intercept)	0.72	0.39	.064
pred_overall_engagement	0.30	0.13	.026
pre_future_goals_plans	0.38	0.09	<.001
gender (M)	0.39	0.14	.007
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.000
Observations	133
R² / Ω₀²	.293 / .293

2B: With overall program rating added

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement + 
              
              pre_future_goals_plans +
              
              gender + 
              
              overall_program_rating + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	post_future_goals_plans
	B	std. Error	p
Fixed Parts
(Intercept)	0.73	0.39	.063
pred_overall_engagement	0.29	0.13	.027
pre_future_goals_plans	0.39	0.09	<.001
gender (M)	0.39	0.14	.007
overall_program_rating (consistent-quality)	-0.04	0.14	.786
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.000
Observations	133
R² / Ω₀²	.293 / .293

2C: With overall program rating (continuous) added

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement + 
               
              pre_future_goals_plans +
               
              gender + 
               
              scale(overall_program_continuous_rating) + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)

	post_future_goals_plans
	B	std. Error	p
Fixed Parts
(Intercept)	0.72	0.39	.065
pred_overall_engagement	0.30	0.13	.026
pre_future_goals_plans	0.38	0.09	<.001
gender (M)	0.39	0.14	.007
scale(overall_program_continuous_rating)	-0.00	0.07	.949
Random Parts
N_{program_ID}	9
ICC_{program_ID}	0.000
Observations	133
R² / Ω₀²	.293 / .293

RQ_4

Joshua Rosenberg

12/19/2017