How this document is organized
- Two outcomes (interest and future goals), each modeled controlling for interest, future goals, and gender, with three models per outcome:
  - Basic model (described above)
  - Basic model plus the categorical overall program rating
  - Basic model plus the continuous overall program rating
library(tidyverse)
library(lme4)
library(corrr)
library(jmRtools)
library(sjPlot)
library(magrittr)
# Load every STEM-IE source table from the shared lab volume.
#
# The directory is spelled once so all files resolve against the same path;
# the program-match file previously used "PSE/Data/" while every other file
# used "PSE/data/", which breaks on a case-sensitive mount.
data_dir <- "/Volumes/SCHMIDTLAB/PSE/data/STEM-IE"

# Read one CSV, given by file name only, from `data_dir`.
read_stem_ie <- function(file_name) {
  read_csv(file.path(data_dir, file_name))
}

esm <- read_stem_ie("STEM-IE-esm.csv")
pre_survey_data_processed <- read_stem_ie("STEM-IE-pre-survey.csv")
post_survey_data_partially_processed <- read_stem_ie("STEM-IE-post-survey.csv")
video <- read_stem_ie("STEM-IE-video.csv")
pqa <- read_stem_ie("STEM-IE-pqa.csv")
attendance <- read_stem_ie("STEM-IE-attendance.csv")
class_data <- read_stem_ie("STEM-IE-class-video.csv")
demographics <- read_stem_ie("STEM-IE-demographics.csv")
pm <- read_stem_ie("STEM-IE-program-match.csv")
act <- read_stem_ie("STEM-IE-program-by-activity.csv")
loc <- read_stem_ie("STEM-IE-program-by-location.csv")
# Reduce attendance to one column per participant: the proportion of
# scheduled days that were actually attended.
attendance <- attendance %>%
  rename(participant_ID = ParticipantID) %>%
  mutate(
    prop_attend = DaysAttended / DaysScheduled,
    participant_ID = as.integer(participant_ID)
  ) %>%
  select(participant_ID, prop_attend)

# Drop participant 7187, then attach attendance. The join uses whatever
# columns the two tables share (no explicit `by`).
# NOTE(review): the reason 7187 is excluded is not visible here - confirm.
demographics <- demographics %>%
  filter(participant_ID != 7187) %>%
  left_join(attendance)
# Composite engagement score built from four ESM items.
esm$overall_engagement <- jmRtools::composite_mean_maker(
  esm,
  hard_working, concentrating, enjoy, interest
)

# Response-level analysis table: ESM responses with the pre-survey, the video
# codes, and demographics attached.
df <- esm %>%
  # ESM & pre-survey
  left_join(pre_survey_data_processed, by = "participant_ID") %>%
  # & video
  left_join(
    video,
    by = c("program_ID", "response_date", "sociedad_class", "signal_number")
  ) %>%
  # & demographics
  left_join(demographics, by = c("participant_ID", "program_ID"))
# Sum individual PQA items into subscale scores, an overall youth-development
# total (all eleven subscale items), and the STEM practice indicators.
pqa <- pqa %>%
  mutate(
    active = active_part_1 + active_part_2,
    ho_thinking = ho_thinking_1 + ho_thinking_2 + ho_thinking_3,
    belonging = belonging_1 + belonging_2,
    agency = agency_1 + agency_2 + agency_3 + agency_4,
    youth_development_overall =
      active_part_1 + active_part_2 +
      ho_thinking_1 + ho_thinking_2 + ho_thinking_3 +
      belonging_1 + belonging_2 +
      agency_1 + agency_2 + agency_3 + agency_4,
    making_observations = stem_sb_8,
    data_modeling = stem_sb_2 + stem_sb_3 + stem_sb_9,
    interpreting_communicating = stem_sb_6,
    generating_data = stem_sb_4,
    asking_questions = stem_sb_1
  )
# pqa <- rename(pqa, sixth_math_sociedad = sixth_math)
# pqa <- rename(pqa, seventh_math_sociedad = seventh_math)
# pqa <- rename(pqa, eighth_math_sociedad = eighth_math)
# pqa <- rename(pqa, dance_sociedad = dance)
# pqa <- rename(pqa, robotics_sociedad = robotics)
# Collapse the five class indicator columns into one label (first indicator
# equal to 1 wins, checked in the order below; NA when none applies), align
# the key names and the program_ID type with `df`, then attach PQA to `df`.
pqa <- pqa %>%
  mutate(
    sociedad_class =
      ifelse(eighth_math == 1, "8th Math",
        ifelse(seventh_math == 1, "7th Math",
          ifelse(sixth_math == 1, "6th Math",
            ifelse(robotics == 1, "Robotics",
              ifelse(dance == 1, "Dance", NA)
            )
          )
        )
      )
  ) %>%
  rename(
    program_ID = SiteIDNumeric,
    response_date = resp_date,
    signal_number = signal
  ) %>%
  mutate(program_ID = as.character(program_ID))

df <- df %>%
  left_join(
    pqa,
    by = c("response_date", "program_ID", "signal_number", "sociedad_class")
  )
# Participant-level predicted values for each momentary outcome.
#
# For every outcome a null model with random intercepts for program,
# participant, and beep is fit ONCE; the participant's prediction is the fixed
# intercept plus that participant's random intercept.
#
# Changes from the previous copy-pasted version:
#   * `interest_df` is now estimated from `interest`; it was fit on
#     `relevance`, so `pred_interest` mixed the interest intercept with the
#     relevance random effects.
#   * each model is fit once instead of twice.
#   * the intercept is taken as a scalar with `fixef()` rather than as a
#     1 x 1 table from `broom::tidy()`.

# Fit the null model for one outcome and return participant predictions.
#
# outcome: name of the outcome column in `data` (single string).
# data:    data frame containing `outcome`, `program_ID`, `participant_ID`,
#          and `beep_ID_new`.
# Returns a tibble with `participant_ID` (character, taken from the row names
# of the participant random effects) and `pred_<outcome>`.
participant_predictions <- function(outcome, data) {
  model_formula <- as.formula(
    paste(outcome, "~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new)")
  )
  fit <- lmer(model_formula, data = data)

  fixed_intercept <- fixef(fit)[["(Intercept)"]]
  participant_effects <- ranef(fit)[["participant_ID"]]

  out <- tibble(
    participant_ID = rownames(participant_effects),
    pred = participant_effects[["(Intercept)"]] + fixed_intercept
  )
  names(out)[2] <- paste0("pred_", outcome)
  out
}

chall_df <- participant_predictions("challenge", df)
learning_df <- participant_predictions("learning", df)
positive_affect_df <- participant_predictions("positive_affect", df)
relevance_df <- participant_predictions("relevance", df)
interest_df <- participant_predictions("interest", df)
engagement_df <- participant_predictions("overall_engagement", df)
# One row per participant with all six predicted outcomes side by side.
# Tables are joined left to right, so the participants in `chall_df` define
# the rows.
pred_var_df <- reduce(
  list(
    chall_df,
    learning_df,
    positive_affect_df,
    relevance_df,
    interest_df,
    engagement_df
  ),
  left_join,
  by = "participant_ID"
)
# participant_ID in `pred_var_df` comes from row names and is character, so
# the survey and demographics keys are converted to character before joining.
pre_survey_data_processed <- mutate(
  pre_survey_data_processed,
  participant_ID = as.character(participant_ID)
)
post_survey_data_partially_processed <- mutate(
  post_survey_data_partially_processed,
  participant_ID = as.character(participant_ID)
)
demographics <- mutate(
  demographics,
  participant_ID = as.character(participant_ID)
)
# Return `x` with every NA replaced by 0; other elements are unchanged.
fix_missing <- function(x) {
  replace(x, is.na(x), 0)
}
# Columns 2-7 of the activity and location tables are prefixed so the two
# sets stay distinguishable after both are joined onto the model data;
# missing values in double columns are then set to 0.
prefixed_cols <- 2:7

names(act)[prefixed_cols] <- paste0("ACT_", names(act)[prefixed_cols])
act <- act %>% mutate_if(is.double, fix_missing)

names(loc)[prefixed_cols] <- paste0("LOC_", names(loc)[prefixed_cols])
loc <- loc %>% mutate_if(is.double, fix_missing)
# Participant-level modeling table: predicted outcomes plus surveys,
# demographics, program metadata, and program-level activity / location
# columns. The `act` and `loc` joins use whatever columns the tables share.
mod_df <- pred_var_df %>%
  left_join(pre_survey_data_processed, by = "participant_ID") %>%
  left_join(post_survey_data_partially_processed, by = "participant_ID") %>%
  left_join(demographics, by = "participant_ID") %>%
  left_join(pm, by = "program_ID") %>%
  left_join(act) %>%
  left_join(loc) %>%
  # Short, syntactic names for the columns used in the models.
  rename(
    lab = `ACT_Lab Activity`,
    create = `ACT_Creating Product`,
    not_focused = `ACT_Not Focused`,
    basic = `ACT_Basic Skills Activity`,
    psl = `ACT_Program Staff Led`,
    fts = `ACT_Field Trip Speaker`,
    class_space = `LOC_Classroom Space`,
    in_comm = `LOC_Outdoors - Community`
  ) %>%
  # "Doing" combines lab activities and product creation.
  mutate(doing = lab + create)
# Future goals and plans composites, each built from the same three
# STEM-career items, first for the post-survey and then for the pre-survey.
mod_df[["post_future_goals_plans"]] <- jmRtools::composite_mean_maker(
  mod_df,
  post_future_job_become_STEM,
  post_future_job_use_science_math,
  post_future_job_work_science_computer
)

mod_df[["pre_future_goals_plans"]] <- jmRtools::composite_mean_maker(
  mod_df,
  pre_future_job_become_STEM,
  pre_future_job_use_science_math,
  pre_future_job_work_science_computer
)
# Attach program-level quality ratings, keyed by program name.

# Programs in each categorical rating group.
inconsistent_quality_programs <- c(
  "Boston - Dorchester House",
  "Boston - Mathpower",
  "Boston - Sociedad Latina",
  "Providence - RWP Zoo",
  "Providence - Down City Design"
)
consistent_quality_programs <- c(
  "Boston - Thompson Island",
  "Providence - Biomes",
  "Providence - Crazy Machines",
  "Providence - Explore the Bay"
)

# Continuous rating per program.
program_continuous_ratings <- c(
  "Boston - Dorchester House" = 151.2782,
  "Boston - Mathpower" = 112.4517,
  "Boston - Sociedad Latina" = 133.9675,
  "Boston - Thompson Island" = 94.2545,
  "Providence - RWP Zoo" = 136.8659,
  "Providence - Biomes" = 92.6211,
  "Providence - Crazy Machines" = 99.3313,
  "Providence - Down City Design" = 117.7022,
  "Providence - Explore the Bay" = 88.5104
)

mod_df <- mod_df %>%
  mutate(
    # NOTE(review): both strings below read identically, so this recode
    # appears to be a no-op. It may once have normalized a variant spelling
    # of the zoo's name - confirm before removing.
    program_name = ifelse(program_name == "Providence - RWP Zoo", "Providence - RWP Zoo", program_name),
    # Programs not listed above (including NA names) are labelled "missing".
    overall_program_rating = case_when(
      program_name %in% inconsistent_quality_programs ~ "inconsistent-quality",
      program_name %in% consistent_quality_programs ~ "consistent-quality",
      TRUE ~ "missing"
    ),
    # Unlisted programs get the sentinel -99.
    # NOTE(review): -99 is a numeric placeholder, not a rating; rows carrying
    # it must be excluded before this column is used as a predictor.
    overall_program_continuous_rating = coalesce(
      unname(
        program_continuous_ratings[
          match(program_name, names(program_continuous_ratings))
        ]
      ),
      -99
    ),
    # Make "inconsistent-quality" the reference level.
    overall_program_rating = fct_relevel(
      overall_program_rating,
      "inconsistent-quality"
    )
  )