Chapter 1 Data
1.1 Socio-demographic data
# Packages used throughout this chapter.
# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# deletes the user's objects without resetting loaded packages or options, so
# it does not provide a clean session. Restart R / knit in a fresh session
# instead.
library(tidyverse)
library(skimr)
library(sf)
# Load the ICEMR 2.0 export and keep only the identifier, socio-demographic,
# clinical and laboratory columns that are harmonised with the `cam` data set.
icemr <- read.csv(
  "./_data/JASON/ICEMR2.0_P1_longJul_v1_20190406.csv",
  stringsAsFactors = FALSE # spelled out: `F` is an ordinary, reassignable variable
) %>%
  # Discard rows that have no sample identifier.
  filter(!is.na(id_muestra)) %>%
  dplyr::select(
    id_muestra, id_house, id_study, edad, nm_sex,
    nm_level_study, viaje_ult_mes, lat, long, resultado_micro,
    especie_micro, temp_axilar, hist_fever, main_act_ec, tipo_casa,
    animales_casa, fumigacion, hour_sleep, result_pcr
  ) %>% # MISSING: date_fever
  # as.numeric() turns any value it cannot parse into NA (with a warning).
  mutate(
    id_study = as.numeric(id_study),
    long = as.numeric(long)
  )
# Load the CAM master file and select its counterparts of the `icemr` columns.
# The selection order matters: the columns are renamed by POSITION below, so
# each entry here must line up with the column at the same position in `icemr`.
# NOTE(review): this maps ce_temp_ax -> temp_axilar and ce_temp -> hist_fever;
# confirm that ce_temp really encodes fever history.
cam <- read.csv(
  "./_data/JASON/Master 20180905_ON.csv",
  stringsAsFactors = FALSE # spelled out: `F` is an ordinary, reassignable variable
) %>%
  dplyr::select(
    id_muestra, id_house, id_study, nm_age_int, nm_sex,
    nm_level_study, ce_travel, latitud, longitud, resultado_micro,
    especie_micro, ce_temp_ax, ce_temp, ce_economic_act, ce_house_type,
    ce_in_animals, ce_fumig, ce_sleep_hour, result_pcr.mangold
  ) %>%
  # as.numeric() turns any value it cannot parse into NA (with a warning).
  mutate(latitud = as.numeric(latitud))

# Guard the positional rename: a differing column count would otherwise
# mislabel columns or leave NA names.
stopifnot(ncol(cam) == ncol(icemr))
colnames(cam) <- colnames(icemr)
1.2 Lab data
# sero <- read.csv("./_data/JASON/PvSeroTAT_Peru_Ab_data_2020-03-05.csv", stringsAsFactors = F) %>%
# rename(id_muestra = X)
# Updated 2021-03-29
# Serology classification results (random-forest classifier output), keyed by
# sample id. The 63SE/90SP call is the one exposed as `SEROPOSITIVE`.
sero <- read.csv(
  "./_data/JASON/PVSEROTAT_RF_CLASS_RESULTS_0303221.csv",
  stringsAsFactors = FALSE # spelled out: `F` is an ordinary, reassignable variable
) %>%
  rename(
    id_muestra = Bleedcode,
    SEROPOSITIVE = SEROPOSITIVE_63SE_90SP
  ) %>%
  # Bring in TREATMENT from the earlier antibody-data export; inner_join keeps
  # only samples present in both files.
  inner_join(
    read.csv(
      "./_data/JASON/PvSeroTAT_Peru_Ab_data_2020-03-05.csv",
      stringsAsFactors = FALSE
    ) %>%
      rename(id_muestra = X) %>%
      # Namespace-qualified like every other select() call in this file.
      dplyr::select(id_muestra, TREATMENT),
    by = "id_muestra"
  )
1.2.1 Assemble
Name | d1 |
Number of rows | 1904 |
Number of columns | 32 |
_______________________ | |
Column type frequency: | |
character | 5 |
numeric | 27 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
hour_sleep | 0 | 1 | 7 | 8 | 0 | 41 | 0 |
SEROPOSITIVE_79SE_79SP | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
SEROPOSITIVE | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
SEROPOSITIVE_90SE_59SP | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
TREATMENT | 0 | 1 | 9 | 12 | 0 | 2 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
id_muestra | 0 | 1.00 | 5096818.03 | 4260507.20 | 501001.00 | 502190.75 | 9020246.00 | 9042933.50 | 9072251.00 | ▇▁▁▁▇ |
id_house | 0 | 1.00 | 718499.70 | 200661.45 | 501001.00 | 502182.00 | 902008.00 | 904107.25 | 907083.00 | ▇▁▁▁▇ |
id_study | 13 | 0.99 | 714191804.13 | 199690114.30 | 500100101.00 | 500217901.50 | 900200704.00 | 900410603.50 | 900708302.00 | ▇▁▁▁▇ |
edad | 0 | 1.00 | 28.85 | 21.83 | 0.00 | 10.00 | 24.00 | 44.00 | 117.00 | ▇▅▃▁▁ |
nm_sex | 0 | 1.00 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
nm_level_study | 0 | 1.00 | 607.53 | 2381.72 | 1.00 | 3.00 | 4.00 | 5.00 | 9999.00 | ▇▁▁▁▁ |
viaje_ult_mes | 0 | 1.00 | 26.45 | 511.85 | 0.00 | 0.00 | 0.00 | 0.00 | 9999.00 | ▇▁▁▁▁ |
lat | 102 | 0.95 | -3.95 | 4.91 | -73.23 | -3.80 | -3.51 | -3.44 | -3.35 | ▁▁▁▁▇ |
long | 102 | 0.95 | -33.50 | 35.41 | -73.34 | -73.33 | -3.51 | -3.44 | 73.23 | ▆▁▇▁▁ |
resultado_micro | 32 | 0.98 | 0.02 | 0.15 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
especie_micro | 1006 | 0.47 | 0.07 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | ▇▁▁▁▁ |
temp_axilar | 0 | 1.00 | 36.19 | 0.51 | 35.50 | 35.90 | 36.10 | 36.50 | 40.20 | ▇▃▁▁▁ |
hist_fever | 0 | 1.00 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
main_act_ec | 0 | 1.00 | 7.40 | 9.11 | 0.00 | 5.00 | 7.00 | 8.00 | 88.00 | ▇▁▁▁▁ |
tipo_casa | 0 | 1.00 | 3.28 | 0.79 | 1.00 | 3.00 | 3.00 | 4.00 | 4.00 | ▁▂▁▇▇ |
animales_casa | 0 | 1.00 | 0.29 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
fumigacion | 0 | 1.00 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
result_pcr | 0 | 1.00 | 0.09 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | ▇▁▁▁▁ |
W16_RAMA | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W02_L02 | 0 | 1.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W58_EBPII | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W50_RBP2b | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W01_MSP119 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W30_MSP8 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W08_L12 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W39_MSP3a | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
RFOREST_MODEL_VOTES | 0 | 1.00 | 0.74 | 0.23 | 0.11 | 0.56 | 0.82 | 0.95 | 1.00 | ▁▂▂▃▇ |
## Reading layer `ser_data' from data source `/Users/gabrielcarrasco/Dropbox/Work/Colabs UPCH/Serology [CAM:Jason]/Analysis/RSCD_JR/_data/ser_data.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1233 features and 2 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -73.34285 ymin: -3.835081 xmax: -72.97889 ymax: -3.354823
## geographic CRS: WGS 84
# Attach the household coordinates in `coord` to the individual-level data in
# `d1` and derive the analysis variables.
d2 <- coord %>%
  # No columns are named, so only fully duplicated rows are dropped.
  # NOTE(review): `.keep_all` has no effect without named columns; if one row
  # per household was intended this should be
  # distinct(id_house, .keep_all = TRUE) -- confirm before changing.
  distinct(.keep_all = TRUE) %>%
  # Households without coordinates (and coordinates without people) drop out.
  inner_join(d1, by = "id_house") %>%
  mutate(
    # Numeric 0/1 copy of the serology call.
    sero = ifelse(SEROPOSITIVE == "Positive", 1, 0),
    # Fever = axillary temperature of 37.5 or above.
    fever = ifelse(temp_axilar < 37.5, 0, 1),
    nm_sex = ifelse(nm_sex == 1, "1_male", "0_female"),
    # Community codes below 600 are labelled peri-urban, the rest rural.
    area = factor(
      ifelse(as.numeric(as.character(comm)) < 600, "0_periurban", "1_rural")
    ),
    # Right-closed age bands: (-Inf,5], (5,15], (15,30], (30,50], (50,Inf].
    age_cat = cut(edad, breaks = c(-Inf, 5, 15, 30, 50, Inf))
  ) %>%
  # Convert the categorical codes to factors (`area` is already a factor).
  # NOTE(review): nm_level_study and viaje_ult_mes contain the value 9999,
  # which becomes its own factor level -- presumably a missing-data code;
  # confirm whether it should be recoded to NA first.
  mutate(across(
    all_of(c(
      "nm_sex", "nm_level_study", "viaje_ult_mes",
      "resultado_micro", "especie_micro", "fever"
    )),
    as.factor
  ))
skim(d2)
Name | d2 |
Number of rows | 1790 |
Number of columns | 38 |
_______________________ | |
Column type frequency: | |
character | 7 |
factor | 8 |
numeric | 23 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
comm | 0 | 1 | 3 | 3 | 0 | 10 | 0 |
hour_sleep | 0 | 1 | 7 | 8 | 0 | 41 | 0 |
SEROPOSITIVE_79SE_79SP | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
SEROPOSITIVE | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
SEROPOSITIVE_90SE_59SP | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
TREATMENT | 0 | 1 | 9 | 12 | 0 | 2 | 0 |
geometry | 0 | 1 | 21 | 39 | 0 | 572 | 0 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
nm_sex | 0 | 1.00 | FALSE | 2 | 0_f: 973, 1_m: 817 |
nm_level_study | 0 | 1.00 | FALSE | 12 | 3: 627, 5: 316, 4: 295, 6: 137 |
viaje_ult_mes | 0 | 1.00 | FALSE | 3 | 0: 1436, 1: 349, 999: 5 |
resultado_micro | 23 | 0.99 | FALSE | 2 | 0: 1729, 1: 38 |
especie_micro | 987 | 0.45 | FALSE | 3 | 0: 768, 2: 27, 1: 8 |
fever | 0 | 1.00 | FALSE | 2 | 0: 1764, 1: 26 |
area | 0 | 1.00 | FALSE | 2 | 1_r: 1005, 0_p: 785 |
age_cat | 0 | 1.00 | FALSE | 5 | (5,: 568, (30: 388, (50: 357, (15: 299 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
id_house | 0 | 1.00 | 728025.71 | 199707.77 | 501001.00 | 502212.75 | 902021.00 | 905002.00 | 907079.00 | ▆▁▁▁▇ |
id_muestra | 0 | 1.00 | 5298789.57 | 4240476.79 | 501001.00 | 502200.25 | 9020646.00 | 9050038.50 | 9072251.00 | ▆▁▁▁▇ |
id_study | 12 | 0.99 | 723740754.26 | 198798954.37 | 500100101.00 | 500220802.00 | 900201904.50 | 900411103.75 | 900707901.00 | ▆▁▁▁▇ |
edad | 0 | 1.00 | 28.99 | 22.04 | 0.00 | 10.00 | 24.00 | 45.00 | 117.00 | ▇▅▃▁▁ |
lat | 3 | 1.00 | -3.95 | 4.93 | -73.23 | -3.80 | -3.51 | -3.44 | -3.35 | ▁▁▁▁▇ |
long | 3 | 1.00 | -33.75 | 35.45 | -73.34 | -73.33 | -3.51 | -3.44 | 73.23 | ▆▁▇▁▁ |
temp_axilar | 0 | 1.00 | 36.18 | 0.48 | 35.50 | 35.90 | 36.10 | 36.50 | 39.30 | ▇▆▁▁▁ |
hist_fever | 0 | 1.00 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
main_act_ec | 0 | 1.00 | 7.35 | 9.18 | 0.00 | 5.00 | 7.00 | 8.00 | 88.00 | ▇▁▁▁▁ |
tipo_casa | 0 | 1.00 | 3.29 | 0.77 | 1.00 | 3.00 | 3.00 | 4.00 | 4.00 | ▁▂▁▆▇ |
animales_casa | 0 | 1.00 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
fumigacion | 0 | 1.00 | 0.47 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
result_pcr | 0 | 1.00 | 0.08 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 2.00 | ▇▁▁▁▁ |
W16_RAMA | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W02_L02 | 0 | 1.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W58_EBPII | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W50_RBP2b | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W01_MSP119 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W30_MSP8 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W08_L12 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
W39_MSP3a | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▁▁▁▁ |
RFOREST_MODEL_VOTES | 0 | 1.00 | 0.74 | 0.23 | 0.11 | 0.56 | 0.82 | 0.95 | 1.00 | ▁▂▂▃▇ |
sero | 0 | 1.00 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
1.3 Descriptive [Table 1]
# Input for Table 1: drop the sf geometry so the table packages receive a
# plain data frame, and keep only the variables to be described.
t1 <- d2 %>%
  st_set_geometry(NULL) %>%
  dplyr::select(
    area, comm, edad, age_cat, nm_sex, nm_level_study, viaje_ult_mes,
    resultado_micro, especie_micro, fever, temp_axilar, hist_fever,
    SEROPOSITIVE, TREATMENT
  )
library(table1)
# Describe every column except the stratifier itself: `~ . | SEROPOSITIVE`
# would also list SEROPOSITIVE as a row, which is 100%/0% by construction.
# hist_fever is a 0/1 indicator, so it is passed as a factor to get counts and
# percentages instead of a mean (SD). `t1` itself is left untouched.
table1(
  as.formula(paste(
    "~",
    paste(setdiff(names(t1), "SEROPOSITIVE"), collapse = " + "),
    "| SEROPOSITIVE"
  )),
  data = mutate(t1, hist_fever = factor(hist_fever))
)
Negative (N=1200) |
Positive (N=590) |
Overall (N=1790) |
|
---|---|---|---|
area | |||
0_periurban | 605 (50.4%) | 180 (30.5%) | 785 (43.9%) |
1_rural | 595 (49.6%) | 410 (69.5%) | 1005 (56.1%) |
comm | |||
501 | 197 (16.4%) | 53 (9.0%) | 250 (14.0%) |
502 | 187 (15.6%) | 86 (14.6%) | 273 (15.3%) |
503 | 221 (18.4%) | 41 (6.9%) | 262 (14.6%) |
901 | 13 (1.1%) | 34 (5.8%) | 47 (2.6%) |
902 | 77 (6.4%) | 102 (17.3%) | 179 (10.0%) |
903 | 22 (1.8%) | 36 (6.1%) | 58 (3.2%) |
904 | 186 (15.5%) | 84 (14.2%) | 270 (15.1%) |
905 | 82 (6.8%) | 15 (2.5%) | 97 (5.4%) |
906 | 109 (9.1%) | 57 (9.7%) | 166 (9.3%) |
907 | 106 (8.8%) | 82 (13.9%) | 188 (10.5%) |
edad | |||
Mean (SD) | 24.1 (20.4) | 39.0 (21.9) | 29.0 (22.0) |
Median [Min, Max] | 15.0 [0, 117] | 38.0 [1.00, 92.0] | 24.0 [0, 117] |
age_cat | |||
(-Inf,5] | 160 (13.3%) | 18 (3.1%) | 178 (9.9%) |
(5,15] | 467 (38.9%) | 101 (17.1%) | 568 (31.7%) |
(15,30] | 201 (16.8%) | 98 (16.6%) | 299 (16.7%) |
(30,50] | 208 (17.3%) | 180 (30.5%) | 388 (21.7%) |
(50, Inf] | 164 (13.7%) | 193 (32.7%) | 357 (19.9%) |
nm_sex | |||
0_female | 695 (57.9%) | 278 (47.1%) | 973 (54.4%) |
1_male | 505 (42.1%) | 312 (52.9%) | 817 (45.6%) |
nm_level_study | |||
1 | 82 (6.8%) | 54 (9.2%) | 136 (7.6%) |
2 | 97 (8.1%) | 7 (1.2%) | 104 (5.8%) |
3 | 425 (35.4%) | 202 (34.2%) | 627 (35.0%) |
4 | 146 (12.2%) | 149 (25.3%) | 295 (16.5%) |
5 | 218 (18.2%) | 98 (16.6%) | 316 (17.7%) |
6 | 100 (8.3%) | 37 (6.3%) | 137 (7.7%) |
7 | 13 (1.1%) | 3 (0.5%) | 16 (0.9%) |
8 | 18 (1.5%) | 3 (0.5%) | 21 (1.2%) |
9 | 7 (0.6%) | 1 (0.2%) | 8 (0.4%) |
10 | 11 (0.9%) | 4 (0.7%) | 15 (0.8%) |
11 | 0 (0%) | 1 (0.2%) | 1 (0.1%) |
9999 | 83 (6.9%) | 31 (5.3%) | 114 (6.4%) |
viaje_ult_mes | |||
0 | 1016 (84.7%) | 420 (71.2%) | 1436 (80.2%) |
1 | 181 (15.1%) | 168 (28.5%) | 349 (19.5%) |
9999 | 3 (0.2%) | 2 (0.3%) | 5 (0.3%) |
resultado_micro | |||
0 | 1167 (97.2%) | 562 (95.3%) | 1729 (96.6%) |
1 | 15 (1.2%) | 23 (3.9%) | 38 (2.1%) |
Missing | 18 (1.5%) | 5 (0.8%) | 23 (1.3%) |
especie_micro | |||
0 | 596 (49.7%) | 172 (29.2%) | 768 (42.9%) |
1 | 5 (0.4%) | 3 (0.5%) | 8 (0.4%) |
2 | 8 (0.7%) | 19 (3.2%) | 27 (1.5%) |
Missing | 591 (49.2%) | 396 (67.1%) | 987 (55.1%) |
fever | |||
0 | 1182 (98.5%) | 582 (98.6%) | 1764 (98.5%) |
1 | 18 (1.5%) | 8 (1.4%) | 26 (1.5%) |
temp_axilar | |||
Mean (SD) | 36.2 (0.480) | 36.2 (0.482) | 36.2 (0.481) |
Median [Min, Max] | 36.1 [35.5, 39.3] | 36.1 [35.5, 39.3] | 36.1 [35.5, 39.3] |
hist_fever | |||
Mean (SD) | 0.104 (0.306) | 0.158 (0.365) | 0.122 (0.327) |
Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
SEROPOSITIVE | |||
Negative | 1200 (100%) | 0 (0%) | 1200 (67.0%) |
Positive | 0 (0%) | 590 (100%) | 590 (33.0%) |
TREATMENT | |||
No treatment | 917 (76.4%) | 0 (0%) | 917 (51.2%) |
Treatment | 283 (23.6%) | 590 (100%) | 873 (48.8%) |
library(tableone)
# Same comparison with p-values, stratified by serology status.
# Variables are chosen by NAME rather than by column position so that
# reordering or extending `t1` cannot silently change which variables are
# tested or which are treated as categorical.
CreateTableOne(
  # Everything except the stratifier and TREATMENT.
  vars = setdiff(names(t1), c("SEROPOSITIVE", "TREATMENT")),
  strata = "SEROPOSITIVE",
  data = t1,
  # All of the above are categorical except the two continuous measures.
  factorVars = setdiff(
    names(t1),
    c("edad", "temp_axilar", "SEROPOSITIVE", "TREATMENT")
  )
)
## Stratified by SEROPOSITIVE
## Negative Positive p test
## n 1200 590
## area = 1_rural (%) 595 (49.6) 410 (69.5) <0.001
## comm (%) <0.001
## 501 197 (16.4) 53 ( 9.0)
## 502 187 (15.6) 86 (14.6)
## 503 221 (18.4) 41 ( 6.9)
## 901 13 ( 1.1) 34 ( 5.8)
## 902 77 ( 6.4) 102 (17.3)
## 903 22 ( 1.8) 36 ( 6.1)
## 904 186 (15.5) 84 (14.2)
## 905 82 ( 6.8) 15 ( 2.5)
## 906 109 ( 9.1) 57 ( 9.7)
## 907 106 ( 8.8) 82 (13.9)
## edad (mean (SD)) 24.08 (20.42) 38.97 (21.85) <0.001
## age_cat (%) <0.001
## (-Inf,5] 160 (13.3) 18 ( 3.1)
## (5,15] 467 (38.9) 101 (17.1)
## (15,30] 201 (16.8) 98 (16.6)
## (30,50] 208 (17.3) 180 (30.5)
## (50, Inf] 164 (13.7) 193 (32.7)
## nm_sex = 1_male (%) 505 (42.1) 312 (52.9) <0.001
## nm_level_study (%) <0.001
## 1 82 ( 6.8) 54 ( 9.2)
## 2 97 ( 8.1) 7 ( 1.2)
## 3 425 (35.4) 202 (34.2)
## 4 146 (12.2) 149 (25.3)
## 5 218 (18.2) 98 (16.6)
## 6 100 ( 8.3) 37 ( 6.3)
## 7 13 ( 1.1) 3 ( 0.5)
## 8 18 ( 1.5) 3 ( 0.5)
## 9 7 ( 0.6) 1 ( 0.2)
## 10 11 ( 0.9) 4 ( 0.7)
## 11 0 ( 0.0) 1 ( 0.2)
## 9999 83 ( 6.9) 31 ( 5.3)
## viaje_ult_mes (%) <0.001
## 0 1016 (84.7) 420 (71.2)
## 1 181 (15.1) 168 (28.5)
## 9999 3 ( 0.2) 2 ( 0.3)
## resultado_micro = 1 (%) 15 ( 1.3) 23 ( 3.9) 0.001
## especie_micro (%) <0.001
## 0 596 (97.9) 172 (88.7)
## 1 5 ( 0.8) 3 ( 1.5)
## 2 8 ( 1.3) 19 ( 9.8)
## fever = 1 (%) 18 ( 1.5) 8 ( 1.4) 0.977
## temp_axilar (mean (SD)) 36.18 (0.48) 36.19 (0.48) 0.686
## hist_fever = 1 (%) 125 (10.4) 93 (15.8) 0.002