############# R CODE install.packages("dplyr") install.packages("texreg") install.packages("extrafont") install.packages("Rbeast") install.packages("AER") install.packages("survival") install.packages("pscl") library(marginaleffects) library(MASS) library(pscl) library(boot) library(Rbeast) library(extrafont) library(pROC) library(texreg) library(car) library(dplyr) library(stringr) library(tidyverse) library(lubridate) library(ggplot2) library(comorbidity) library(pastecs) library(mfx) library(pscl) library(lmtest) library(collinear) library(AER) #font font_import(pattern = "lmroman*") font<- "LM Roman 10" options(scipen = 999) #Positive tests data_test<-read.csv("Infekce_202305110838.csv", sep=";") data_test$Datum_pozitivity<-as.Date(data_test$Datum_pozitivity) #study period data_test_period<-data_test %>% filter(Datum_pozitivity<="2020-12-27") #filtering those who were detected positive only once during our study period ID_tot <- subset(data_test_period, Infekce == 1 & !(ID %in% data_test_period$ID[data_test_period$Infekce %in% c(2,3)]))$ID data_test_period<-data_test_period%>%filter(ID %in% ID_tot) #Medication 1.1.2019-27.12.2020 - filtered from Python for people detected positive data_filtered<- read.csv("Filtered_test.csv", sep=",") data_filtered$den_vydani<-substr(data_filtered$den_vydani, 1, 10) summary(data_filtered) #no missing values #merging with tests, thus obtaining medication only for people in our sample data_filtered<-merge(data_test_period, data_filtered, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = FALSE) #data cleansing - filtering out the negative amount of drugs or negative number of pills in package data_filtered$MNOZSTVI<-as.numeric(data_filtered$MNOZSTVI) data_filtered$BALENI<-as.numeric(data_filtered$BALENI) data_filtered<-filter(data_filtered, (MNOZSTVI>0|is.na(MNOZSTVI)) & (BALENI>0|is.na(BALENI))) #Demographics data_demo <- read.csv("data_demo.csv", sep=",") #people in our sample data_demo<-data_demo%>%filter(ID %in% ID_tot) #Diagnoses diagnozy<-read.csv("data_diagnoses1.csv", sep=",") diagnozy2<-read.csv("data_diagnoses5.csv", sep=",") diagnozy<-diagnozy[,c("ID", "den", "ZDG")] diagnozy<-merge(data_test_period, diagnozy, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = FALSE) diagnozy$den<-as.Date(diagnozy$den) diagnozy2<-diagnozy2[,c("ID", "den", "ZDG")] diagnozy2<-merge(data_test_period, diagnozy2, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = FALSE) diagnozy2$den<-as.Date(diagnozy2$den) #diagnoses for different periods (filtering based on individual's positive COVID-19 test) diagnozy_period1<-diagnozy%>%filter(den <= "2020-12-27" & den >= "2019-01-01") diagnozy_period1<-diagnozy_period1 %>% group_by(ID) %>% filter( den <= Datum_pozitivity) #diagnoses for oncological treatment diagnozy_period5<-diagnozy2%>%filter(den <= "2020-12-27" & den >= "2015-01-01") diagnozy_period5<-diagnozy_period5 %>% group_by(ID) %>% filter( den <= Datum_pozitivity) #Hospitalisation datahosp <- read.csv("data_hosp.csv", sep=",") #Mortality column_names2<-c( "ID","Datum_umrti","LPZ_Covid","Umrti_Covid", "Zakl_pricina_smrti", "Bezprostredni_pricina_smrti_Ia", "Nemoci_vedouci_ke_smrti_Ib", "Nemoci_vedouci_ke_smrti_Ic", "Nemoci_vedouci_ke_smrti_Id","Zprostredkujici_priciny_smrti") data_um<-read.csv("Umrti_202305110838.csv", sep=";", col.names = column_names2) data_um$Datum_umrti<-as.Date(data_um$Datum_umrti) data_um_period<-data_um %>% filter(Datum_umrti<"2020-12-27" & Datum_umrti>"2020-03-01") data_um_tot<-data_um_period%>% filter(ID %in% ID_tot) #LOGIT #HOSPITALIZATION AS DEPENDENT #filtering hospitalization associated with COVID-19 datahosp<-filter(datahosp, hlavni_diagnoza=="U071"| (grepl("^(J)", hlavni_diagnoza)&(vedlejsi_diagnoza_1=="U071"|vedlejsi_diagnoza_2=="U071"|vedlejsi_diagnoza_3=="U071"|vedlejsi_diagnoza_4=="U071"|vedlejsi_diagnoza_5=="U071"|vedlejsi_diagnoza_6=="U071"|vedlejsi_diagnoza_7=="U071"|vedlejsi_diagnoza_8=="U071"|vedlejsi_diagnoza_9=="U071"|vedlejsi_diagnoza_10=="U071"|vedlejsi_diagnoza_11=="U071"|vedlejsi_diagnoza_12=="U071"|vedlejsi_diagnoza_13=="U071"|vedlejsi_diagnoza_14=="U071"))| (hlavni_diagnoza=="A083"&(vedlejsi_diagnoza_1=="U071"|vedlejsi_diagnoza_2=="U071"|vedlejsi_diagnoza_3=="U071"|vedlejsi_diagnoza_4=="U071"|vedlejsi_diagnoza_5=="U071"|vedlejsi_diagnoza_6=="U071"|vedlejsi_diagnoza_7=="U071"|vedlejsi_diagnoza_8=="U071"|vedlejsi_diagnoza_9=="U071"|vedlejsi_diagnoza_10=="U071"|vedlejsi_diagnoza_11=="U071"|vedlejsi_diagnoza_12=="U071"|vedlejsi_diagnoza_13=="U071"|vedlejsi_diagnoza_14=="U071"))| (hlavni_diagnoza=="B348"&(vedlejsi_diagnoza_1=="U071"|vedlejsi_diagnoza_2=="U071"|vedlejsi_diagnoza_3=="U071"|vedlejsi_diagnoza_4=="U071"|vedlejsi_diagnoza_5=="U071"|vedlejsi_diagnoza_6=="U071"|vedlejsi_diagnoza_7=="U071"|vedlejsi_diagnoza_8=="U071"|vedlejsi_diagnoza_9=="U071"|vedlejsi_diagnoza_10=="U071"|vedlejsi_diagnoza_11=="U071"|vedlejsi_diagnoza_12=="U071"|vedlejsi_diagnoza_13=="U071"|vedlejsi_diagnoza_14=="U071"))| (hlavni_diagnoza=="Z228"&(vedlejsi_diagnoza_1=="U071"|vedlejsi_diagnoza_2=="U071"|vedlejsi_diagnoza_3=="U071"|vedlejsi_diagnoza_4=="U071"|vedlejsi_diagnoza_5=="U071"|vedlejsi_diagnoza_6=="U071"|vedlejsi_diagnoza_7=="U071"|vedlejsi_diagnoza_8=="U071"|vedlejsi_diagnoza_9=="U071"|vedlejsi_diagnoza_10=="U071"|vedlejsi_diagnoza_11=="U071"|vedlejsi_diagnoza_12=="U071"|vedlejsi_diagnoza_13=="U071"|vedlejsi_diagnoza_14=="U071"))) datahosp$datum_pri<-as.Date(datahosp$datum_pri) data_hosp_period<-datahosp %>% filter(datum_pri<="2020-12-27" & datum_pri>="2020-03-01") #only people with the initial test during the period data_hosp_tot<-data_hosp_period %>% filter(ID %in% ID_tot) data_hosp_tot <- data_hosp_tot %>% group_by(ID) %>% slice_min(order_by = datum_pri) #merging with positive tests data_hosp_tot1<-merge(data_hosp_tot, data_test_period, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = TRUE) #filtering for people who obtained first test and only after then were hospitalised data_hosp_tot3<-filter(data_hosp_tot1, ((Datum_pozitivity >= (datum_pri - 30)) & (Datum_pozitivity <= (datum_pri + 2)))) #remove duplicated IDs - only the first observation is kept in the dataset when the individual switched departments on the same day (1) data_hosp_tot2<-data_hosp_tot3[!duplicated(data_hosp_tot3$ID), ] #merging with medication, date of first positivity data_hosp_testy_merged<-merge(data_hosp_tot2[,c("ID", "datum_pri", "datum_pro", "delka")], data_filtered[, c("ID", "Datum_pozitivity", "hvlp_atc", "BALENI", "den_vydani", "MNOZSTVI")], by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = TRUE) data_hosp_testy_merged<-data_hosp_testy_merged%>%group_by(ID, den_vydani) #filtering out people who were filtered in hosp process (1) ID_in_datahosptot_notin_datahosptot2<- anti_join(data_hosp_tot, data_hosp_tot2, by = "ID")$ID data_hosp_testy_merged <- data_hosp_testy_merged[!(data_hosp_testy_merged$ID %in% ID_in_datahosptot_notin_datahosptot2), ] #hosp dummy ID_hosp<-data_hosp_tot2$ID data_hosp_testy_merged$hosp<-ifelse(data_hosp_testy_merged$ID %in% ID_hosp, 1, 0) #we obtained sample of all people who were detected positive once during our study period and all their respective information (whether hospitalised, medication taken, date of positivity,...) #for one individual, we have more rows (more than one medication collected during the study period - we will further proceed with the data preparation procedure) #data preparation - DUMMY VARIABLES #look-back periods for medication - followed by our assumptions med_antidepressants<-filter(data_filtered, grepl("^(N06A)", hvlp_atc)) med_2 <- filter(data_filtered, ((den_vydani >= Datum_pozitivity %m-% months(12))&(den_vydani <= Datum_pozitivity))) med_6m <- filter(med_antidepressants, ((den_vydani >= Datum_pozitivity %m-% months(6))&(den_vydani <= Datum_pozitivity))) med_6m_packages <-med_6m %>% group_by(ID) %>% summarise(packages = sum(MNOZSTVI)) med_6m<-merge(med_6m, med_6m_packages, by="ID", all.x=TRUE, all.y=TRUE) med_6m<-filter(med_6m, packages>=3) med_12m <- filter(med_antidepressants, ((den_vydani >= Datum_pozitivity %m-% months(12))&(den_vydani <= Datum_pozitivity))) med_12_packages <-med_12m %>% group_by(ID) %>% summarise(packages = sum(MNOZSTVI)) med_12m<-merge(med_12m, med_12_packages, by="ID", all.x=TRUE, all.y=TRUE) med_12m<-filter(med_12m, packages>=6) med_3m <- filter(med_antidepressants, ((den_vydani >= Datum_pozitivity %m-% months(3))&(den_vydani <= Datum_pozitivity))) med_3m$packages<-med_3m$MNOZSTVI med_1<- rbind(med_12m, med_3m, med_6m) med_1<-subset(med_1,select=-packages) med_1<-unique(med_1) #for remdesivir (medication was used during the hospitalization) med_3<-filter(data_filtered, ((den_vydani >= Datum_pozitivity-2)&(den_vydani <= "2020-12-27"))) #function for creating dummies - serious medical conditions apply_filter_and_merge <- function(target_data, fce, med_id_col, target_id_col, new_col_name) { filtered_data <-fce ID_vector <- filtered_data[[med_id_col]] target_data[[new_col_name]] <- ifelse(target_data[[target_id_col]] %in% ID_vector, 1, 0) return(target_data) } #antidepressants data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_1, grepl("^(N06A)", hvlp_atc)), "ID", "ID", "antidepressants") #SSRIs data_SSRI<-filter(med_1, grepl("^(N06AB)", hvlp_atc)) ID_SSRI<-data_SSRI$ID data_hosp_testy_merged$SSRI<-ifelse(data_hosp_testy_merged$ID %in% ID_SSRI, 1, 0) #risk factors of severe outcome of COVID-19 (ÚZIS) #Diabetes data_diabetes<-filter(med_2, grepl("^(A10)", hvlp_atc)) ID_diabetes <- data_diabetes$ID data_hosp_testy_merged$diabetes<-ifelse(data_hosp_testy_merged$ID %in% ID_diabetes, 1, 0) #Oncological treatment of malignant tumor in the last 5 years data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_2, (grepl("^(L01)", hvlp_atc)|grepl("^(L02)", hvlp_atc))), "ID", "ID", "onkologicka_lecba_med") data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period5,grepl("^(C)", ZDG)|grepl("^(D00)", ZDG)|grepl("^(D01)", ZDG)|grepl("^(D02)", ZDG)|grepl("^(D03)", ZDG)|grepl("^(D04)", ZDG)|grepl("^(D05)", ZDG)|grepl("^(D06)", ZDG)|grepl("^(D07)", ZDG)|grepl("^(D08)", ZDG)|grepl("^(D09)", ZDG)), "ID", "ID", "onkologicka_lecba_ZDG") data_hosp_testy_merged$onkologicka_lecba <- ifelse((data_hosp_testy_merged$onkologicka_lecba_med==1|data_hosp_testy_merged$onkologicka_lecba_ZDG==1),1,0) #Obesity data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_2, grepl("^(A08)", hvlp_atc)), "ID", "ID", "obezita_med") data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1, grepl("^(E66)", ZDG)|grepl("^(U59)", ZDG)), "ID", "ID", "obezita_ZDG") data_hosp_testy_merged$obezita <- ifelse(data_hosp_testy_merged$obezita_med==1|data_hosp_testy_merged$obezita_ZDG==1,1,0) #Liver diseases data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1,grepl("^(B15)", ZDG) | grepl("^(B16)", ZDG) |grepl("^(B17)", ZDG) | grepl("^(B18)", ZDG) |grepl("^(B19)", ZDG) | grepl("^(D684C)", ZDG) |grepl("^(I982)", ZDG) | grepl("^(Q618A)", ZDG) | grepl("^(Z944)", ZDG)| grepl("^(C22)", ZDG) |grepl("^(K70)", ZDG) | grepl("^(K71)", ZDG) | grepl("^(K72)",ZDG) | grepl("^(K73)", ZDG)|grepl("^(K74)",ZDG) | grepl("^(K75)",ZDG) |grepl("^(K76)",ZDG) | grepl("^(K77)",ZDG)), "ID", "ID", "nemoci_jater") #Stroke data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1, grepl("^(I64)", ZDG)|grepl("^(I63)", ZDG)), "ID", "ID", "cevni_mozkova_prihoda") #Chronic kidney diseases data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1, grepl("^(N02)", ZDG)|grepl("^(N03)", ZDG)|grepl("^(N04)", ZDG)|grepl("^(N05)", ZDG)|grepl("^(N06)", ZDG)| grepl("^(N07)", ZDG)|grepl("^(N08)", ZDG)|grepl("^(N11)", ZDG)|grepl("^(N12)", ZDG)| grepl("^(N13)", ZDG)|grepl("^(N14)", ZDG)| grepl("^(N26)", ZDG)|grepl("^(N158)", ZDG)|grepl("^(N159)", ZDG)|grepl("^(N160)", ZDG)| grepl("^(N162)", ZDG)|grepl("^(N163)", ZDG)|grepl("^(N164)", ZDG)|grepl("^(N168)", ZDG)|grepl("^(E102)", ZDG)| grepl("^(E112)", ZDG)|grepl("^(E132)", ZDG)|grepl("^(E142)", ZDG)|grepl("^(I120)", ZDG)|grepl("^(M321B)", ZDG)| grepl("^(Q61)", ZDG)|grepl("^(N18)", ZDG) | grepl("^(N19)", ZDG) | grepl("^(Y841)",ZDG)), "ID", "ID", "chronicke_onemocneni_ledvin") #Chronic respiratory diseases (asthma, COPD) data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_2, grepl("^(R03A)", hvlp_atc)|grepl("^(R03BB)", hvlp_atc)|grepl("^(R03DC)", hvlp_atc)), "ID", "ID", "dych_med") data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1, grepl("^(J40)", ZDG)| grepl("^(J41)", ZDG) | grepl("^(J42)",ZDG) | grepl("^(J43)", ZDG)|grepl("^(J44)",ZDG) |grepl("^(J45)",ZDG) |grepl("^(J46)",ZDG) |grepl("^(J47)",ZDG)), "ID", "ID", "dych_ZDG") data_hosp_testy_merged$dych <- ifelse(data_hosp_testy_merged$dych_med==1|data_hosp_testy_merged$dych_ZDG==1,1,0) #Hypertension data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_2, grepl("^(C02L)", hvlp_atc)|grepl("^(C02A)", hvlp_atc)|grepl("^(C02B)", hvlp_atc)|grepl("^(C02C)", hvlp_atc)| grepl("^(C03A)", hvlp_atc)|grepl("^(C03B)", hvlp_atc)| grepl("^(C03A)", hvlp_atc)| grepl("^(C03D)", hvlp_atc)|grepl("^(C03E)", hvlp_atc)|grepl("^(C03X)", hvlp_atc)| grepl("^(C07B)", hvlp_atc)|grepl("^(C07C)", hvlp_atc)| grepl("^(C08G)", hvlp_atc)| grepl("^(C02DA)", hvlp_atc)|grepl("^(C09BA)", hvlp_atc)|grepl("^(C09DA)", hvlp_atc)| grepl("^(C02DB)", hvlp_atc)|grepl("^(C02DD)", hvlp_atc)| grepl("^(C02DG)", hvlp_atc)| grepl("^(C07A)", hvlp_atc)|grepl("^(C07D)", hvlp_atc)|grepl("^(C07F)", hvlp_atc)| grepl("^(C08)", hvlp_atc)|grepl("^(C09BB)", hvlp_atc)|grepl("^(C09DB)", hvlp_atc)| grepl("^(C09AA)", hvlp_atc)|grepl("^(C09CA)", hvlp_atc)| grepl("^(C09XA02)", hvlp_atc)| grepl("^(C09XA52)", hvlp_atc)), "ID", "ID", "hypertenze") #ICHS data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(diagnozy_period1, grepl("^(I120)",ZDG)|grepl("^(I121)",ZDG)|grepl("^(I122)", ZDG)|grepl("^(I123)",ZDG)|grepl("^(I124)", ZDG)| grepl("^(I125)", ZDG)), "ID", "ID", "ICHS") #Remdesivir data_hosp_testy_merged <- apply_filter_and_merge(data_hosp_testy_merged, dplyr::filter(med_3, grepl("^(249656)", SUKL_kod)|grepl("^(J05AX27)", hvlp_atc)) , "ID", "ID", "remdesivir") #CCI score charlson <- comorbidity(diagnozy_period1, id = "ID", code = "ZDG", map = "charlson_icd10_quan", assign0 = FALSE) CCI <- charlson %>% mutate(cci_score = score(x = charlson, weights = "quan", assign0 = FALSE)) CCI<-CCI[,c("ID", "cci_score")] data_hosp_testy_merged<-merge(data_hosp_testy_merged, CCI, by="ID", all.x = TRUE, all.y = FALSE) #Final preparation of cross-sectional dataset with categorical variables cross_sections<-data_hosp_testy_merged[,c("ID","datum_pri","datum_pro","Datum_pozitivity", "hosp", "SSRI", "antidepressants", "diabetes","obezita","onkologicka_lecba","dych", "hypertenze", "ICHS","cevni_mozkova_prihoda","nemoci_jater", "onkologicka_lecba_med", "onkologicka_lecba_ZDG", "cci_score", "chronicke_onemocneni_ledvin","remdesivir")] cross_sections<-merge(cross_sections, data_demo, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = FALSE) cross_sections$female<-ifelse(cross_sections$pohlavi=="Z", 1,0) #people older than 18 and younger than 100 cross_sections<-cross_sections[cross_sections$vek_kat !="0-4 let"&cross_sections$vek_kat !="5-9 let"&cross_sections$vek_kat !="10-14 let"&cross_sections$vek_kat !="100-104 let"&cross_sections$vek_kat !="105-109 let"&cross_sections$vek_kat !="110-114 let"&cross_sections$vek_kat !="115-119 let"&cross_sections$vek_kat !="120-124 let",] cross_sections<-cross_sections[cross_sections$vek_kat !="- let" ,] #age category cross_sections$age <- ifelse(cross_sections$vek_kat %in% c("15-19 let", "20-24 let", "25-29 let"), "15-30", ifelse(cross_sections$vek_kat %in% c("30-34 let", "35-39 let", "40-44 let"), "30-45", ifelse(cross_sections$vek_kat %in% c("45-49 let", "50-54 let", "55-59 let"), "45-60", ifelse(cross_sections$vek_kat %in% c("60-64 let", "65-69 let", "70-74 let"), "60-75", ifelse(cross_sections$vek_kat %in% c("75-79 let", "80-84 let", "85-89 let", "90-94 let", "95-99 let"), "75+", NA))))) #ordinal age cross_sections$vek_kat<-ifelse(cross_sections$vek_kat=="15-19 let"|cross_sections$vek_kat=="20-24 let", "20-24 let",cross_sections$vek_kat) cross_sections$ordinal_age<-factor(cross_sections$vek_kat, levels=c( "20-24 let", "25-29 let","30-34 let", "35-39 let", "40-44 let","45-49 let", "50-54 let", "55-59 let","60-64 let", "65-69 let", "70-74 let","75-79 let", "80-84 let", "85-89 let", "90-94 let","95-99 let")) #older dummy cross_sections$older_age<-ifelse(cross_sections$vek_kat=="65-69 let"|cross_sections$vek_kat=="70-74 let"|cross_sections$vek_kat=="75-79 let"| cross_sections$vek_kat=="80-84 let"|cross_sections$vek_kat=="85-89 let"| cross_sections$vek_kat=="90-94 let"|cross_sections$vek_kat=="95-99 let", 1,0) #removing duplicates - currently, each row for an individual contains identical relevant information extracted during the data preparation process cross_sections<-cross_sections[!duplicated(cross_sections$ID), ] #groups based on cci_score cross_sections$cci_score<-ifelse(is.na(cross_sections$cci_score),0,cross_sections$cci_score) cross_sections$cci_0<-ifelse(cross_sections$cci_score=="0", 1,0) cross_sections$cci_1to2<-ifelse(cross_sections$cci_score=="1"|cross_sections$cci_score=="2", 1,0) cross_sections$cci_3to4<-ifelse(cross_sections$cci_score=="3"|cross_sections$cci_score=="4", 1,0) cross_sections$cci_4plus<-ifelse(cross_sections$cci_score>4, 1,0) #waves cross_sections$waves<-ifelse((cross_sections$Datum_pozitivity>"2020-03-25"&cross_sections$Datum_pozitivity<"2020-04-10"), "wave1",ifelse((cross_sections$Datum_pozitivity>"2020-10-13"&cross_sections$Datum_pozitivity<"2020-11-14"), "wave2",ifelse((cross_sections$Datum_pozitivity>"2020-12-15"&cross_sections$Datum_pozitivity<"2020-12-31"),"wave3","nowave"))) #chronic conditions aggregated cross_sections$chronical1<-ifelse((cross_sections$diabetes=="1"|cross_sections$obezita=="1"|cross_sections$onkologicka_lecba=="1"| cross_sections$dych=="1"|cross_sections$hypertenze=="1"|cross_sections$ICHS=="1"| cross_sections$cevni_mozkova_prihoda=="1"|cross_sections$nemoci_jater=="1"| cross_sections$chronicke_onemocneni_ledvin=="1"), 1,0) cross_sections$chronical2<-ifelse((cross_sections$diabetes=="1"|cross_sections$obezita=="1"|cross_sections$onkologicka_lecba=="1"| cross_sections$dych=="1"|cross_sections$ICHS=="1"| cross_sections$cevni_mozkova_prihoda=="1"|cross_sections$nemoci_jater=="1"| cross_sections$chronicke_onemocneni_ledvin=="1"), 1,0) #final dataset = cross_sections #Names of variables in the thesis #hosp = hospitalisation #SSRI=SSRIs #diabetes=diabetes #obezita = obesity", #onkologicka_lecba = oncological treatment #dych = COPD/asthma #hypertenze = hypertension #chronicke_onemocneni_ledvin = chronic kidney diseases #cevni_mozkova_prihoda = stroke #nemoci_jater = liver diseases #ICHS = IHD #DESCRIPTIVE STATISTICS summary(cross_sections) stat.desc(cross_sections) #categorical variable age table(cross_sections$vek_kat) #barchart-age age_groups <- c("0-9 years", "10-19 years", "20-29 years", "30-39 years", "40-49 years", "50-59 years", "60-69 years", "70-79 years", "80-89 years", "90-99 years", "100-109 years", "110-119 years", "120+ years") age_mapping <- list( "0-4 let" = "0-9 years", "5-9 let" = "0-9 years", "10-14 let" = "10-19 years", "15-19 let" = "10-19 years", "20-24 let" = "20-29 years", "25-29 let" = "20-29 years", "30-34 let" = "30-39 years", "35-39 let" = "30-39 years", "40-44 let" = "40-49 years", "45-49 let" = "40-49 years", "50-54 let" = "50-59 years", "55-59 let" = "50-59 years", "60-64 let" = "60-69 years", "65-69 let" = "60-69 years", "70-74 let" = "70-79 years", "75-79 let" = "70-79 years", "80-84 let" = "80-89 years", "85-89 let" = "80-89 years", "90-94 let" = "90-99 years", "95-99 let" = "90-99 years", "100-104 let" = "100-109 years", "105-109 let" = "100-109 years", "110-114 let" = "110-119 years", "115-119 let" = "110-119 years", "120-124 let" = "120+ years" ) cross_sections$age_group <- factor(age_mapping[cross_sections$vek_kat], levels = age_groups) ggplot(cross_sections, aes(x = age_group)) + geom_bar(fill = "gray50") + labs(x = "Age group", y = "Frequency") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1, family=font, size = 8), axis.text.y = element_text(angle=90, hjust = 1, family=font, size = 8), aspect.ratio = 1/2, axis.title.x = element_text(vjust=0,family=font, size = 12), axis.title.y = element_text(vjust=3, family=font, size = 12) ) #serious medical conditions conditions_df<-data.frame( conditions= c("SSRIs", "diabetes", "obesity", "oncological treatment", "COPD/asthma", "hypertension", "chronic kidney diseases", "stroke", "liver diseases", "IHD"), percents = c(mean(cross_sections$SSRI), mean(cross_sections$diabetes), mean(cross_sections$obezita), mean(cross_sections$onkologicka_lecba), mean(cross_sections$dych), mean(cross_sections$hypertenze), mean(cross_sections$chronicke_onemocneni_ledvin), mean(cross_sections$cevni_mozkova_prihoda), mean(cross_sections$nemoci_jater), mean(cross_sections$ICHS))) #barchart of conditions ggplot(conditions_df) + geom_col(aes(x = conditions, y = percents, fill = conditions), width = 0.7) + labs(x = "Medical conditions", y = "Proportion") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1,family=font, size = 8), axis.text.y = element_text(angle = 0, hjust = 1,family=font, size = 8), aspect.ratio = 1/2.5, axis.title.x = element_text(family=font, size = 12, vjust=-1), axis.title.y = element_text(vjust = 2,family=font, size = 12))+ scale_fill_manual(values = c("SSRIs" = "red", "diabetes"="gray50", "obesity"="gray50", "oncological treatment"="gray50", "dych"="gray50", "hypertension"="gray50", "chronic kidney diseases"="gray50", "stroke"="gray50", "liver diseases"="gray50", "IHD"="gray50")) + theme(legend.position = "none") #SSRIs table(cross_sections$hosp, cross_sections$SSRI) #dependent variable (hospitalisation) hosp_bar<-cross_sections %>% mutate(Hospitalisation = case_when(hosp == 1 ~ "Hospitalised", hosp == 0 ~ "Not hospitalised")) options(repr.plot.width = 0.5, repr.plot.height =3) ggplot(hosp_bar, aes(x = Hospitalisation)) + geom_bar(fill = "gray50", width=0.8) + labs(x = "Hospitalisation", y = "Frequency") + theme_minimal() + theme(axis.text.x = element_text( hjust = 1, family=font, size = 8), axis.text.y = element_text( hjust = 1, family=font, size = 8), aspect.ratio = 1/2, axis.title.x = element_text(vjust=0,family=font, size = 15), axis.title.y = element_text(vjust=2.5, family=font, size = 15)) #new positive COVID-19 cases new_cases <- cross_sections %>% group_by(Datum_pozitivity) %>% summarise(cases = n()) ggplot(new_cases, aes(x = Datum_pozitivity, y = cases)) + geom_col(fill = "gray50") + labs(x = "Day", y = "New cases") + theme_minimal() + theme(axis.title = element_text(size = 12, vjust=3, family=font), axis.text.x = element_text(angle = 45, hjust = 1, family=font, size = 8), axis.text.y = element_text(hjust = 1, family=font, size = 8)) #change point detection (trend-only) par(font.lab = 2, font.main = 3, family =font, cex.lab = 1, cex.main = 1) mtext("Custom Font Example", side = 3, line = 1, font = 2) new_cases2<-filter(new_cases, Datum_pozitivity<"2020-06-01") out1<-beast(new_cases$cases, season='none') out2<-beast(new_cases2$cases, season='none') plot(out1, main=NULL, cex.lab = NULL, cex.main = NULL, mtext=font, xlab="New cases") print(out1) plot(out2, main=NULL, cex.lab = NULL, cex.main = NULL, mtext=font) print(out2) #logistics regression logit1<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) #without ICHS logit1<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) #aggregated chronic conditions logit1.2<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) logit1.3<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical2)+as.factor(hypertenze) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) logit2<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections) logit3<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(antidepressants)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+ as.factor(obezita) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) logit4<-glm(hosp~as.factor(female)+as.factor(age)+as.factor(antidepressants)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections) #ordinal age logit1<-glm(hosp~as.factor(female)+ordinal_age+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections) summary(logit1) summary(logit2) AIC(logit1) AIC(logit1.2) #McFadden R-squared logit1_R2<-pR2(logit1)['McFadden'] logit2_R2<-pR2(logit2)['McFadden'] #LM-test lrtest(logit1) lrtest(logit2) #APE logit_ape1 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), data=cross_sections, atmean = F) logit_ape1 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), data=cross_sections, atmean = F) logit_ape2 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), data =cross_sections, atmean = F) logit_ape3 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(antidepressants)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita) +as.factor(waves), data=cross_sections, atmean = F) logit_ape4 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(antidepressants)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), data=cross_sections, atmean = F) logit_ape1.3 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1) +as.factor(waves), data=cross_sections, atmean = F) logit_ape1.2 <- logitmfx(hosp~as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical2)+as.factor(hypertenze) +as.factor(waves), data=cross_sections, atmean = F) #assumptions #multicollinearity vif<-vif(logit1.3) cramer_v(as.factor(cross_sections$age), as.factor(cross_sections$chronical1)) cramer_v(as.factor(cross_sections$age), as.factor(cross_sections$cci_score)) #printing results stargazer::stargazer(logit1,logit2, logit1.2,logit1.3, no.space = TRUE, font.size = "small", column.sep.width = "4pt" ) stargazer::stargazer(logit1,logit2,logit2.1,logit2.2, no.space = TRUE, font.size = "small", column.sep.width = "4pt" ) stargazer::stargazer(logit2.3,logit2.4, no.space = TRUE, font.size = "small", single.row=TRUE ) stargazer::stargazer(vif) texreg(logit_ape1) ################################################################################ #ZINB #creating new variable summed_days <- data_hosp_tot3 %>% arrange(ID, datum_pri) summed_days <- summed_days %>% group_by(ID) %>% mutate(lag_datum_pro = lag(datum_pro)) %>% ungroup() summed_days <- summed_days %>% mutate(match_days = if_else(datum_pri == lag_datum_pro, delka, 0, missing = 0)) %>% group_by(ID) %>% mutate(total_days = sum(match_days, na.rm = TRUE)) %>% ungroup() summed_days$days_in_hosp<-ifelse(summed_days$total_days==0, summed_days$delka, summed_days$total_days) summed_days<-summed_days[!duplicated(summed_days$ID), ] summed_days<-summed_days[,c("ID","days_in_hosp")] cross_sections<-merge(cross_sections, summed_days, by="ID", all.x = TRUE, all.y = FALSE) cross_sections$days_in_hosp<-ifelse(is.na(cross_sections$days_in_hosp), 0, cross_sections$days_in_hosp) #Dependent variable-histogram freq_data <- cross_sections %>% count(days_in_hosp) %>% mutate(n = ifelse(n > 30000, 30000, n)) ggplot(freq_data, aes(x = days_in_hosp, y = n)) + geom_bar(stat = "identity", fill = "gray50", width = 0.8) + labs(x = "Days spent in hospital", y = "Frequency") + theme_minimal() + theme(axis.text.x = element_text( hjust = 1, family=font, size = 8), axis.text.y = element_text( hjust = 1, family=font, size = 8), aspect.ratio = 1/2, axis.title.x = element_text(vjust=0,family=font, size = 15), axis.title.y = element_text(vjust=1.5, family=font, size = 15))+ scale_y_continuous(limits = c(0, 30000))+scale_x_continuous(limits = c(-1, 50)) #outlier analysis quantile((cross_sections$days_in_hosp)) quantile(cross_hosp$days_in_hosp, 0.75)+3*(quantile(cross_hosp$days_in_hosp, 0.75) - quantile(cross_hosp$days_in_hosp, 0.15)) quantile(cross_hosp$days_in_hosp, 0.15)-3*(quantile(cross_hosp$days_in_hosp, 0.75) - quantile(cross_hosp$days_in_hosp, 0.15)) tail(sort(cross_sections$days_in_hosp)) #final dataset cross_sections_zinb<-filter(cross_sections, cross_sections$days_in_hosp<92) #alternative models zinb1<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+ as.factor(obezita)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "negbin") zinb2<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(older_age)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+ as.factor(obezita)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "negbin") zinb3<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1)| as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "negbin") AIC(zinb1, zinb2, zinb3, zinb_final) summary(zinb3) #final model zinb_final<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1)+as.factor(waves)| as.factor(female) +as.factor(age)+as.factor(SSRI)+as.factor(diabetes)+as.factor(dych)+as.factor(hypertenze) +as.factor(cevni_mozkova_prihoda)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) +as.factor(nemoci_jater)+as.factor(obezita)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "negbin") #remdesivir zinb_final<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(remdesivir)+as.factor(SSRI)+as.factor(chronical1)+as.factor(waves)| as.factor(female) +as.factor(older_age)+as.factor(SSRI)+as.factor(diabetes)+as.factor(dych)+as.factor(hypertenze) +as.factor(cevni_mozkova_prihoda)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) +as.factor(nemoci_jater)+as.factor(obezita)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "negbin") summary(zinb_final) lrtest(zinb_final) AIC(zinb_final) #interpretation=AME #logit part marginal_effects_count <- avg_slopes(zinb_final,type = "zero") #count part marginal_effects_zero<-avg_slopes(zinb_final,type = "count") #comparison with poisson poiss_final<-zeroinfl(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1)+as.factor(waves)| as.factor(female) +as.factor(age)+as.factor(SSRI)+as.factor(diabetes)+as.factor(dych)+as.factor(hypertenze) +as.factor(cevni_mozkova_prihoda)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) +as.factor(nemoci_jater)+as.factor(obezita)+as.factor(waves), data = cross_sections_zinb, link ="logit", dist = "poisson") lrtest(zinb_final,poiss_final) AIC(zinb_final,poiss_final) #overdispersion check overdisperison_model <- glm(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(diabetes)+as.factor(dych)+as.factor(hypertenze)+as.factor(cevni_mozkova_prihoda)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(nemoci_jater)+as.factor(obezita)+as.factor(waves),data = cross_sections_zinb, family=poisson) overdispersion_model <- glm(days_in_hosp ~ as.factor(female)+as.factor(age)+as.factor(SSRI)+as.factor(chronical1)+as.factor(wave1)+as.factor(wave2), data = cross_sections_zinb, family=poisson) dispersiontest(overdisperison_model) #rejected null hypothesis - we should use ZINB instead of ZIP #multicollinearity check vif(overdisperison_model) vif(poiss) #visualisation stargazer::stargazer(zinb_final, zero.component = TRUE, font.size = "small", single.row=TRUE ) stargazer::stargazer(zinb_final, zero.component = FALSE, font.size = "small", single.row=TRUE ) ################################################################################ #Mortality #deaths attributable to Covid-19 data_um_tot<-filter(data_um_tot, Zakl_pricina_smrti=="U071") data_um_test<-merge(data_um_tot, data_test_period, by="ID", all.x = TRUE, all.y = FALSE) #90 days after positive test (1) #data_um_test<-filter(data_um_test, Datum_umrti < Datum_pozitivity) data_um_filtered1<-filter(data_um_test, (Datum_umrti <= (Datum_pozitivity + days(90)))) data_um_filtered1<-data_um_filtered1[,c("ID","Datum_umrti", "Zakl_pricina_smrti")] #merging with tests data_um_filtered<-merge(data_um_filtered1, data_test_period, by="ID", all.x = TRUE, all.y = TRUE) #filtering out people who were filtered in (1) ID_in_dataumtest_notin_dataumfiltered1<- anti_join(data_um_test, data_um_filtered1, by = "ID")$ID data_um_filtered <- data_um_filtered[!(data_um_filtered$ID %in% ID_in_dataumtest_notin_dataumfiltered1), ] #death dummy data_um_filtered$Death_Covid<-ifelse(is.na(data_um_filtered$Datum_umrti), 0,1) #merging with medication data_um_merged<-merge(data_um_filtered, data_filtered [,c("ID", "hvlp_atc", "FORMA", "BALENI", "SILA", "den_vydani", "MNOZSTVI")], by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = TRUE) data_um_merged<-data_um_merged[!(data_um_merged$ID %in% ID_in_dataumtest_notin_dataumfiltered1), ] #DUMMY VARIABLES #antidepressants data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(med_1, grepl("^(N06A)", hvlp_atc)), "ID", "ID", "antidepressants") #SSRIs data_SSRI<-filter(med_1, grepl("^(N06AB)", hvlp_atc)) ID_SSRI<-data_SSRI$ID data_um_merged$SSRI<-ifelse(data_um_merged$ID %in% ID_SSRI, 1, 0) #risk factors of severe outcome of COVID-19 (ÚZIS) #Diabetes data_diabetes<-filter(med_2, grepl("^(A10)", hvlp_atc)) ID_diabetes <- data_diabetes$ID data_um_merged$diabetes<-ifelse(data_um_merged$ID %in% ID_diabetes, 1, 0) #Oncological treatment of malignant tumor in the last 5 years data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(med_2, (grepl("^(L01)", hvlp_atc)|grepl("^(L02)", hvlp_atc))), "ID", "ID", "onkologicka_lecba_med") data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period5,grepl("^(C)", ZDG)|grepl("^(D00)", ZDG)|grepl("^(D01)", ZDG)|grepl("^(D02)", ZDG)|grepl("^(D03)", ZDG)|grepl("^(D04)", ZDG)|grepl("^(D05)", ZDG)|grepl("^(D06)", ZDG)|grepl("^(D07)", ZDG)|grepl("^(D08)", ZDG)|grepl("^(D09)", ZDG)), "ID", "ID", "onkologicka_lecba_ZDG") data_um_merged$onkologicka_lecba <- ifelse((data_um_merged$onkologicka_lecba_med==1|data_um_merged$onkologicka_lecba_ZDG==1),1,0) #Obesity data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(med_2, grepl("^(A08)", hvlp_atc)), "ID", "ID", "obezita_med") data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(E66)", ZDG)|grepl("^(U59)", ZDG)), "ID", "ID", "obezita_ZDG") data_um_merged$obezita <- ifelse(data_um_merged$obezita_med==1|data_um_merged$obezita_ZDG==1,1,0) #Liver diseases data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(B15)", ZDG) |grepl("^(B16)", ZDG) |grepl("^(B17)", ZDG) |grepl("^(B18)", ZDG) |grepl("^(B19)", ZDG) |grepl("^(D684C)", ZDG) |grepl("^(I982)", ZDG) |grepl("^(Q618A)", ZDG) | grepl("^(Z944)", ZDG)|grepl("^(C22)", ZDG) | grepl("^(K70)", ZDG) | grepl("^(K71)", ZDG) | grepl("^(K72)",ZDG) | grepl("^(K73)", ZDG)|grepl("^(K74)",ZDG) |grepl("^(K75)",ZDG) |grepl("^(K76)",ZDG) |grepl("^(K77)",ZDG)), "ID", "ID", "nemoci_jater") #Stroke data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(I64)", ZDG)|grepl("^(I63)", ZDG)), "ID", "ID", "cevni_mozkova_prihoda") #Chronic kidney diseases data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(N02)", ZDG)|grepl("^(N03)", ZDG)|grepl("^(N04)", ZDG)|grepl("^(N05)", ZDG)|grepl("^(N06)", ZDG)| grepl("^(N07)", ZDG)|grepl("^(N08)", ZDG)|grepl("^(N11)", ZDG)|grepl("^(N12)", ZDG)| grepl("^(N13)", ZDG)|grepl("^(N14)", ZDG)| grepl("^(N26)", ZDG)|grepl("^(N158)", ZDG)|grepl("^(N159)", ZDG)|grepl("^(N160)", ZDG)| grepl("^(N162)", ZDG)|grepl("^(N163)", ZDG)|grepl("^(N164)", ZDG)|grepl("^(N168)", ZDG)|grepl("^(E102)", ZDG)| grepl("^(E112)", ZDG)|grepl("^(E132)", ZDG)|grepl("^(E142)", ZDG)|grepl("^(I120)", ZDG)|grepl("^(M321B)", ZDG)| grepl("^(Q61)", ZDG)|grepl("^(N18)", ZDG) | grepl("^(N19)", ZDG) | grepl("^(Y841)",ZDG)), "ID", "ID", "chronicke_onemocneni_ledvin") #Chronic respiratory diseases (asthma, COPD) data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(med_2, grepl("^(R03A)", hvlp_atc)|grepl("^(R03BB)", hvlp_atc)|grepl("^(R03DC)", hvlp_atc)), "ID", "ID", "dych_med") data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(J40)", ZDG)| grepl("^(J41)", ZDG) | grepl("^(J42)",ZDG) | grepl("^(J43)", ZDG)|grepl("^(J44)",ZDG) |grepl("^(J45)",ZDG) |grepl("^(J46)",ZDG) |grepl("^(J47)",ZDG)), "ID", "ID", "dych_ZDG") data_um_merged$dych <- ifelse(data_um_merged$dych_med==1|data_um_merged$dych_ZDG==1,1,0) #Hypertension data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(med_2, grepl("^(C02L)", hvlp_atc)|grepl("^(C02A)", hvlp_atc)|grepl("^(C02B)", hvlp_atc)|grepl("^(C02C)", hvlp_atc)| grepl("^(C03A)", hvlp_atc)|grepl("^(C03B)", hvlp_atc)| grepl("^(C03A)", hvlp_atc)| grepl("^(C03D)", hvlp_atc)|grepl("^(C03E)", hvlp_atc)|grepl("^(C03X)", hvlp_atc)| grepl("^(C07B)", hvlp_atc)|grepl("^(C07C)", hvlp_atc)| grepl("^(C08G)", hvlp_atc)| grepl("^(C02DA)", hvlp_atc)|grepl("^(C09BA)", hvlp_atc)|grepl("^(C09DA)", hvlp_atc)| grepl("^(C02DB)", hvlp_atc)|grepl("^(C02DD)", hvlp_atc)| grepl("^(C02DG)", hvlp_atc)| grepl("^(C07A)", hvlp_atc)|grepl("^(C07D)", hvlp_atc)|grepl("^(C07F)", hvlp_atc)| grepl("^(C08)", hvlp_atc)|grepl("^(C09BB)", hvlp_atc)|grepl("^(C09DB)", hvlp_atc)| grepl("^(C09AA)", hvlp_atc)|grepl("^(C09CA)", hvlp_atc)| grepl("^(C09XA02)", hvlp_atc)| grepl("^(C09XA52)", hvlp_atc)) , "ID", "ID", "hypertenze") #ICHS data_um_merged <- apply_filter_and_merge(data_um_merged, dplyr::filter(diagnozy_period1, grepl("^(I120)",ZDG)|grepl("^(I121)",ZDG)|grepl("^(I122)", ZDG)|grepl("^(I123)",ZDG)|grepl("^(I124)", ZDG)| grepl("^(I125)", ZDG)), "ID", "ID", "ICHS") #CCI score charlson2 <- comorbidity(diagnozy_period1, id = "ID", code = "ZDG", map = "charlson_icd10_quan", assign0 = FALSE) CCI2 <- charlson2 %>% mutate(cci_score = score(x = charlson2, weights = "quan", assign0 = FALSE)) CCI2<-CCI2[,c("ID", "cci_score")] data_um_merged<-merge(data_um_merged, CCI2, by="ID", all.x = TRUE, all.y = FALSE) #Final data preparation #cross sections cross_sections2<-data_um_merged[,c("ID","Datum_umrti","Death_Covid", "Datum_pozitivity", "SSRI","antidepressants", "diabetes","obezita","onkologicka_lecba", "dych", "hypertenze","ICHS","cevni_mozkova_prihoda","nemoci_jater", "onkologicka_lecba_med", "onkologicka_lecba_ZDG", "cci_score", "chronicke_onemocneni_ledvin")] cross_sections2<-merge(cross_sections2, data_demo, by.x=c("ID"), by.y=c("ID"), all.x = TRUE, all.y = FALSE) cross_sections2$female<-ifelse(cross_sections2$pohlavi=="Z", 1,0) #people older than 18 and younger than 100 cross_sections2<-cross_sections2[cross_sections2$vek_kat !="0-4 let"&cross_sections2$vek_kat !="5-9 let"&cross_sections2$vek_kat !="10-14 let"&cross_sections2$vek_kat !="100-104 let"&cross_sections2$vek_kat !="105-109 let"&cross_sections2$vek_kat !="110-114 let"&cross_sections2$vek_kat !="115-119 let"&cross_sections2$vek_kat !="120-124 let",] cross_sections2<-cross_sections2[cross_sections2$vek_kat !="- let" ,] #age category cross_sections2$age <- ifelse(cross_sections2$vek_kat %in% c("15-19 let", "20-24 let", "25-29 let"), "15-30", ifelse(cross_sections2$vek_kat %in% c("30-34 let", "35-39 let", "40-44 let"), "30-45", ifelse(cross_sections2$vek_kat %in% c("45-49 let", "50-54 let", "55-59 let"), "45-60", ifelse(cross_sections2$vek_kat %in% c("60-64 let", "65-69 let", "70-74 let"), "60-75", ifelse(cross_sections2$vek_kat %in% c("75-79 let", "80-84 let", "85-89 let", "90-94 let", "95-99 let"), "75+", NA))))) #older dummy cross_sections2$older_age<-ifelse(cross_sections2$vek_kat=="65-69 let"|cross_sections2$vek_kat=="70-74 let"|cross_sections2$vek_kat=="75-79 let"| cross_sections2$vek_kat=="80-84 let"|cross_sections2$vek_kat=="85-89 let"| cross_sections2$vek_kat=="90-94 let"|cross_sections2$vek_kat=="95-99 let", 1,0) cross_sections2$age_death <- ifelse(cross_sections2$vek_kat %in% c("15-19 let", "20-24 let", "25-29 let","30-34 let", "35-39 let", "40-44 let","45-49 let", "50-54 let", "55-59 let","60-64 let"), "15-65", ifelse(cross_sections2$vek_kat %in% c("65-69 let", "70-74 let"), "65-75", ifelse(cross_sections2$vek_kat %in% c("75-79 let","80-84 let"), "75-85", ifelse(cross_sections2$vek_kat %in% c("85-89 let", "90-94 let", "95-99 let"), "85+", NA)))) #removing duplicates cross_sections2<-cross_sections2[!duplicated(cross_sections2$ID), ] #groups based on cci_score cross_sections2$cci_score<-ifelse(is.na(cross_sections2$cci_score),0,cross_sections2$cci_score) cross_sections2$cci_0<-ifelse(cross_sections2$cci_score=="0", 1,0) cross_sections2$cci_1to2<-ifelse(cross_sections2$cci_score=="1"|cross_sections2$cci_score=="2", 1,0) cross_sections2$cci_3to4<-ifelse(cross_sections2$cci_score=="3"|cross_sections2$cci_score=="4", 1,0) cross_sections2$cci_4plus<-ifelse(cross_sections2$cci_score>4, 1,0) #waves cross_sections2$waves<-ifelse((cross_sections2$Datum_pozitivity>"2020-03-25"&cross_sections2$Datum_pozitivity<"2020-04-10"), "wave1",ifelse((cross_sections2$Datum_pozitivity>"2020-10-13"&cross_sections2$Datum_pozitivity<"2020-11-14"), "wave2",ifelse((cross_sections2$Datum_pozitivity>"2020-12-15"&cross_sections2$Datum_pozitivity<"2020-12-31"),"wave3","nowave"))) #chronical condition aggregated cross_sections2$chronical1<-ifelse((cross_sections2$diabetes=="1"|cross_sections2$obezita=="1"|cross_sections2$onkologicka_lecba=="1"| cross_sections2$dych=="1"|cross_sections2$hypertenze=="1"|cross_sections2$ICHS=="1"| cross_sections2$cevni_mozkova_prihoda=="1"|cross_sections2$nemoci_jater=="1"| cross_sections2$chronicke_onemocneni_ledvin=="1"), 1,0) cross_sections2$chronical2<-ifelse((cross_sections2$diabetes=="1"|cross_sections2$obezita=="1"|cross_sections2$onkologicka_lecba=="1"| cross_sections2$dych=="1"|cross_sections2$ICHS=="1"| cross_sections2$cevni_mozkova_prihoda=="1"|cross_sections2$nemoci_jater=="1"| cross_sections2$chronicke_onemocneni_ledvin=="1"), 1,0) #final dataset = cross_sections2 #DESCRIPTIVE STATISTICS summary(cross_sections2) death_bar<-cross_sections2 %>% mutate(Death = case_when(Death_Covid == 1 ~ "Died", Death_Covid == 0 ~ "Survived")) ggplot(death_bar, aes(x = Death)) + geom_bar(fill = "gray50", width=0.8) + labs(x = "Death", y = "Frequency") + theme_minimal() + theme(axis.text.x = element_text( hjust = 1, family=font, size = 8), axis.text.y = element_text( hjust = 1, family=font, size = 8), aspect.ratio = 1/2, axis.title.x = element_text(vjust=0,family=font, size = 15), axis.title.y = element_text(vjust=2.5, family=font, size = 15)) #logistics regression logit2.1<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS) +as.factor(cevni_mozkova_prihoda) +as.factor(nemoci_jater)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) + as.factor(obezita)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) #aggregated chronical conditions logit2.12<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+as.factor(chronical1) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) logit2.13<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+as.factor(chronical2)+as.factor(hypertenze) +as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) logit2.2<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) logit2.3<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(antidepressants)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS) +as.factor(cevni_mozkova_prihoda) +as.factor(nemoci_jater)+as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) + as.factor(obezita)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) logit2.4<-glm(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(antidepressants)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), family = binomial (link = "logit"), data =cross_sections2) summary(logit2.1) summary(logit2.4) #McFadden R-squared logit1_R2<-pR2(logit2.1)['McFadden'] logit2_R2<-pR2(logit2.2)['McFadden'] #LM-test lrtest(logit2.1) lrtest(logit2.2) #APE logit_ape2.1 <- logitmfx(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+as.factor(diabetes) +as.factor(dych)+as.factor(ICHS)+as.factor(hypertenze) +as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin)+as.factor(obezita)+as.factor(nemoci_jater) +as.factor(waves), data=cross_sections2, atmean = F) logit_ape2.2 <- logitmfx(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(SSRI)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), data =cross_sections2, atmean = F) logit_ape2.3 <- logitmfx(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(antidepressants)+as.factor(diabetes) +as.factor(dych)+as.factor(hypertenze)+as.factor(ICHS) +as.factor(cevni_mozkova_prihoda) +as.factor(onkologicka_lecba) +as.factor(chronicke_onemocneni_ledvin) +as.factor(waves), data=cross_sections2,atmean = F) logit_ape2.4 <- logitmfx(Death_Covid~as.factor(female)+as.factor(age_death)+as.factor(antidepressants)+ as.factor(cci_1to2)+as.factor(cci_3to4)+as.factor(cci_4plus)+as.factor(waves), data=cross_sections2, atmean = F) #assumptions #multicollinearity vif<-vif(logit2.1) #printing results stargazer::stargazer(logit1,logit2,logit2.1,logit2.2, no.space = TRUE, font.size = "small", column.sep.width = "4pt" ) stargazer::stargazer(logit2.1,logit2.2,logit2.12,logit2.13, no.space = TRUE, font.size = "small", column.sep.width = "4pt" ) stargazer::stargazer(vif) ################################################################################ #GRAPHS #logistic function logistic <- function(x) { 1 / (1 + exp(-x)) } data <- data.frame(x = seq(-3, 3, by = 0.1)) data$y <- logistic(data$x) ggplot(data, aes(x = x, y = y)) + geom_line(color = "blue", size = 1) + labs(x = NULL, y = expression(w)) + scale_x_continuous(breaks = seq(-3, 3, 1), limits = c(-3, 3)) + scale_y_continuous(breaks = seq(0, 1, 0.5), limits = c(0, 1)) + theme_minimal() + theme(axis.title.x = element_text(vjust = 3, size = 13, family = font), axis.title.y = element_text(angle = -1.5, vjust = 0.7, hjust = 1.02, size = 14, family = font), axis.text = element_text(size = 12, family = font), panel.grid.major = element_line(color = "gray", linetype = "dashed"), panel.grid.minor = element_blank(), plot.margin = margin(t = 20, r = 20, b = 20, l = 20)) + geom_hline(yintercept = 0.5, linetype = "dashed", color = "black") + geom_vline(xintercept = 0, color = "black") + annotate("text", x = -2, y = 1, label = expression(G(w)), hjust = -4.8, vjust = 1.3, size = 4.4, family = font) + annotate("text", x = 0, y = 1.05, label = "G(w)", size = 5, family = font, parse = TRUE, vjust = -1) ##############CODE PYTHON #filtering medication until 27.12.2020 (end of our study period) and only for people detected COVID-19 during our study period #resulting dataset is Filtered_test.csv from datetime import datetime import pandas as pd date_start = pd.to_datetime('2018-01-01') date_end = pd.to_datetime('2020-12-27') df_chunks = pd.read_csv('Leky_202305111118.csv', sep=';', encoding='cp1252', low_memory=False, chunksize=10000, dtype = {'ID': 'str', 'SUKL_kod': 'str'}) filtered_chunks = [] for chunk in df_chunks: chunk.iloc[:, 7] = pd.to_datetime(chunk.iloc[:, 7], format='%Y-%m-%d') filtered_chunk = chunk[(chunk.iloc[:, 7] >= date_start) & (chunk.iloc[:, 7] <= date_end)] filtered_chunks.append(filtered_chunk) filtered_df = pd.concat(filtered_chunks) filtered_df.to_csv('New_hosp.csv', index=False) import pandas as pd df_testy = pd.read_csv('Infekce_202305110838.csv', sep=';', encoding='cp1252', dtype = {'ID': 'str'}) data_hosp= pd.read_csv('New_hosp.csv', sep=',', encoding='utf-8', low_memory=False, dtype = {'ID': 'str', 'SUKL_kod': 'str'}) date_start = pd.to_datetime('2018-01-01') date_end = pd.to_datetime('2020-12-27') df_testy['Datum_pozitivity'] = pd.to_datetime(df_testy['Datum_pozitivity']) df_testy = df_testy[(df_testy['Datum_pozitivity'] >= date_start) & (df_testy['Datum_pozitivity'] <= date_end)] ID_test = df_testy['ID'] filtered_data_hosp = data_hosp[data_hosp['ID'].isin(ID_test)] filtered_data_hosp.to_csv('Filtered_test.csv', index=False)