library(plm) library(stargazer) library(car) library(plyr) library(dplyr) library(ggplot2) library(tidyr) library(lmtest) library(stringr) library(sandwich) library(xtable) library(Zelig) library(lfe) library(fabricatr) library(Hmisc) library(nortest) #install.packages("https://cran.r-project.org/src/contrib/Archive/Zelig/Zelig_4.2-1.tar.gz", # repos=NULL, # type="source") #Load original dataset data_dt <- read.csv("directory", sep=";", header=TRUE, na.strings = "", dec =",") data_dt1 <- data_dt[c(1,6,11:30, 433:436)] #Get all variables to same currency data_dt1$Net_Profit_Loss_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Net_Profit_Loss_CCY, data_dt1$Net_Profit_Loss_CCY*data_dt1$FX_rate) data_dt1$Total_turnover_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Total_turnover_CCY, data_dt1$Total_turnover_CCY*data_dt1$FX_rate) data_dt1$Total_debt_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Total_debt_CCY, data_dt1$Total_debt_CCY*data_dt1$FX_rate) data_dt1$Equity_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Equity_CCY, data_dt1$Equity_CCY*data_dt1$FX_rate) data_dt1$TOTAL_assets_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$TOTAL_assets_CCY, data_dt1$TOTAL_assets_CCY*data_dt1$FX_rate) data_dt1$Operating_cash_flow_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Operating_cash_flow_CCY, data_dt1$Operating_cash_flow_CCY*data_dt1$FX_rate) data_dt1$EBITDA_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$EBITDA_CCY, data_dt1$EBITDA_CCY*data_dt1$FX_rate) data_dt1$Gross_sales_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Gross_sales_CCY, data_dt1$Gross_sales_CCY*data_dt1$FX_rate) data_dt1$Capital_expenditures_CCY <- ifelse(data_dt1$CURRENCYISOCODE == "CZK", data_dt1$Capital_expenditures_CCY, data_dt1$Capital_expenditures_CCY*data_dt1$FX_rate) data_dt1$ROE <- data_dt1$Net_Profit_Loss_CCY/Lag(data_dt1$Equity_CCY, -1) #profitability data_dt1$ROA <- data_dt1$Net_Profit_Loss_CCY/Lag(data_dt1$TOTAL_assets_CCY, -1) #profitability data_dt1$debt_EBITDA <- data_dt1$Total_debt_CCY/data_dt1$EBITDA_CCY #solvency data_dt1$CFC <- data_dt1$Operating_cash_flow_CCY/data_dt1$Total_debt_CCY #liquidity data_dt1$D_E <- data_dt1$Total_debt_CCY/data_dt1$Equity_CCY #solvency data_dt1$capx_sales <- -data_dt1$Capital_expenditures_CCY/data_dt1$Total_turnover_CCY #investment #data_dt1$PD <- ifelse(data_dt1$PD_RWA < 0.25, 0, 1) data_dt1$year <- str_sub(data_dt1$STATEMENTDATE, 6, 9) data_dt1$month <- str_sub(data_dt1$STATEMENTDATE, 3, 5) # Data filters data_dt2 <- filter(data_dt1, year %in% c(2017, 2018, 2019, 2020, 2021, 2022) & month %in% c("Dec")) data_dt2 <- filter(data_dt2, capx_sales < 1 & capx_sales > 0 & debt_EBITDA < Inf & CFC < Inf & CFC > -Inf) data_dt2 <- filter(data_dt2, D_E < 3 & D_E > 0) data_dt2 <- filter(data_dt2, CFC < 5 & CFC > -1) data_dt2 <- filter(data_dt2, ROA < 0.2 & ROA > -0.1) data_dt2 <- filter(data_dt2, ROE < 0.5 & ROE > -0.2) data_dt2 <- filter(data_dt2, Volatility_of_net_sales < 1.8) Q1 <- quantile(data_dt2$ROA, 0.25, na.rm = T) Q3 <- quantile(data_dt2$ROA, 0.75, na.rm = T) IQR <- Q3 - Q1 lower_bound <- Q1 - 1.5 * IQR upper_bound <- Q3 + 1.5 * IQR data_dt2 <- data_dt2[data_dt2$ROA >= lower_bound & data_dt2$ROA <= upper_bound, ] Q1a <- quantile(data_dt2$ROE, 0.25, na.rm = T) Q3a <- quantile(data_dt2$ROE, 0.75, na.rm = T) IQRa <- Q3a - Q1a lower_bounda <- Q1a - 1.5 * IQRa upper_bounda <- Q3a + 1.5 * IQRa data_dt2 <- data_dt2[data_dt2$ROE >= lower_bounda & data_dt2$ROE <= upper_bounda, ] #Macro variables added data_dt2$GDPg[data_dt2$year == 2015] <- 5.4 data_dt2$GDPg[data_dt2$year == 2016] <- 2.5 data_dt2$GDPg[data_dt2$year == 2017] <- 5.2 data_dt2$GDPg[data_dt2$year == 2018] <- 3.2 data_dt2$GDPg[data_dt2$year == 2019] <- 3 data_dt2$GDPg[data_dt2$year == 2020] <- -5.5 data_dt2$GDPg[data_dt2$year == 2021] <- 3.5 data_dt2$GDPg[data_dt2$year == 2022] <- 2.4 data_dt2$pribor[data_dt2$year == 2016] <- 0.45 data_dt2$pribor[data_dt2$year == 2017] <- 0.58 data_dt2$pribor[data_dt2$year == 2018] <- 1.48 data_dt2$pribor[data_dt2$year == 2019] <- 2.25 data_dt2$pribor[data_dt2$year == 2020] <- 0.94 data_dt2$pribor[data_dt2$year == 2021] <- 1.45 data_dt2$pribor[data_dt2$year == 2022] <- 6.52 data_dt2$eurczk[data_dt2$year == 2017] <- -5.117 data_dt2$eurczk[data_dt2$year == 2018] <- 0.714 data_dt2$eurczk[data_dt2$year == 2019] <- -1.285 data_dt2$eurczk[data_dt2$year == 2020] <- 3.192 data_dt2$eurczk[data_dt2$year == 2021] <- -4.01 data_dt2$eurczk[data_dt2$year == 2022] <- -3.912 data_dt2$CPI[data_dt2$year == 2017] <-2.4 data_dt2$CPI[data_dt2$year == 2018] <-2 data_dt2$CPI[data_dt2$year == 2019] <-2.6 data_dt2$CPI[data_dt2$year == 2020] <-3.3 data_dt2$CPI[data_dt2$year == 2021] <-3.3 data_dt2$CPI[data_dt2$year == 2022] <-14.8 data_dt_d <- data_dt2 esg_data <- read.csv("directory", sep=";", header=TRUE, na.strings = "", dec =".") esg_data <- esg_data %>% drop_na() macro_dt <- read.csv("directory.csv", sep=";", header=TRUE, na.strings = "", dec =".") macro_dt <- macro_dt %>% filter(year>2009 & year < 2023) macro_dt <- macro_dt[,1:3] summary(data_dt_d) #################################################### # Boxplots for years ggplot(data = data_dt_d, aes(x = year, y = ROA, group = year)) + geom_boxplot(fill = "lightblue") + #ylim(-0.2, 0.4) + scale_x_continuous(breaks = unique(data_dt_d$year)) + scale_y_continuous(limits=c(-0.2, 0.4), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Return on asset") ggplot(data = data_dt_d, aes(x = year, y = ROA, group = year)) + geom_boxplot(fill = "lightblue") + ylim(-0.1, 0.2) + scale_x_continuous(breaks = unique(data_dt_d$year)) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Return on asset") ggplot(data = data_dt_d, aes(x = year, y = ROE, group = year)) + geom_boxplot(fill = "lightblue") + #ylim(-0.2, 0.4) + scale_x_continuous(breaks = unique(data_dt_d$year)) + scale_y_continuous(limits=c(-0.2, 0.4), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Return on equity") ggplot(data = data_dt_d, aes(x = year, y = D_E, group = year)) + geom_boxplot(fill = "lightblue") + #ylim(-0.2, 0.4) + scale_x_continuous(breaks = unique(data_dt_d$year)) + scale_y_continuous(limits=c(0, 3), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Debt-to-equity") ggplot(data = data_dt_d, aes(x = year, y = CFC, group = year)) + geom_boxplot(fill = "lightblue") + #ylim(-0.2, 0.4) + scale_x_continuous(breaks = unique(data_dt_d$year)) + scale_y_continuous(limits=c(0, 1.5), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Casflow Coverage Ratio") ggplot(data = data_dt_d, aes(x = year, y = Volatility_of_net_sales, group = year)) + geom_boxplot(fill = "lightblue") + ylim(1, 1.6) + scale_x_continuous(breaks = unique(data_dt_d$year)) + #scale_y_continuous(limits=c(0, 1.5), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Net sales volatility") ggplot(data = data_dt_d, aes(x = year, y = capx_sales, group = year)) + geom_boxplot(fill = "lightblue") + #ylim(1, 1.6) + scale_x_continuous(breaks = unique(data_dt_d$year)) + scale_y_continuous(limits=c(0, 0.5), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12)) + labs(title = NULL, x = NULL, y = "Capital expenditures-to-sales") ggplot(data = data_dt_d, aes(x = year, y = TOTAL_assets_CCY, group = year)) + geom_boxplot(fill = "lightblue", outlier.shape = NA) + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 500000000)) + #scale_y_continuous(labels = scales::label_number(), limits = c(0, 500000000)) + #stat_summary(fun= max, geom = "point", shape = 16, size = 2, color = "black") + scale_x_continuous(breaks = unique(data_dt_d$year)) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, x = NULL, y = "Assets (Million CZK)") ggplot(data = data_dt_d, aes(x = year, y = Total_debt_CCY, group = year)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 100000000)) + scale_x_continuous(breaks = unique(data_dt_d$year)) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = NULL, y = "Debt (Million CZK)") ggplot(data = data_dt_d, aes(x = year, y = Net_Profit_Loss_CCY, group = year)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE)) + coord_cartesian(ylim = c(-20000000, 30000000)) + scale_x_continuous(breaks = unique(data_dt_d$year)) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = NULL, y = "Profit/Loss (Million CZK)") #################################################### # Boxplots for ESG scores ggplot(data = data_dt_d, aes(x = bin_score_G, y = ROA, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(-0.1, 0.2), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=17), axis.text.x = element_text(size=17), axis.title.y = element_text(size=17, margin=margin(r=20)), axis.title.x = element_text(size=17, margin=margin(r=20))) + labs(title = NULL, x = "Segment G score", y = "ROA") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = ROA, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(-0.1, 0.2), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "ROA") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = ROE, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(-0.2, 0.4), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "ROE") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = PD_RWA, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + ylim(0, 0.2) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "PD_RWA") ggplot(data = data_dt_d, aes(x = bin_score_G, y = TOTAL_assets_CCY, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 1000000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "Segment G score", y = "Assets (Million CZK)") ggplot(data = data_dt_d, aes(x = bin_score_E, y = TOTAL_assets_CCY, group = bin_score_E)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 1000000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=17), axis.text.x = element_text(size=17), axis.title.y = element_text(size=17, margin=margin(r=20)), axis.title.x = element_text(size=17, margin=margin(r=20))) + labs(title = NULL, x = "Segment E score", y = "Assets (Million CZK)") ggplot(data = data_dt_d, aes(x = bin_score_G, y = ROE, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(-0.2, 0.4), labels = scales::percent) + theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=17), axis.text.x = element_text(size=17), axis.title.y = element_text(size=17, margin=margin(r=20)), axis.title.x = element_text(size=17, margin=margin(r=20))) + labs(title = NULL, x = "Segment G score", y = "ROE") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = Net_Profit_Loss_CCY, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(-20000000, 50000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "Profit/Loss (Million CZK)") ggplot(data = data_dt_d, aes(x = bin_score_G, y = Total_debt_CCY, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 300000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "Segment G score", y = "Debt (Million CZK)") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = D_E, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(0, 3), labels = scales::percent) + #limits = c(0, 300000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "D/E") ggplot(data = data_dt_d, aes(x = bin_score_G, y = D_E, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits=c(0, 3), labels = scales::percent) + #limits = c(0, 300000000))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=17), axis.text.x = element_text(size=17), axis.title.y = element_text(size=17, margin=margin(r=20)), axis.title.x = element_text(size=17, margin=margin(r=20))) + labs(title = NULL, x = "Segment G score", y = "D/E") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = Volatility_of_net_sales, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits = c(1, 1.5))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "Net sales volatility") ggplot(data = data_dt_d, aes(x = bin_score_G, y = capx_sales, group = bin_score_G)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits = c(0, 0.6))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "Segment G score", y = "Capex/Sales") ggplot(data = data_dt_d, aes(x = bin_score_ESG, y = CFC, group = bin_score_ESG)) + geom_boxplot(fill = "lightblue") + scale_y_continuous(limits = c(-1, 1.5))+ theme_minimal() + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20)))+ labs(title = NULL, x = "ESG score", y = "CFCR") #################################################### # Graphs for relationships b'ween financial measures ggplot(data_dt_d, aes(y = Volatility_of_net_sales, x = TOTAL_assets_CCY)) + geom_point()+ scale_x_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 500000000)) + theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, y = "Net sales volatility", x = "Assets (Million CZK)") ggplot(data_dt_d, aes(y = Total_debt_CCY, x = TOTAL_assets_CCY)) + geom_point()+ scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 300000000)) + scale_x_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 500000000)) + theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, y = "Total debt (Million CZK)", x = "Assets (Million CZK)") ggplot(data_dt_d, aes(y = Net_Profit_Loss_CCY, x = TOTAL_assets_CCY)) + geom_point()+ scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 50000000)) + scale_x_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 500000000)) + theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, y = "Profit/Loss (Million CZK)", x = "Assets (Million CZK)") ggplot(data_dt_d, aes(x = Total_debt_CCY, y = Net_Profit_Loss_CCY)) + geom_point()+ scale_y_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 100000000)) + scale_x_continuous(labels = function(x) format(x/1000000, scientific = FALSE), limits = c(0, 300000000)) + theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, x = "Total debt (Million CZK)", y = "Profit/Loss (Million CZK)") #################################################### #Histograms ggplot(data_dt_d, aes(x=log(TOTAL_assets_CCY))) + geom_histogram(bins=100) + labs(title = "Profit", x = "Profit/Loss", y = "Count") ggplot(esg_data, aes(x=bin_score_ESG)) + geom_bar(color="black", fill = "lightblue", aes(y = (..count..)/sum(..count..))) + scale_y_continuous(labels = percent) + theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=24), axis.text.x = element_text(size=24), axis.title.y = element_text(size=24, margin=margin(r=20)), axis.title.x = element_text(size=24, margin=margin(r=20))) + labs(title = NULL, x = "ESG score", y = "Frequency") ggplot(data_dt_d, aes(x=ROE)) + geom_histogram(aes(y = ..density..), bins=100) + stat_function(fun = dnorm, colour = "red", lwd=1.3, args = list(mean = mean(data_dt_d$ROE), sd = sd(data_dt_d$ROE)))+ theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, x = "ROE", y = "Frequency") ggplot(data=macro_dt, aes(x=year)) + geom_line(aes(y=cpi, color="black", linetype="dashed"), lwd=1.2)+ geom_line(aes(y=GDPg, color="red", linetype="solid"), lwd=1.2)+ scale_y_continuous(labels = function(x) paste0(x, "%"))+ scale_x_continuous(breaks = seq(2010, 2022, by = 2)) + scale_color_identity(name=NULL, breaks = c("black","red"), labels = c("Harmonised CPI", "Real GDP growth"), guide="legend")+ scale_linetype_identity(name=NULL, breaks = c("dashed","solid"), labels = c("Harmonised CPI", "Real GDP growth"), guide="legend")+ theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, x = NULL, y = "Annual percentage change") + theme(legend.text = element_text(size=12), legend.position="bottom") ########################################### # Statistics, test etc. summary(data_dt_d[, c("ROE", "ROA", "CFC", "debt_EBITDA", "D_E", "Volatility_of_net_sales", "capx_sales")], digits=6) sum1 <- summary(data_dt_d[, c("ROE", "ROA", "CFC", "D_E", "Volatility_of_net_sales", "capx_sales")], digits=4) sum2 <- summary(data_dt_d[, c("TOTAL_assets_CCY", "Total_liabilities_CCY", "Equity_CCY", "Total_debt_CCY", "EBITDA_CCY", "Net_Profit_Loss_CCY")], digits=6) print(xtable(as.table(sum1), type = "latex", digits = 0), file = "sum1.tex") print(xtable(as.table(sum2), type = "latex", digits = 0), file = "sum2.tex") #Correlation analysis corr <- cor(data_dt_d[c("ROE","ROA", "CFC", "D_E", "Volatility_of_net_sales", "capx_sales", "TOTAL_assets_CCY")], use="pairwise.complete.obs") print(xtable(corr, type = "latex"), file = "corr1.tex") corr2 <- cor(data_dt_d[c("ROE","ROA", "CFC", "D_E", "Volatility_of_net_sales", "capx_sales", "TOTAL_assets_CCY", "GDPg", "eurczk", "pribor", "CPI")], use="pairwise.complete.obs") ########################################### # ESG dataset description data_esg <- data_dt_d %>% group_by(client_id) %>% reframe (E= max(bin_score_E), S= max(bin_score_S), G= max(bin_score_G), ESG= max(bin_score_ESG)) table(data_esg$ESG) #Profitability model - standard methods prof_all <- pdata.frame(data_dt_d, index=c("client_id", "year")) prof_all[, 28:33] <- lapply(prof_all[, 28:33], as.numeric) prof_all[, 36:39] <- lapply(prof_all[, 36:39], as.numeric) model_roa_log <- (ROA ~ CFC + log(D_E) + log(Volatility_of_net_sales) + log(capx_sales) + log(TOTAL_assets_CCY)) model_roa_macro_log <- (ROA ~ CFC + log(D_E) + log(Volatility_of_net_sales) + log(capx_sales) + log(TOTAL_assets_CCY) + GDPg + CPI) model_roa <- (ROA ~ CFC + D_E + Volatility_of_net_sales + capx_sales + log(TOTAL_assets_CCY)) model_roa_macro <- (ROA ~ CFC + D_E + Volatility_of_net_sales + capx_sales + log(TOTAL_assets_CCY) + GDPg + CPI) #Micro models roa_ols <- plm(model_roa, data = prof_all, na.action = na.exclude, model="pooling") roa_fe <- plm(model_roa, data = prof_all, na.action = na.exclude, effect = "twoway", model="within") roa_fd <- plm(model_roa, data = prof_all, na.action = na.exclude, model="fd") roa_re <- plm(model_roa, data = prof_all, na.action = na.exclude, model="random") stargazer(roa_ols, roa_fe, roa_fd, roa_re ,title = "Results", align = TRUE) #Tests for the models vif(roa_mfd) pFtest(roa_fe, roa_ols) phtest(roa_fe, roa_re) bptest(roa_fe) pbgtest(roa_fe) pdwtest(roa_fe) #Macromodel roa_mols <- plm(model_roa_macro, data = prof_all, na.action = na.exclude, model="pooling") roa_mfe <- plm(model_roa_macro, data = prof_all, na.action = na.exclude, effect = "individual", model="within") roa_mfd <- plm(model_roa_macro, data = prof_all, na.action = na.exclude, model="fd") roa_mre <- plm(model_roa_macro, data = prof_all, na.action = na.exclude, model="random") stargazer(roa_mols, roa_mfe, roa_mfd, roa_mre ,title = "Results", align = TRUE) summary(roa_fd) cl_robust_fe <- coeftest(roa_fe, vcovHC(roa_fe, method = "arellano", type = "HC1")) cl_robust_fd <- coeftest(roa_fd, vcov = vcovHC(roa_fd, type = "HC0", cluster = "time")) se_robust_fe <- cl_robust_fe[, 2] se_robust_fd <- cl_robust_fd[, 2] cl_robust_mfe <- coeftest(roa_mfe, vcovHC(roa_mfe, method = "arellano", type = "HC1")) cl_robust_mfd <- coeftest(roa_mfd, vcov = vcovHC(roa_mfd, type = "HC0", cluster = "time")) se_robust_mfe <- cl_robust_mfe[, 2] se_robust_mfd <- cl_robust_mfd[, 2] stargazer(roa_fe, roa_fd, roa_mfe, roa_mfd, se = list(se_robust_fe, se_robust_fd, se_robust_mfe, se_robust_mfd)) vif(roa_mfd) summary(roa_mfd) ########################### #Profitability models - GMM model_gmm_log <- ROA ~ lag(ROA, 1) + CFC + log(D_E) + log(Volatility_of_net_sales) + log(capx_sales) + log(TOTAL_assets_CCY) + GDPg + CPI | lag(ROA, 2:6) model_gmm <- ROA ~ lag(ROA, 1) + CFC + D_E + Volatility_of_net_sales + capx_sales + log(TOTAL_assets_CCY) + GDPg + CPI | lag(ROA, 2:6) model_sgmm <- pgmm(model_gmm, data = na.omit(data_dt_d), effect = "individual", model = c("twosteps"), transformation = "ld", index=c("client_id", "year")) model_dgmm <- pgmm(model_gmm, data = na.omit(data_dt_d), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) summary(model_sgmm) summary(model_dgmm) stargazer(roa_mfe,roa_mfd, model_dgmm, se = list(se_robust_mfe, se_robust_mfd, NULL)) #ESG impact analysis dgmm_esg_h <- pgmm(model_gmm , data = na.omit(data_dt_d[data_dt_d$bin_score_S %in% c(1,2), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) dgmm_esg_m <- pgmm(model_gmm, data = na.omit(data_dt_d[data_dt_d$bin_score_E %in% c(1,2,3), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) dgmm_esg_l <- pgmm(model_gmm, data = na.omit(data_dt_d[data_dt_d$bin_score_E %in% c(4,5), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) dgmm_esg_q <- pgmm(model_gmm, data = na.omit(data_dt_d[data_dt_d$bin_score_E %in% c(5), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) dgmm_esg_h <- model_dgmm summary(dgmm_esg_h) summary(dgmm_esg_m) summary(dgmm_esg_l) #Table for 3 rows stargazer(dgmm_esg_h, dgmm_esg_m, dgmm_esg_l , add.lines = list(c("Sargan test", str_c("chisq(",summary(dgmm_esg_h)$sargan$parameter,") = " ,round(summary(dgmm_esg_h)$sargan$statistic,3)), str_c("chisq(",summary(dgmm_esg_m)$sargan$parameter,") = " ,round(summary(dgmm_esg_m)$sargan$statistic,3)), str_c("chisq(",summary(dgmm_esg_l)$sargan$parameter,") = " ,round(summary(dgmm_esg_l)$sargan$statistic,3))), c("p-value", round(summary(dgmm_esg_h)$sargan$p.value, 3), round(summary(dgmm_esg_m)$sargan$p.value, 3), round(summary(dgmm_esg_l)$sargan$p.value, 3)), c("Autocorr. test (1)", round(summary(dgmm_esg_h)$m1$statistic, 3), round(summary(dgmm_esg_m)$m1$statistic, 3), round(summary(dgmm_esg_l)$m1$statistic, 3)), c("p-value", round(summary(dgmm_esg_h)$m1$p.value, 3), round(summary(dgmm_esg_m)$m1$p.value, 3), round(summary(dgmm_esg_l)$m1$p.value, 3)), c("Autocorr. test (2)", round(summary(dgmm_esg_h)$m2$statistic, 3), round(summary(dgmm_esg_m)$m2$statistic, 3), round(summary(dgmm_esg_l)$m2$statistic, 3)), c("p-value", round(summary(dgmm_esg_h)$m2$p.value, 3), round(summary(dgmm_esg_m)$m2$p.value, 3), round(summary(dgmm_esg_l)$m2$p.value, 3)), c("Wald test for coefficients", str_c("chisq(",summary(dgmm_esg_h)$wald.coef$parameter,") = " ,round(summary(dgmm_esg_h)$wald.coef$statistic,3)), str_c("chisq(",summary(dgmm_esg_m)$wald.coef$parameter,") = " ,round(summary(dgmm_esg_m)$wald.coef$statistic,3)), str_c("chisq(",summary(dgmm_esg_l)$wald.coef$parameter,") = " ,round(summary(dgmm_esg_l)$wald.coef$statistic,3))), c("p-value", round(summary(dgmm_esg_h)$wald.coef$p.value, 3), round(summary(dgmm_esg_m)$wald.coef$p.value, 3), round(summary(dgmm_esg_l)$wald.coef$p.value, 3)) )) model_sgmm <- emodel_sgmm model_dgmm <- emodel_dgmm #Table for 2 rows stargazer(model_sgmm, model_dgmm ,add.lines = list(c("Sargan test", str_c("chisq(",summary(model_sgmm)$sargan$parameter,") = " ,round(summary(model_sgmm)$sargan$statistic,3)), str_c("chisq(",summary(model_dgmm)$sargan$parameter,") = " ,round(summary(model_dgmm)$sargan$statistic,3))), c("p-value", round(summary(model_sgmm)$sargan$p.value, 3), round(summary(model_dgmm)$sargan$p.value, 3)), c("Autocorr. test (1)", round(summary(model_sgmm)$m1$statistic, 3), round(summary(model_dgmm)$m1$statistic, 3)), c("p-value", round(summary(model_sgmm)$m1$p.value, 3), round(summary(model_dgmm)$m1$p.value, 3)), c("Autocorr. test (2)", round(summary(model_sgmm)$m2$statistic, 3), round(summary(model_dgmm)$m2$statistic, 3)), c("p-value", round(summary(model_sgmm)$m2$p.value, 3), round(summary(model_dgmm)$m2$p.value, 3)), c("Wald test for coefficients", str_c("chisq(",summary(model_sgmm)$wald.coef$parameter,") = " ,round(summary(model_sgmm)$wald.coef$statistic,3)), str_c("chisq(",summary(model_dgmm)$wald.coef$parameter,") = " ,round(summary(model_dgmm)$wald.coef$statistic,3))), c("p-value", round(summary(model_sgmm)$wald.coef$p.value, 3), round(summary(model_dgmm)$wald.coef$p.value, 3)) )) emodel_gmm <- ROE ~ lag(ROE, 1) + CFC + D_E + Volatility_of_net_sales + capx_sales + log(TOTAL_assets_CCY) + GDPg + CPI | lag(ROE, 2:6) emodel_sgmm <- pgmm(emodel_gmm, data = na.omit(data_dt_d), effect = "individual", model = c("twosteps"), transformation = "ld", index=c("client_id", "year")) emodel_dgmm <- pgmm(emodel_gmm, data = na.omit(data_dt_d), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) summary(emodel_dgmm) stargazer(emodel_sgmm, emodel_dgmm, type = "text") edgmm_esg_m <- pgmm(emodel_gmm, data = na.omit(data_dt_d[data_dt_d$bin_score_ESG %in% c(1,2), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) edgmm_esg_l <- pgmm(emodel_gmm, data = na.omit(data_dt_d[data_dt_d$bin_score_ESG %in% c(3,4,5), ]), effect = "individual", model = c("twosteps"), transformation = "d", index=c("client_id", "year")) dgmm_esg_h <- emodel_dgmm dgmm_esg_m <- edgmm_esg_m dgmm_esg_l <- edgmm_esg_l pd_all <- pdata.frame(data_dt_d, index=c("client_id", "year")) pd_all[, 28:34] <- lapply(pd_all[, 28:34], as.numeric) pd_all[, 36:39] <- lapply(pd_all[, 36:39], as.numeric) pd_all <- pd_all %>% group_by(client_id) %>% mutate(current = if_else(year == max(year), 1, 0)) %>% ungroup() pd_all <- pd_all %>% arrange(client_id, desc(year)) %>% group_by(client_id) %>% mutate(first_lag = ifelse(row_number() == 2, 1, 0)) %>% ungroup() pd_all <- pd_all %>% arrange(client_id, desc(year)) %>% # Sort by id and year descending group_by(client_id) %>% # Group by individual ID mutate(second_lag = ifelse(row_number() == 3, 1, 0)) %>% # Binary variable ungroup() pd_all$t_pd <- log(pd_all$PD_RWA/(1-pd_all$PD_RWA)) pd_all$assets <- pd_all$TOTAL_assets_CCY /1000000 pd_all$d <- ifelse(pd_all$PD_RWA > 0.15, 1, 0) filtered_data <- pd_all %>% group_by(client_id) %>% filter(any(current == 1) & any(first_lag == 1) & any(second_lag == 1)) %>% ungroup() high <- pd_all %>% filter(bin_score_ESG %in% c(1,2) & current == 1) low <- pd_all %>% filter(bin_score_ESG %in% c(3,4,5) & current== 1) model_pd <- d ~ ROE + CFC + D_E + Volatility_of_net_sales + capx_sales + assets model_tpd <- t_pd ~ ROE + CFC + D_E + Volatility_of_net_sales + capx_sales + assets model_pdrwa <- PD_RWA ~ ROE + CFC + D_E + Volatility_of_net_sales + capx_sales + assets #PD log transformed models pd_ols_0 <- lm(model_tpd, data = filtered_data[filtered_data$current == 1, ], na.action = na.exclude) pd_ols_1 <- lm(model_tpd, data = filtered_data[filtered_data$first_lag == 1, ], na.action = na.exclude) pd_ols_2 <- lm(model_tpd, data = filtered_data[filtered_data$second_lag == 1, ], na.action = na.exclude) summary(pd_ols_0) stargazer(pd_ols_0, pd_ols_1, pd_ols_2) #Default models pd_ols <- lm(model_tpd, data = pd_all[pd_all$current == 1, ], na.action = na.exclude) pd_prob <- glm(model_pd, data = pd_all[pd_all$current == 1, ], family = binomial(link = "probit"), na.action = na.exclude) pd_log <- glm(model_pd, data = pd_all[pd_all$current == 1, ], family = binomial(link = "logit"), na.action = na.exclude) pd_rlog <- zelig(model_pd, data = pd_all[pd_all$current == 1, ], model = "relogit", tau = 16/1000) stargazer(pd_ols, pd_prob, pd_log, pd_rlog) #relogit models pd_rlog_0 <- zelig(model_pd, data = filtered_data[filtered_data$current == 1, ], model = "relogit", tau = 16/1000) pd_rlog_1 <- zelig(model_pd, data = filtered_data[filtered_data$first_lag == 1, ], model = "relogit", tau = 16/1000) pd_rlog_2 <- zelig(model_pd, data = filtered_data[filtered_data$second_lag == 1, ], model = "relogit", tau = 16/1000) stargazer(pd_rlog_0, pd_rlog_1, pd_rlog_2) lrtest(pd_rlog_0, pd_rlog_1) table(pd_all[pd_all$current == 1, ]$d == 1) betamod <- betareg(model_pdrwa, filtered_data[filtered_data$current == 1, ], na.action = na.exclude, link = c("logit")) summary(betamod) pd_rlog_h <- zelig(model_pd, data = high1, model = "relogit", tau = 16/1000) pd_rlog_l <- zelig(model_pd, data = low1, model = "relogit", tau = 16/1000) stargazer(pd_rlog_h, pd_rlog_l) high1 <- pd_all %>% filter(bin_score_ESG %in% c(1,2) & second_lag == 1) low1 <- pd_all %>% filter(bin_score_ESG %in% c(3,4,5) & second_lag== 1) betareg_h <- betareg(model_pdrwa, data=high, na.action = na.exclude, link = c("logit")) betareg_l <- betareg(model_pdrwa, data=low, na.action = na.exclude, link = c("logit")) stargazer(betareg_h, betareg_l, type="text") pd_ols_h <- lm(model_tpd, data = high, na.action = na.exclude) pd_ols_l <- lm(model_tpd, data = low, na.action = na.exclude) stargazer(pd_ols, pd_ols_h, pd_ols_l) mu <- mean(pd_all[pd_all$current == 1, ]$PD_RWA) phi <- var(pd_all[pd_all$current == 1, ]$PR_RWA) alpha <- ((1 - mu) / phi - 1 / mu) * mu ^ 2 beta <- alpha * (1 / mu - 1) alpha beta yy <- pd_all[pd_all$current == 1, ]$t_pd dbeta(filtered_data[filtered_data$current == 1, ]$PD_RWA, shape1=alpha, shape2=beta) ks.test(yy, "pbeta", shape1=alpha, shape2=beta) plot(y, dbeta(y, alpha, beta), type='l') library(nortest) result <- ad.test(yy, "pbeta", shape1 = alpha, shape2 = beta) ggplot(pd_all[pd_all$current == 1, ], aes(x=PD_RWA)) + geom_density(lwd=0.9) + stat_function(fun = dbeta, colour = "red", lwd=1.3, args = list(shape1 = alpha, shape2 = beta))+ theme_minimal() + theme(#panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), axis.text.y = element_text(size=12), axis.text.x = element_blank(), axis.title.y = element_text(margin=margin(r=20))) + labs(title = NULL, x = "Default probability", y = "Frequency")