# Point the session at the project folder; every file name below is relative
# to this directory.
setwd(file.path("C:", "Users", "Tamerlan", "Desktop"))
library(haven)
library(plm)
library(lfe)
library(gravity)
library(lmtest)
library(pglm)
library(fixest)
library(stargazer)
# Restore the prepared panel. "LUL.dta" is an R workspace image (it is written
# with save(B, ...) further down in this script), so load() recreates `B`.
load("LUL.dta")

# Estimation sample for the log-linear models: rows with a strictly positive
# trade flow. It has to exist before the regression below, which uses
# `data = C`.
C <- subset(B, TF > 0)

# -------------------------------------------------- OLS -------------------------------------------------------- #
# Pooled OLS gravity equation with year, origin and destination dummies.
OLS <- lm(
  lTF ~ lGDP_o + lGDP_d + ldist + EU_o + EU_d +
    factor(Year) + factor(iso_o) + factor(iso_d),
  data = C
)
summary(OLS)
# --------------------------------------------- FIXED EFFECTS --------------------------------------------------- #
#plm function
#effect="twoways"  == "Factor(Year)"
#FE = plm(lTF ~ lGDP_o + lGDP_d + ldist + EU_o + EU_d, data = A, model = "within",index=c("pair","Year"), effect = "twoways")
# plm makes inefficient use of RAM on this panel, so the call above is left commented out

#felm function - 
#UZ = felm(lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d + EU_lag2_o + EU_lag2_d + 
#both_in + one_in + iso_code_o:Year + iso_code_d:Year | pair, data = C)
# felm (lfe) copes with these fixed effects better than plm


############################## FE using lfe ###################################
# Estimation sample: rows of B with a strictly positive trade flow.
C <- subset(B, TF > 0)

# Benchmark specification: origin / destination EU-membership dummies,
# with pair and year fixed effects.
Alpha <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d | as.factor(pair) + as.factor(Year),
  data = C
)

# Trade-creation specification: both_in / one_in dummies instead of EU_o / EU_d.
Beta <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + both_in + one_in | as.factor(pair) + as.factor(Year),
  data = C
)

# EU dummies plus their first and second lags.
Gamma <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d | as.factor(pair) + as.factor(Year),
  data = C
)

# Quick look at the three models in the console.
stargazer(Alpha, Beta, Gamma, type = "text")

# Labelled table: printed as text and also written to "LaTeX.tex".
stargazer(
  Alpha, Beta, Gamma,
  title = "FE (year & pair) dummies",
  column.labels = c("EU-Effect", "Trade Creation", "EU-Dummies"),
  omit = c("factor", "Constant"),
  dep.var.labels = c("log(exports)", "log(exports)", "log(exports)"),
  add.lines = list(
    c("Year FE", "Yes", "Yes", "Yes"),
    c("Pair FE", "Yes", "Yes", "Yes")
  ),
  no.space = TRUE,
  out = "LaTeX.tex",
  type = "text"
)


# TEST: FE x OLS
# pFtest(), vcovHC(..., cluster = ) and the coeftest() calls below operate on
# plm models, so the within (FE) and pooling models are estimated with plm
# here. The felm / lm fits cannot be passed to pFtest(), and `FE` was never
# created because its plm call earlier in the script is commented out.
# Note: plm is memory-hungry on a large panel.
fe_formula <- lTF ~ lGDP_o + lGDP_d + ldist + EU_o + EU_d

FE <- plm(fe_formula,
  data = C, model = "within",
  index = c("pair", "Year"), effect = "twoways"
)
OLS_pooled <- plm(fe_formula,
  data = C, model = "pooling",
  index = c("pair", "Year")
)

# F test for the joint significance of the fixed effects:
# a p-value below 0.05 favours FE over pooled OLS.
pFtest(FE, OLS_pooled)

# Clustered standard errors
## Clustering by year
se_by_time <- coeftest(FE, vcov = vcovHC(FE, type = "sss", cluster = "time"))
se_by_time

## Clustering by the panel group (the `pair` index)
se_by_group <- coeftest(FE, vcov = vcovHC(FE, type = "sss", cluster = "group"))
se_by_group

# Default, time-clustered and group-clustered results side by side.
stargazer(FE, se_by_time, se_by_group, type = "text")


# --------------------------------------------- RANDOM EFFECTS --------------------------------------------------- #

# Specification shared by the random-effects model and the tests below.
re_formula <- lTF ~ lGDP_o + lGDP_d + ldist + EU_o + EU_d +
  EU_lag_o + EU_lag_d + EU_lag2_o + EU_lag2_d

# Two-way random-effects model on the positive-trade sample.
RE <- plm(re_formula,
  data = C, model = "random",
  index = c("pair", "Year"), effect = "twoways"
)

summary(RE)

# BP test (Breusch-Pagan): run on the same formula and sample as the model.
# The original call pointed at a data set `A` that does not exist at this
# point of the script. A p-value below 0.05 indicates heteroskedasticity.
bptest(re_formula, data = C)

# Hausman test: it needs the within counterpart of RE as a plm model.
# The original passed `gamma`, which is base R's gamma function, not a model.
# A p-value below 0.05 means FE is preferred over RE.
FE_lags <- plm(re_formula,
  data = C, model = "within",
  index = c("pair", "Year"), effect = "twoways"
)
phtest(FE_lags, RE)

# -------------------------------------------------- P P M L -------------------------------------------------------- #
## NOTE: USE FRESH DATASET, WHERE THERE ARE MISSING DATA
#B = unknownToNA(Dtaset, "..")
#B$GDP_d = as.numeric(as.character(B$GDP_d))
#B$GDP_o = as.numeric(as.character(B$GDP_o))
#B$lGDP_o = log(B$GDP_o)
##B$lGDP_d = log(B$GDP_d)
#B$lTF = log(B$TF)
#B$ldist = log(B$distw)
#B = pdata.frame(B , index  =c("pair", "Year"))

# Exporter-time-variant fixed effects
# One identifier per (exporter, year). The year column is `Year`; the original
# `B$Years` evaluates to NULL, which left the year out of the identifier.
B$expyears <- paste(B$iso_code_o, B$Year)
exp_years_d <- factor(B$expyears)
# Dense dummy matrix with the first column (the intercept) dropped.
# NOTE(review): this matrix has one column per exporter-year level and can be
# very large; nothing in this script uses it after creation.
exp_years_dummies <- model.matrix(~exp_years_d)
exp_years_dummies <- exp_years_dummies[, -1]

# Importer-time-variant fixed effects (same construction on the destination).
B$impyears <- paste(B$iso_code_d, B$Year)
imp_years_d <- factor(B$impyears)
imp_years_dummies <- model.matrix(~imp_years_d)
imp_years_dummies <- imp_years_dummies[, -1]

# Pair and year identifiers stored as character columns, so that they can be
# used as categorical regressors in the pglm call below.
B$pairfe <- paste(factor(B$pair))
B$yearfe <- paste(factor(B$Year))

# Checking the results of the ppml function with pglm:
# pooled Poisson regression with explicit pair and year dummies.
Poi <- pglm(
  formula = TF ~ lGDP_o + lGDP_d + ldist + EU_o + EU_d + pairfe + yearfe,
  family = poisson, model = "pooling", data = B
)
summary(Poi) # => pglm double check successful => ppml function derived same results as pglm



# PPML estimates with pair and year fixed effects.
# feglm() defaults to family = "gaussian", so the Poisson family has to be
# requested explicitly; without it these models are not Poisson fits.

# Benchmark: EU membership dummies.
Poisson <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d | pair + Year,
  data = B, family = "poisson"
)
summary(Poisson)

# EU dummies plus first and second lags.
Poisson2 <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d | pair + Year,
  data = B, family = "poisson"
)
summary(Poisson2)

# Trade-creation dummies.
Poisson3 <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + both_in + one_in | pair + Year,
  data = B, family = "poisson"
)
summary(Poisson3)



  # -------------------------------------------------- EXPORT -------------------------------------------------------- #
library(stargazer)

# Export of the PPML models.
# The original stargazer() call referenced Po1..Po4, which are never created,
# and its own note recorded that stargazer does not work on these models.
# fixest's etable() is the table function for fixest fits, so it is applied to
# the three Poisson models estimated above.

# Console view.
etable(Poisson, Poisson2, Poisson3, headers = c("1", "2", "3"))

# LaTeX export to the same file the original call targeted.
etable(Poisson, Poisson2, Poisson3,
  title = "PPML",
  headers = c("1", "2", "3"),
  tex = TRUE, file = "lel.tex", replace = TRUE
)


# ---------------------------------------- SENSITIVITY ANALYSIS -----------------------------------------------------------#

# Sensitivity of the EU effect to which membership lags are included.
# Standard errors are clustered by country pair. In felm the cluster variable
# is the fourth part of the formula (`| fixed effects | instruments | cluster`);
# stargazer() has no `cluster` argument, so the original
# `stargazer(..., cluster = "pair")` handed the string on as if it were a model.

# No lags.
Prima <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d |
    as.factor(pair) + as.factor(Year) | 0 | pair,
  data = C
)
# First lag only.
Sekunda <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d |
    as.factor(pair) + as.factor(Year) | 0 | pair,
  data = C
)
# Second lag only.
Tertia <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag2_o + EU_lag2_d |
    as.factor(pair) + as.factor(Year) | 0 | pair,
  data = C
)
# Both lags.
Quarta <- felm(
  lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d |
    as.factor(pair) + as.factor(Year) | 0 | pair,
  data = C
)

stargazer(Prima, Sekunda, Tertia, Quarta, type = "text")


# PPML counterparts of the sensitivity models (pair and year fixed effects).
# family = "poisson" is required: feglm() otherwise fits a Gaussian model.

# No lags.
PoissonA <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d | pair + Year,
  data = B, family = "poisson"
)
# First lag only.
PoissonB <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d + EU_lag_o + EU_lag_d | pair + Year,
  data = B, family = "poisson"
)
# Second lag only.
PoissonC <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d + EU_lag2_o + EU_lag2_d | pair + Year,
  data = B, family = "poisson"
)
# Both lags.
PoissonD <- feglm(
  TF ~ ldist + lGDP_o + lGDP_d + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d | pair + Year,
  data = B, family = "poisson"
)
summary(PoissonA)


# Table of the four lag variants: printed as text and written to "lel.tex".
fe_indicator_rows <- list(
  c("Year FE", "Yes", "Yes", "Yes", "Yes"),
  c("Pair FE", "Yes", "Yes", "Yes", "Yes")
)

stargazer(
  Prima, Sekunda, Tertia, Quarta,
  title = "FE - Dummy Variations",
  column.labels = c("EU Effect", "First Lag", "Second Lag", "Both Lags"),
  omit = c("factor", "Constant"),
  add.lines = fe_indicator_rows,
  no.space = TRUE,
  out = "lel.tex",
  type = "text"
)

####---------- ROBUST SE ----------------####
# Log-linear gravity models estimated with fixest (pair and year fixed
# effects); each summary reports standard errors clustered by pair.

# Benchmark: EU membership dummies.
FEOLS_pair_year <- feols(
  fml = lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d | as.factor(pair) + as.factor(Year),
  data = B
)
summary(FEOLS_pair_year, cluster = "pair")

# Trade-creation dummies.
FEOLS_pair_year2 <- feols(
  fml = lTF ~ lGDP_d + lGDP_o + ldist + both_in + one_in | as.factor(pair) + as.factor(Year),
  data = B
)
summary(FEOLS_pair_year2, cluster = "pair")

# EU dummies with first and second lags.
FEOLS_pair_year3 <- feols(
  fml = lTF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d | as.factor(pair) + as.factor(Year),
  data = B
)
summary(FEOLS_pair_year3, cluster = "pair")


# PPML versions of the three specifications, with standard errors clustered by
# pair in the summaries. feglm() defaults to family = "gaussian", so the
# Poisson family is set explicitly.

# Benchmark: EU membership dummies.
poisson_simple1 <- feglm(
  TF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d | pair + Year,
  data = B, family = "poisson"
)
summary(poisson_simple1, cluster = "pair")


# Trade-creation dummies.
poisson_simple2 <- feglm(
  TF ~ lGDP_d + lGDP_o + ldist + both_in + one_in | pair + Year,
  data = B, family = "poisson"
)
summary(poisson_simple2, cluster = "pair")


# EU dummies with first and second lags.
poisson_simple3 <- feglm(
  TF ~ lGDP_d + lGDP_o + ldist + EU_o + EU_d + EU_lag_o + EU_lag_d +
    EU_lag2_o + EU_lag2_d | pair + Year,
  data = B, family = "poisson"
)
summary(poisson_simple3, cluster = "pair")

##################################### DATA CREATION ##########################################################
setwd("C:/Users/Tamerlan/Desktop")
# NOTE(review): presumably this workspace file provides the data frame `Uno`
# used below; confirm.
load("GravData.dta") # <====== DATA FOR PPML

# GDP columns: convert via character to numeric, then take logs.
Uno$GDP_o <- as.numeric(as.character(Uno$GDP_o))
Uno$GDP_d <- as.numeric(as.character(Uno$GDP_d))
Uno$lGDP_d <- log(Uno$GDP_d)
Uno$lGDP_o <- log(Uno$GDP_o)
Uno$ldist <- log(Uno$distw)

# Zero flows become NA, every row containing an NA is dropped, and the log of
# the remaining flows is taken.
zero_flow_rows <- which(Uno$TF == 0)
Uno$TF[zero_flow_rows] <- NA
Uno <- na.omit(Uno)
Uno$lTF <- log(Uno$TF)

# Positional column reordering; the indices refer to the column layout at this
# point of the script.
column_order <- c(2, 1, 6, 4, 5, 3, 8, 7, 10, 9, 18, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22)
Uno <- Uno[, column_order]

# Declare the panel structure (pair x Year) and attach the columns to the
# search path.
library(plm)
Uno <- pdata.frame(Uno, index = c("pair", "Year"))
attach(Uno)

#######################################################################################################################################
######################################################################################################################################################
# Build the country-pair x year table (GDP/EU data for origin and destination)
# and merge it with the CEPII time-invariant pair variables.
# Note: this section reuses the names A, B, Prima and Tertia, overwriting any
# objects of those names created earlier in the session.
setwd("C:/Users/Tamerlan/Desktop")
library(haven)
# CEPII pair-level data, read from a Stata file.
CEPII = read_dta("cepii_time_invariant.dta")
library(readxl)
# Country-level spreadsheets: one keyed by iso_code_o, one by iso_code_d.
Origin = read_excel("gdpwrld.xlsx")
Destination = read_excel("gdpwrld2.xlsx")
library(tidyr)
# Country-code vectors taken from the two spreadsheets.
A = c(Origin$iso_code_o)
B = c(Destination$iso_code_d)


library(dplyr)
# Every combination of an origin code with a destination code.
Prima = crossing(A, B)
names(Prima)[1] = "iso_code_o"
names(Prima)[2] = "iso_code_d"

# Secunda: the pair grid with the origin-country columns attached; it still
# lacks the destination-country columns.
Secunda = full_join(Prima, Origin, by = "iso_code_o")
# Tertia: the pair grid with the destination-country columns attached.
Tertia = full_join(Prima, Destination, by = "iso_code_d")
# Keep only the destination-specific columns (drop the key and Year columns).
supplementtable = within(Tertia, rm("iso_code_o", "Year", "iso_code_d"))
# cbind() glues the two tables together column-wise without matching rows on
# any key.
# NOTE(review): this is only correct if Secunda and Tertia list their rows in
# exactly the same (origin, destination, Year) order - confirm, or join on the
# keys instead. The original note advises reordering the columns to be merged
# in Excel beforehand.
Final = cbind(supplementtable, Secunda)
# Pair identifier: destination code followed by origin code.
# NOTE(review): confirm this ordering matches the `pair` key used in CEPII and
# in the trade-flow data.
Final$pair = paste(Final$iso_code_d, Final$iso_code_o, sep ="")
# Merge with CEPII on the pair identifier (only pairs present in both remain).
Finalu = merge(Final, CEPII, by = "pair")

#----------------------------------------------


# Trade flows: restore `U` from the workspace file and key each row by the
# concatenated reporter + partner ISO3 codes.
load("trdflw.dta")
trdata <- U
trdata$pair <- paste0(trdata$ReporterISO3, trdata$PartnerISO3)
trdata[c("ReporterISO3", "PartnerISO3")] <- NULL

# Combine flows with the country-pair table on (pair, Year).
Dtaset <- merge(trdata, Finalu, by = c("pair", "Year"))

# Positional column selection / ordering of the merged table.
Dtaset <- Dtaset[, c(2, 1, 4, 10, 11, 9, 5, 12, 6, 13, 7, 14, 8, 15, 16:21, 3)]

# Names for columns 3 to 14 of the reordered table.
colnames(Dtaset)[3:14] <- c(
  "iso_o", "iso_code_o", "iso_d", "iso_code_d",
  "GDP_o", "GDP_d",
  "EU_o", "EU_d",
  "EU_lag_o", "EU_lag_d",
  "EU_lag2_o", "EU_lag2_d"
)

# Final estimation data set: EU trade-creation dummies and log variables.
B <- Dtaset

# both_in = 1 when origin and destination are both EU members;
# one_in = 1 when exactly one of them is.
B$both_in <- ifelse(B$EU_o * B$EU_d == 1, 1, 0)
B$one_in <- ifelse(B$EU_o + B$EU_d == 1, 1, 0)

# The positional reorder `B[, c(1:22, 28, 24:27)]` that used to sit here was
# removed. It ran before the log columns below exist, so it indexed columns
# that are not there, and it left out column 23 (one_in). Every model refers
# to columns by name, so no reordering is needed.

# GDP columns: convert via character to numeric, then take logs.
B$GDP_d <- as.numeric(as.character(B$GDP_d))
B$GDP_o <- as.numeric(as.character(B$GDP_o))
B$lGDP_o <- log(B$GDP_o)
B$lGDP_d <- log(B$GDP_d)
# Zero flows give lTF = -Inf; the log-linear models are estimated on the
# positive-flow subset, while the Poisson models use TF itself.
B$lTF <- log(B$TF)
B$ldist <- log(B$distw)


save(B, file = "LUL.dta")
#################################################################################################
# Raw trade-flow extracts, stacked in this order into one table `U`.
trade_csv_files <- c(
  "1-4.csv", "5-7.csv", "3-6.csv",
  "8-2.csv", "8-10.csv", "7-0.csv",
  "11-13.csv", "14-15.csv", "16-18.csv"
)
trade_parts <- lapply(trade_csv_files, read.csv)

U <- do.call(rbind, trade_parts)
# Stored as an R workspace file (despite the .dta extension); it is read back
# with load().
save(U, file = "trdflw.dta")


#--- CREATE LOG OF Xi ---#
library(gdata)
# Log-linear sample `A`: built from Dtaset with zero flows and incomplete rows
# removed. (The earlier `A = B` was overwritten on the very next line and has
# been dropped.)
A <- Dtaset

# Convert GDP to numeric before filtering, so that any entry that fails the
# conversion (and becomes NA) is removed together with the other incomplete
# rows instead of surviving into the logs.
A$GDP_d <- as.numeric(as.character(A$GDP_d))
A$GDP_o <- as.numeric(as.character(A$GDP_o))

# Zero flows have no logarithm: mark them missing, then keep complete rows.
A$TF[A$TF == 0] <- NA
A <- A[complete.cases(A), ]
#A = unknownToNA(Dtaset, "..")
#A = na.omit(A, object = A)

A$lGDP_o <- log(A$GDP_o)
A$lGDP_d <- log(A$GDP_d)
A$lTF <- log(A$TF)
A$ldist <- log(A$distw)