# Setup: load packages, read the raw panel and keep the columns used below.

# NOTE(review): clearing the workspace and hard-coding a working directory
# make the script machine-specific; both are kept so that the relative CSV
# path below keeps resolving. Adjust the path for your own machine.
rm(list = ls())
library(tidyverse)

# Install plm only when it is missing, so that re-running the script does not
# hit the network and reinstall the package every time.
if (!requireNamespace("plm", quietly = TRUE)) {
  install.packages("plm")
}
library(plm)
setwd("C:/Users/harari/Desktop/Dropbox/enseignement/M2 EPOG econometrics/3 panel")

# Raw panel, read from the working directory set above.
panel <- read.csv("./panel_large.csv",
                  header = TRUE, sep = ",")
head(panel)

# Working subset: firm identifier, year, and the variables kept for the
# models below.
panel_toy <- panel %>%
  select(Company_Name, year, marketcap,
         IQ_TOTAL_ASSETS, ncne, sic1)
head(panel_toy)

# Order rows by firm name.
panel_toy <- panel_toy %>%
  arrange(Company_Name)
head(panel_toy)

# Pooled model ----------
# One OLS regression of marketcap on IQ_TOTAL_ASSETS over all rows of
# panel_toy, ignoring the firm and year structure.
lm1 <- lm(formula = marketcap ~ IQ_TOTAL_ASSETS, data = panel_toy)
summary(lm1)

# Scatter plot of all observations with a single fitted regression line.
ggplot(panel_toy, aes(IQ_TOTAL_ASSETS, marketcap)) +
  geom_point() +
  geom_smooth(method = "lm")

# Same plot, with points coloured by firm; the regression line is still
# fitted on all observations together.
ggplot(panel_toy, aes(IQ_TOTAL_ASSETS, marketcap)) +
  geom_point(mapping = aes(colour = Company_Name)) +
  geom_smooth(method = "lm")

# Interactive version of the pooled plot.
# Install plotly only when it is missing, so that re-running the script does
# not reinstall the package every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)

# Pooled scatter plot (points coloured by firm, one regression line), stored
# so that it can be shown both statically and through ggplotly().
p1 <- ggplot(data = panel_toy,
             aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_smooth(method = "lm")
p1
ggplotly(p1)

# Breaking the panel: time series
# One regression line per firm: colour is set in the global aes(), so
# geom_smooth() fits each firm separately. se = FALSE hides the confidence
# band.
ggplot(data = panel_toy,
       aes(x = IQ_TOTAL_ASSETS, y = marketcap,
           colour = Company_Name)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Breaking the panel: cross sectional
# Keep the years after 2005 and turn year into a factor so that it maps to a
# discrete colour and one regression line is fitted per year.
cross_sectional <- panel_toy %>%
  filter(year > 2005) %>%
  mutate(year = as.factor(year))

ggplot(data = cross_sectional,
       aes(x = IQ_TOTAL_ASSETS, y = marketcap, colour = year)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)



# Fixed effect ------
# install.packages("plm")
library(plm)

# Within (fixed-effects) model: firms are the individuals, years the time
# dimension.
model.fe <- plm(marketcap ~ IQ_TOTAL_ASSETS,
                data = panel_toy,
                index = c("Company_Name", "year"),
                model = "within")
summary(model.fe)
fixef(model.fe) # display the fixed effects (= an intercept for each firm)

# Fitted values = firm intercept + common slope * regressor.
# They are rebuilt from the estimated coefficients and matched by firm name
# rather than computed as marketcap - residuals: plm sorts the data by
# (firm, year) and drops incomplete rows, so its residual vector is not
# guaranteed to have the same length or row order as panel_toy.
fe_fixef <- fixef(model.fe)
fe_intercepts <- setNames(as.numeric(fe_fixef), names(fe_fixef))
fe_slope <- coef(model.fe)[["IQ_TOTAL_ASSETS"]]
fe2 <- panel_toy %>%
  mutate(
    fitted = unname(fe_intercepts[as.character(Company_Name)]) +
      fe_slope * IQ_TOTAL_ASSETS
  )

# Observations with one fitted line per firm (parallel lines: same slope,
# firm-specific intercept).
ggplot(data = fe2, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_line(aes(x = IQ_TOTAL_ASSETS, y = fitted,
                group = Company_Name, colour = Company_Name))

# Comparing one single linear model with a FE panel model.
# se = FALSE hides the confidence band of the pooled regression line.
ggplot(data = fe2, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_line(aes(x = IQ_TOTAL_ASSETS, y = fitted,
                group = Company_Name, colour = Company_Name)) +
  geom_smooth(method = "lm", se = FALSE)

# The FE model is equivalent to a linear model with dummies for each company.
lm2 <- lm(marketcap ~ IQ_TOTAL_ASSETS + factor(Company_Name),
          data = panel_toy)

# Fitted values in an explicit data frame built from the model frame (the
# rows lm() actually used), so that the line layer gets ordinary columns
# instead of a model object and `$` lookups inside aes().
lm2_fit <- data.frame(
  IQ_TOTAL_ASSETS = lm2$model$IQ_TOTAL_ASSETS,
  Company_Name = lm2$model[["factor(Company_Name)"]],
  fitted = unname(fitted(lm2))
)

# Observations coloured by firm, with one fitted line per firm.
ggplot(data = panel_toy, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_line(data = lm2_fit,
            aes(y = fitted, group = Company_Name))

# Add time fixed effects (= an intercept for each year): the year dummies
# enter the within model as ordinary regressors through factor(year).
model.fe.time <- plm(
  formula = marketcap ~ IQ_TOTAL_ASSETS + factor(year),
  data = panel_toy,
  model = "within",
  index = c("Company_Name", "year")
)
summary(model.fe.time)
fixef(model.fe.time)



# Random effect ------------
# The panel index is given explicitly, as in the fixed-effects models: when
# index is omitted, plm assumes that the first two columns of the data are
# the individual and time identifiers, which silently breaks if the columns
# of panel_toy are ever reordered.
model.re <- plm(marketcap ~ IQ_TOTAL_ASSETS,
                data = panel_toy,
                index = c("Company_Name", "year"),
                model = "random")
summary(model.re)

# Time invariant variables ------------
# If we add a time invariant variable (sic1 gives the sector of a company,
# which is constant), FE should not work.
model.fe2 <- plm(marketcap ~ IQ_TOTAL_ASSETS + factor(sic1),
                 data = panel_toy,
                 index = c("Company_Name", "year"),
                 model = "within")
summary(model.fe2)
# The results are exactly the same as without sic1:
summary(model.fe)

# On the contrary, it works with the random effects model.
model.re2 <- plm(marketcap ~ IQ_TOTAL_ASSETS + factor(sic1),
                 data = panel_toy,
                 index = c("Company_Name", "year"),
                 model = "random")
summary(model.re2)

# Control tests -----------

# Hausman test of the validity of the random effects model.
phtest(model.fe, model.re)
# If p-value < 0.05, reject the random effects model: you have to use fixed
# effects.


# F test for the interest of adding new variables (here the year dummies).
pFtest(model.fe.time, model.fe)
# p-value < 0.05: yes, significant effects, keep the new variables.

# Test for autocorrelation in residuals.
pbgtest(model.re2)
# p-value = 4.338e-08 (value noted by the original author): yes, there is.

# The effect of the autocorrelation of residuals is that, in the plm model,
# standard errors are underestimated, and then the significance of the
# explanatory variables is overestimated.
# One way to correct the reported p-values is to use cluster-robust
# (Arellano) standard errors, clustered by firm, through the vcov argument
# of summary(). Effects that are only marginally significant should still be
# commented on with caution.
robust_vcov <- function(x) vcovHC(x, method = "arellano", cluster = "group")
summary(model.fe, vcov = robust_vcov)
summary(model.re2, vcov = robust_vcov)

# Possible alternative to explore: package clubSandwich.