# Panel-data walkthrough with `plm`: pooled OLS, fixed effects, random
# effects, time-invariant regressors and the usual specification tests.
#
# Run this script in a fresh R session; it does not clear the workspace or
# change the working directory.

# Setup ----------

# Install the extra packages only when they are missing, instead of
# re-installing them on every run.
for (pkg in c("plm", "plotly")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(tidyverse)
library(plm)
library(plotly)

# Folder holding the course data; the file is read through an explicit path
# so that no setwd() call is needed.
data_dir <- "C:/Users/harari/Desktop/Dropbox/enseignement/M2 EPOG econometrics/3 panel"

panel <- read.csv(
  file.path(data_dir, "panel_large.csv"),
  header = TRUE,
  sep = ","
)
head(panel)

# Keep only the columns used below. Company_Name and year identify the firm
# and the period; they are passed explicitly as the panel index to plm().
panel_toy <- panel %>%
  select(Company_Name, year, marketcap, IQ_TOTAL_ASSETS, ncne, sic1)
head(panel_toy)

panel_toy <- panel_toy %>%
  arrange(Company_Name)
head(panel_toy)

panel_index <- c("Company_Name", "year")

# Pooled model ----------

lm1 <- lm(marketcap ~ IQ_TOTAL_ASSETS, data = panel_toy)
summary(lm1)

ggplot(data = panel_toy, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point() +
  geom_smooth(method = "lm")

# Same pooled fit, with the points coloured by company; shown as a static
# plot and as an interactive plotly widget.
p1 <- ggplot(data = panel_toy, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_smooth(method = "lm")
p1
ggplotly(p1)

# Breaking the panel: time series (one regression line per company).
# se = FALSE is the documented way to drop the confidence band.
ggplot(
  data = panel_toy,
  aes(x = IQ_TOTAL_ASSETS, y = marketcap, colour = Company_Name)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Breaking the panel: cross sectional (one regression line per year).
cross_sectional <- panel_toy %>%
  filter(year > 2005) %>%
  mutate(year = as.factor(year))

ggplot(
  data = cross_sectional,
  aes(x = IQ_TOTAL_ASSETS, y = marketcap, colour = year)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Fixed effect ----------

model.fe <- plm(
  marketcap ~ IQ_TOTAL_ASSETS,
  data = panel_toy,
  index = panel_index,
  model = "within"
)
summary(model.fe)
fixef(model.fe) # display the fixed effects (= an intercept for each firm)

# Fitted values of the within model: firm intercept + slope * regressor.
# They are computed row by row from the estimated coefficients rather than
# as `marketcap - model.fe$residuals`, because the residual vector follows
# plm's own row order and omits rows dropped for missing values, so it is
# not guaranteed to line up with the rows of panel_toy.
fe_effects <- fixef(model.fe)
fe_intercepts <- setNames(as.numeric(fe_effects), names(fe_effects))
fe_slope <- coef(model.fe)[["IQ_TOTAL_ASSETS"]]

fe2 <- panel_toy %>%
  mutate(
    fitted = unname(fe_intercepts[as.character(Company_Name)]) +
      fe_slope * IQ_TOTAL_ASSETS
  )

p_fe <- ggplot(data = fe2, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_line(aes(y = fitted, group = Company_Name, colour = Company_Name))
p_fe

# Comparing one single linear model with a FE panel model.
p_fe +
  geom_smooth(method = "lm", se = FALSE)

# The FE model is equivalent to a linear model with dummies for each company.
lm2 <- lm(
  marketcap ~ IQ_TOTAL_ASSETS + factor(Company_Name),
  data = panel_toy
)

# Build the plotting data from the model frame actually used by lm(), so the
# fitted values stay aligned with their rows even if lm() dropped some.
lm2_fit <- data.frame(
  IQ_TOTAL_ASSETS = lm2$model[["IQ_TOTAL_ASSETS"]],
  Company_Name = lm2$model[["factor(Company_Name)"]],
  fitted = unname(fitted(lm2))
)

ggplot(data = panel_toy, aes(x = IQ_TOTAL_ASSETS, y = marketcap)) +
  geom_point(aes(colour = Company_Name)) +
  geom_line(data = lm2_fit, aes(y = fitted, group = Company_Name))

# Add time-fixed effects (= an intercept for each year).
model.fe.time <- plm(
  marketcap ~ IQ_TOTAL_ASSETS + factor(year),
  data = panel_toy,
  index = panel_index,
  model = "within"
)
summary(model.fe.time)
fixef(model.fe.time)

# Random effect ----------

model.re <- plm(
  marketcap ~ IQ_TOTAL_ASSETS,
  data = panel_toy,
  index = panel_index,
  model = "random"
)
summary(model.re)

# Time invariant variables ----------

# If we add a time-invariant variable (sic1 gives the sector of a company,
# which is constant), FE should not work.
model.fe2 <- plm(
  marketcap ~ IQ_TOTAL_ASSETS + factor(sic1),
  data = panel_toy,
  index = panel_index,
  model = "within"
)
summary(model.fe2)
# The results are exactly the same as without sic1:
summary(model.fe)

# On the contrary, it works with the random effects model.
model.re2 <- plm(
  marketcap ~ IQ_TOTAL_ASSETS + factor(sic1),
  data = panel_toy,
  index = panel_index,
  model = "random"
)
summary(model.re2)

# Control tests ----------

# Hausman test of validity of the random effects model.
# If p-value < 0.05, reject the random effects model: use fixed effects.
phtest(model.fe, model.re)

# Fisher test for the interest of adding new variables.
# If p-value < 0.05, the added effects are significant: keep the new variables.
pFtest(model.fe.time, model.fe)

# Test for autocorrelation in the residuals.
# Original run reported p-value = 4.338e-08: yes, there is autocorrelation.
pbgtest(model.re2)

# With autocorrelated residuals the default plm standard errors are
# underestimated, so the significance of the explanatory variables is
# overstated. Effects that are not very strongly significant should be
# commented on with caution.
# One mitigation available in plm itself is a robust (Arellano) covariance
# matrix; see also the clubSandwich package.
summary(
  model.re2,
  vcov = function(x) vcovHC(x, method = "arellano")
)