# R code for the first econometrics lecture: describe a simulated income
# population, draw a random sample from it and fit first linear models.
#
# Adjust `data_path` to the location of population.csv on your machine.
# The workspace is intentionally not wiped with rm(list = ls()); start a fresh
# R session if you need a clean environment.
data_path <- "C:/Users/harari/Desktop/Dropbox/enseignement/M2 EPOG econometrics/2 linear model/population.csv"
population <- read.csv(data_path, sep = ";")

# `population` is a data.frame; this script uses its Income, Experience and
# Gender columns.
# Gender = 1 if female.
# True model: Income = 1200 - 150*Gender + 100 + 40*Exp - 1*(Exp-10)^2 + error
# Generated in Excel (French function names):
#   ARRONDI(1200-150*Gender+100+40*Exp-1*(Exp-10)^2+LOI.NORMALE.INVERSE.N(ALEA();0;100);0)
# which in English-locale Excel reads
#   ROUND(1200-150*Gender+100+40*Exp-1*(Exp-10)^2+NORM.INV(RAND();0;100);0)

# ---- Description of the population ----
N <- nrow(population) # number of rows
histI <- hist(population$Income)
mu <- mean(population$Income)
mu
var(population$Income)
hist_E <- hist(population$Experience)

# ---- Sample of size n, drawn with replacement ----
# No seed is set in this script, so each run draws a different sample.
n <- 200
indexes <- sample(seq_len(N), n, replace = TRUE)
sample_n <- population[indexes, ]
mu_hat <- mean(sample_n$Income)
mu_hat

# ---- Description of the sample ----
hist(sample_n$Income)
hist(sample_n$Experience)

# Overlay the sample income density (transparent blue) on the population one.
plot(histI, freq = FALSE)
histI_ech <- hist(sample_n$Income, plot = FALSE)
plot(histI_ech, col = rgb(0, 0, 1, 1 / 4), freq = FALSE, add = TRUE)

# ---- Linear models ----
plot(sample_n$Experience, sample_n$Income)

# Income = a * Experience + b (lm() includes an intercept by default)
Output_of_lm1 <- lm(Income ~ Experience, data = sample_n)
summary(Output_of_lm1)
residuals(Output_of_lm1)
fitted.values(Output_of_lm1)

# Plot the fitted model on top of the data, then the diagnostic plots (2 x 2).
layout(c(1))
plot(sample_n$Experience, sample_n$Income)
lines(sample_n$Experience, fitted.values(Output_of_lm1), col = "red")
layout(matrix(1:4, 2, 2))
plot(Output_of_lm1)

# Income = a * Experience (the `- 1` removes the intercept)
lm2 <- lm(Income ~ Experience - 1, data = sample_n)
summary(lm2)
layout(matrix(1:4, 2, 2))
plot(lm2)

# Compare both fits on one scatter plot. The first model is stored in
# `Output_of_lm1` (there is no object called `lm1`); the legend label "lm1"
# refers to it.
layout(c(1))
plot(sample_n$Experience, sample_n$Income, xlab = "Experience", ylab = "Income")
lines(sample_n$Experience, fitted.values(Output_of_lm1), col = "red")
lines(sample_n$Experience, fitted.values(lm2), col = "orange")
legend("bottomright", c("Income", "lm1", "lm2"),
       col = c("black", "red", "orange"),
       pch = c(1, NA, NA), lty = c(NA, 1, 1))

# Income = a * Experience + b * Gender
# Same model as lm(sample_n$Income ~ sample_n$Experience + sample_n$Gender);
# the `data =` form gives cleaner coefficient names.
lm3 <- lm(Income ~ Experience + Gender, data = sample_n)
summary(lm3)
plot(lm3)
# ---- Additive model with a quadratic experience term ----
sample_n$Exp2 <- sample_n$Experience^2
lm3b <- lm(Income ~ Experience + Exp2 + Gender, data = sample_n)
plot(lm3b)
layout(matrix(1:4, 2, 2))
plot(lm3)

# ---- Observed incomes and lm3 fitted values, split by gender ----
# Gender == 0 is drawn in blue ("Men"), Gender == 1 in red ("Women").
# `lm3` was fitted earlier in the script, before `sample_n` is re-ordered
# below, so its fitted values are aligned with the current row order.
layout(c(1))
men <- sample_n$Gender == 0
women <- sample_n$Gender == 1
plot(sample_n$Experience, sample_n$Income,
     xlab = "Experience", ylab = "Income", pch = c("."), lwd = 0)
# xlab/ylab are not passed to points(): they are not graphical parameters
# there and only produce warnings.
points(sample_n$Experience[men], sample_n$Income[men],
       col = "blue", pch = c("x"))
points(sample_n$Experience[women], sample_n$Income[women],
       col = "red", pch = c("x"))
#lines(sample_n$Experience,fitted.values(lm2),lwd=2)
points(sample_n$Experience[men], fitted.values(lm3)[men],
       col = "blue", lwd = 2)
points(sample_n$Experience[women], fitted.values(lm3)[women],
       col = "red", lwd = 2)
# The legend lists only what is drawn (the lm2 line above is commented out).
legend("bottomright",
       c("Men (observed)", "Women (observed)",
         "Men (lm3 fit)", "Women (lm3 fit)"),
       col = c("blue", "red", "blue", "red"),
       pch = c("x", "x", "o", "o"))

# ---- Interaction between gender and experience ----
layout(c(1))
plot(sample_n$Experience[men], sample_n$Income[men],
     col = "blue", xlab = "Experience", ylab = "Income",
     xlim = range(sample_n$Experience), ylim = range(sample_n$Income))
points(sample_n$Experience[women], sample_n$Income[women], col = "red")

# Sort by experience so that fitted values can be joined with lines(); the
# gender masks must be recomputed because the row order changes.
sample_n <- sample_n[order(sample_n$Experience), ]
men <- sample_n$Gender == 0
women <- sample_n$Gender == 1

# Income = a Experience + b Gender + c Experience*Gender
lm4 <- lm(Income ~ Experience + Gender + Gender * Experience, data = sample_n)
summary(lm4)
# Draw the lm4 fit per gender (lm5 does not exist yet at this point).
lines(sample_n$Experience[women], fitted.values(lm4)[women],
      col = "red", lwd = 2)
lines(sample_n$Experience[men], fitted.values(lm4)[men],
      col = "blue", lwd = 2)
legend("bottomright", c("Women", "Men"),
       col = c("red", "blue"), lty = 1, lwd = 2)

# Income = a Experience + b Gender + c Experience*Gender + d Experience^2
#          (+ Gender * Experience^2)
# Experience2 holds the same values as Exp2; it is kept under this name
# because the lm5 formula uses it. Term order is kept as in the original
# formula so that the sequential anova(lm5) table is unchanged.
sample_n$Experience2 <- (sample_n$Experience)^2
lm5 <- lm(Income ~ Experience + Experience2 + Gender +
            Gender * Experience + Gender * Experience2,
          data = sample_n)
summary(lm5)
plot(lm5)
anova(lm5)

# ---- Comparing two nested models ----
# lmA (small) is nested in lmB (large, adds Exp2).
lmA <- lm(Income ~ Experience + Gender, data = sample_n)
lmB <- lm(Income ~ Experience + Exp2 + Gender, data = sample_n)
plot(sample_n$Income, residuals(lmA), col = "red")
points(sample_n$Income, residuals(lmB), col = "blue")
legend("topleft", c("small model", "large model"),
       col = c("red", "blue"), pch = 1)
anova(lmA, lmB)