# R code for econometrics ----
# Walk-through: describe a population, draw samples from it, build a
# confidence interval for the mean, and run one-sample t-tests.

library(tidyverse)

# Load the data ----
# Correct the path to your location of the data: add your own location to
# `data_paths`. The first path that exists on this machine is used, so the
# script no longer fails on whichever hard-coded path is absent.
course_dir <- "Dropbox/enseignement/M2 EPOG econometrics/1bis basics"
data_paths <- c(
  file.path("C:/Users/Hugo-Harari-Kermadec", course_dir, "population.csv"),
  file.path("C:/Users/harari/Desktop", course_dir, "population.csv")
)
data_path <- data_paths[file.exists(data_paths)][1]
if (is.na(data_path)) {
  stop(
    "population.csv not found; add its location to `data_paths`.",
    call. = FALSE
  )
}
population <- read.csv(data_path, sep = ";")

# "population" is a data.frame
# Gender = 1 if female
# real model : Income = 1100-150*Gender+40*Exp-1*(Exp-10)^2+error
# in excel ARRONDI(1100-150*Gender+40*Exp-1*(Exp-10)^2+LOI.NORMALE.INVERSE.N(ALEA();0;100);0)
head(population)
view(population)

# Description of the population ----
N <- nrow(population) # number of lines
N
mu <- mean(population$Income)
mu
var(population$Income)
q <- quantile(population$Income, c(.25, .75))

# Base-graphics histogram; the quartile markers are added right away, while
# the histogram is still the active base plot (drawing them after the ggplot
# below would not use the histogram's coordinate system).
histI <- hist(population$Income)
points(x = q, y = c(0, 0), col = "blue", pch = "x", cex = 8)

# Same histogram with ggplot2
ggplot(population) +
  geom_histogram(aes(Income), color = "black", fill = "white", bins = 20)

histG <- hist(population$Gender, plot = FALSE)
muG <- mean(population$Gender)
muG
var(population$Gender)

# Sampling an individual ----
index <- sample(seq_len(N), 1)
index
population[index, ]

# sample_n of size n ----
n <- 2000
indexes <- sample(seq_len(N), n, replace = TRUE) # sampling with replacement
indexes
sample_n <- population[indexes, ]
# head(sample_n)
mu_hat <- mean(sample_n$Income)
mu_hat

# Description of the sample ----
# Overlay the sample density histogram (transparent blue) on the population's.
hist(population$Income, freq = FALSE)
hist_sample <- hist(sample_n$Income, plot = FALSE)
plot(hist_sample, col = rgb(0, 0, 1, 1 / 4), freq = FALSE, add = TRUE)
mu_hat <- mean(sample_n$Income)
mu_hat

# Smaller sample, used for the confidence interval below
n <- 50
indexes <- sample(seq_len(N), n, replace = TRUE)
indexes
sample_n <- population[indexes, ]
histR_ech <- hist(sample_n$Income, plot = FALSE)

# Confidence interval ----
mu_hat <- mean(sample_n$Income) # estimator of the population mean by the sample mean
mu_hat
var_hat <- var(sample_n$Income) # estimator of the variance
plot(histI, col = rgb(1, 0, 0, 1 / 4), freq = FALSE)
points(c(mu), c(0), col = "red", pch = "x", cex = 2) # population mean
points(c(mu_hat), c(0), col = "blue", pch = "x", cex = 2) # sample mean
l_b <- mu_hat - sqrt(var_hat / n) * qt(.975, df = n - 1) # formula for the lower bound
l_b
u_b <- mu_hat + sqrt(var_hat / n) * qt(.975, df = n - 1) # formula for the upper bound
u_b
# Student quantile used in both bounds, evaluated directly
qt(.975, df = n - 1)
points(c(l_b, u_b), c(0, 0), col = "blue", pch = "I", cex = 2)

# Test ----
t.test(sample_n$Income, mu = 1700, conf.level = 0.95)
# The test is equivalent to checking whether the tested value is in the
# confidence interval.

# Testing that the mean income in a sample of men
# is equal to the mean income of women
population$Gender == 1
males <- population[population$Gender == 0, ]
muM <- mean(males$Income)
muM
females <- population[population$Gender == 1, ]
muF <- mean(females$Income)
muF
head(females)

n <- 100
indexes <- sample(seq_len(nrow(males)), n, replace = TRUE)
sample_n <- males[indexes, ]
t.test(sample_n$Income, mu = muF)
# Expression of the t statistic
sqrt(nrow(sample_n)) * (mean(sample_n$Income) - muF) / sqrt(var(sample_n$Income))

# Linear model ----
plot(sample_n$Experience, sample_n$Income)