# Introductory econometrics script: basic R arithmetic, drawing a random
# sample from a population file, a confidence interval for the mean income,
# and a simple linear regression of Income on Experience.
#
# NOTE: no seed is set in this script, so every run draws different samples.

library(tidyverse) # install.packages("tidyverse")

# this is my first code ----
1 + 1
x <- 3
x^2

income <- c(873, 1050, 2765, 3423, 4536)
mean(income)

age <- c(21, 22, NA, 51)
mean(age)               # NA: missing values propagate through mean()
mean(age, na.rm = TRUE) # drop the NA before averaging

# part 1 ----
# NOTE(review): machine-specific absolute path; the script only runs where
# this directory exists. Kept because later code may rely on the working
# directory being set here.
setwd("C:/Users/harari/Desktop/econometrics")
OLS_EPOG_2016_population <- read.csv("OLS_EPOG_2016_population.csv", sep = ";")
population <- OLS_EPOG_2016_population

N <- nrow(population)
N

# Look at one randomly chosen row of the population.
index <- sample(seq_len(N), 1)
index
population[index, ]

head(population)
names(population)

# Distribution of income in the whole population.
ggplot(population) +
  geom_histogram(
    aes(x = Income),
    colour = "black", fill = "white", bins = 40
  )

# Quartiles of the population income, stored with y = 0.
q <- quantile(population$Income, c(.25, .5, .75))
quarts <- as_tibble(q) %>%
  mutate(y = 0)

# sampling ----
# Draw n rows with replacement and compare the sample to the population.
n <- 50
indexes <- sample(seq_len(N), n, replace = TRUE)
sample_n <- population[indexes, ]
# head(sample_n)

m_hat <- mean(sample_n$Income)
m_hat
var_hat <- var(sample_n$Income)

# Population mean (first row, drawn in red) and sample mean (second row,
# drawn in blue), placed on the x axis.
means <- tibble(
  x = c(mean(population$Income), mean(sample_n$Income)),
  y = 0
)

ggplot(population, aes(y = after_stat(density))) +
  geom_histogram(
    aes(x = Income),
    colour = "black", fill = "white", bins = 40
  ) +
  geom_histogram(
    data = sample_n, aes(x = Income),
    fill = "blue", alpha = 0.3, bins = 20
  ) +
  geom_point(
    data = means, aes(x = x, y = y),
    colour = c("red", "blue"), shape = "x", size = 8
  )

# Confidence Interval ------------
# A fresh sample is drawn here, so the estimates differ from the section above.
n <- 50
indexes <- sample(seq_len(N), n, replace = TRUE)
sample_n <- population[indexes, ]
# head(sample_n)

m_hat <- mean(sample_n$Income)
m_hat
var_hat <- var(sample_n$Income)

means <- tibble(
  x = c(mean(population$Income), mean(sample_n$Income)),
  y = 0
)

# qt(.995, ...) is the 0.995 quantile of the Student t distribution with
# n - 1 degrees of freedom, so the bounds below form a two-sided 99% interval.
t_crit <- qt(.995, df = n - 1)
std_err <- sqrt(var_hat / n)

l_b <- m_hat - std_err * t_crit # formula for the lower bound
l_b
u_b <- m_hat + std_err * t_crit # formula for the upper bound
u_b

IC <- tibble(x = c(l_b, u_b), y = 0)

ggplot(population, aes(y = after_stat(density))) +
  geom_histogram(
    aes(x = Income),
    colour = "black", fill = "white", bins = 40
  ) +
  geom_histogram(
    data = sample_n, aes(x = Income),
    fill = "blue", alpha = 0.3, bins = 20
  ) +
  geom_point(
    data = means, aes(x = x, y = y),
    colour = c("red", "blue"), shape = "x", size = 8
  ) +
  geom_point(
    data = IC, aes(x = x, y = y),
    colour = "blue", shape = "I", size = 8
  )

# Linear model ----
ggplot(population, aes(x = Experience, y = Income)) +
  geom_point()

ggplot(sample_n, aes(x = Experience, y = Income)) +
  geom_point() +
  geom_line()

# lm1: Income regressed on Experience with an intercept.
lm1 <- lm(Income ~ Experience, data = sample_n)
summary(lm1)
plot(lm1) # base-R diagnostic plots for lm1

# lm2: same regression without an intercept (the "- 1" removes it).
# Written with `data =` like lm1 so that predict(newdata = ) works.
lm2 <- lm(Income ~ Experience - 1, data = sample_n)

# Fitted values come from predict(newdata = sample_n) so they stay aligned
# with the rows of sample_n even if lm() dropped rows with missing values.
sample_n %>%
  mutate(
    fit_lm1 = predict(lm1, newdata = sample_n),
    fit_lm2 = predict(lm2, newdata = sample_n)
  ) %>%
  ggplot(aes(x = Experience, y = Income)) +
  geom_point() +
  geom_line(aes(y = fit_lm1), colour = "blue") +
  geom_line(aes(y = fit_lm2), colour = "red")