# OLS intuition
#
# Three simulated regressions that illustrate how OLS behaves:
#   1. one regressor, error with non-zero mean;
#   2. two regressors, error containing x2^2 where x2 is centred at 0;
#   3. two regressors, error containing x2^2 where x2 is centred at 2,
#      which makes the error correlated with x2.
# All objects are created at top level so they can be inspected afterwards.

# ---------------------------------------------------------------------------
# Model 1: single regressor
# ---------------------------------------------------------------------------

# Set seed for random number generation so the draws are reproducible
set.seed(1234)

# Define number of individuals / sample size n1
n1 <- 100

# Create normally distributed random variables
x1 <- rnorm(n1, mean = 1, sd = 10)
epsilon1 <- rnorm(n1, 5, 1)  # error term drawn with mean 5, sd 1

# Define the true coefficient
beta_1 <- 1
y1 <- beta_1 * x1 + epsilon1

# lm() is the built-in procedure in R that calculates OLS
model1 <- lm(y1 ~ x1)

# summary() returns some descriptive statistics
summary(model1)
plot(x1, y1)
abline(model1)

# Questions: What do you expect to happen if
# 1. We increase the variance of x1 or epsilon?
# 2. We choose mean(epsilon) != 0?
# 3. We increase the sample size, holding everything else fixed?

# ---------------------------------------------------------------------------
# Model 2: new model, including an additional variable
# ---------------------------------------------------------------------------

n2 <- 1000
x1 <- rnorm(n2, 0, 3)
x2 <- rnorm(n2, 0, 1)

# Define the true values of the coefficients
beta_1 <- 2
beta_2 <- 4

# The error contains x2^2. x2 is drawn symmetric around 0 here, so x2^2 is
# uncorrelated with x2, but it shifts the mean of the error away from zero.
epsilon2 <- rnorm(n2, 0, 5) + x2^2
y2 <- beta_1 * x1 + beta_2 * x2 + epsilon2
model2 <- lm(y2 ~ x1 + x2)
summary(model2)
plot(x1, y2)

# abline(model2) would use only the first two of the three coefficients and
# emit a warning; pass the intercept and the x1 slope explicitly instead.
abline(a = coef(model2)[["(Intercept)"]], b = coef(model2)[["x1"]])

# ---------------------------------------------------------------------------
# Model 3: error term correlated with a regressor
# ---------------------------------------------------------------------------

# We now take a similar model, this time with an error term that is
# correlated with one of the regressors. What will happen???
n3 <- 1000
x1 <- rnorm(n3, 2, 3)
x2 <- rnorm(n3, 2, 1)

# Define the true values of the coefficients
beta_1 <- 2
beta_2 <- 4

# x2 is now centred at 2, so x2^2 moves together with x2.
epsilon3 <- rnorm(n3, 1, 1) + x2^2
y3 <- beta_1 * x1 + beta_2 * x2 + epsilon3
model3 <- lm(y3 ~ x1 + x2)
summary(model3)