Important packages (libraries) for this lab:
1. Read Dataset (.csv file)
Generate numeric values for educ
View the labels created and encode
2. Generate variables
Weighted summary statistics
3. Estimating Regression
4. Estimating the regressions using robust option
5. Hypothesis testing
6. More testing
7. Age and Leisure, plots
The following steps are ones we will explore in class next week ……
Run Regressions and generate predicted values
Plot the Predicted Values
Final Chart

This is a second version of the lab 6 where we use the dataset imported from the stata version of this lab. Using this dataset corrects all the slight mismatch between the r and stata results

Important packages (libraries) for this lab:

tidyverse: Contain libraries such as dplyr and ggplot, which are instrumental here
stargazer: helpful in creating multi-model regression tables
sandwich: Helpful in estimating robust regression models
lmtest : helpful in hypothesis testing..
car : helpful in model testing
survey : useful in running weighted statistical tests

#Load libraries

1. Read Dataset (.csv file)

ATUS<-read.csv("IPUMS_ATUS_2019.csv")

source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R") #load NV.R to access nv_sum() function for quick summary statistic table

## 
## Attaching package: 'rlang'

## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

Generate numeric values for educ

ATUS<-ATUS%>%
  mutate(codeduc=with(ATUS, match(educ, unique(educ))))

View the labels created and encode

ATUS%>%
  group_by(educ, codeduc)%>%
  summarise(mean_l<-mean(leisure))

## `summarise()` has grouped output by 'educ'. You can override using the
## `.groups` argument.

## # A tibble: 17 × 3
## # Groups:   educ [17]
##    educ                                           codeduc mean_l <- mean(leisu…¹
##    <chr>                                            <int>                  <dbl>
##  1 10th grade                                           1                   407.
##  2 11th grade                                           4                   395.
##  3 12th grade - no diploma                              5                   401.
##  4 1st, 2nd, 3rd, or 4th grade                         15                   358.
##  5 5th or 6th grade                                     8                   362.
##  6 7th or 8th grade                                     2                   431.
##  7 9th grade                                            3                   367.
##  8 associate degree - academic program                  9                   328.
##  9 associate degree - occupational vocational          13                   341.
## 10 bachelor's degree (ba, ab, bs, etc.)                11                   311.
## 11 doctoral degree (phd, edd, etc.)                    10                   302.
## 12 high school graduate - diploma                       6                   373.
## 13 high school graduate - ged                          12                   394.
## 14 less than 1st grade                                 17                   350.
## 15 master's degree (ma, ms, meng, med, msw, etc.)      14                   293.
## 16 professional school degree (md, dds, dvm, etc…      16                   295.
## 17 some college but no degree                           7                   360.
## # ℹ abbreviated name: ¹`mean_l <- mean(leisure)`

#encode
ATUS<-ATUS%>%
  mutate(edyearF=factor(codeduc, c(1,4,5,15,8,2,3,9,13,11,10,6,12,17,14,16,7),
                        labels=c("10","11","11", "3", "6", "7", "9", "14","14","16", "20","12", "12","0", "18","18", "14")),
         edyear=as.numeric(as.character(edyearF)))
#summarize to check the match
ATUS%>%
  group_by(educ, edyear)%>%
  summarise(mean_l = mean(leisure))

## `summarise()` has grouped output by 'educ'. You can override using the
## `.groups` argument.

## # A tibble: 17 × 3
## # Groups:   educ [17]
##    educ                                            edyear mean_l
##    <chr>                                            <dbl>  <dbl>
##  1 10th grade                                          10   407.
##  2 11th grade                                          11   395.
##  3 12th grade - no diploma                             11   401.
##  4 1st, 2nd, 3rd, or 4th grade                          3   358.
##  5 5th or 6th grade                                     6   362.
##  6 7th or 8th grade                                     7   431.
##  7 9th grade                                            9   367.
##  8 associate degree - academic program                 14   328.
##  9 associate degree - occupational vocational          14   341.
## 10 bachelor's degree (ba, ab, bs, etc.)                16   311.
## 11 doctoral degree (phd, edd, etc.)                    20   302.
## 12 high school graduate - diploma                      12   373.
## 13 high school graduate - ged                          12   394.
## 14 less than 1st grade                                  0   350.
## 15 master's degree (ma, ms, meng, med, msw, etc.)      18   293.
## 16 professional school degree (md, dds, dvm, etc.)     18   295.
## 17 some college but no degree                          14   360.

 #check employment
ATUS%>%
  group_by(empstat)%>%
  summarise(mean_emp=mean(leisure))

## # A tibble: 5 × 2
##   empstat                mean_emp
##   <chr>                     <dbl>
## 1 employed - absent          362.
## 2 employed - at work         272.
## 3 not in labor force         448.
## 4 unemployed - looking       381.
## 5 unemployed - on layoff     403.

#check kidund18
ATUS%>%
  group_by(kidund18)%>%
  summarise(mean_kid=mean(leisure))

## # A tibble: 2 × 2
##   kidund18 mean_kid
##   <chr>       <dbl>
## 1 no           381.
## 2 yes          249.

2. Generate variables

ATUS<-ATUS%>%
  mutate(LTHS =ifelse(edyear<12,1,0),
        HS = ifelse(edyear==12,1,0),
        SC = ifelse(edyear>12 & edyear<16, 1, 0),
        BA = ifelse(edyear==16,1,0),
        GRAD=ifelse(edyear>16,1,0),
        employed =ifelse(empstat=="employed - at work",1,0),
        male =ifelse(sex=="male",1,0),
        child=ifelse(kidund18=="yes",1,0))

#summarize the 10 variables; using nv_sum()

nv_sum(ATUS, LTHS, HS, SC, BA, GRAD, employed, male, child, leisure, age, weight = FALSE)

##    variable  Obs min        mean median      st.dev  max
## 1      LTHS 9435   0   0.1062003      0   0.3081102    1
## 2        HS 9435   0   0.2301007      0   0.4209194    1
## 3        SC 9435   0   0.2682565      0   0.4430753    1
## 4        BA 9435   0   0.2331744      0   0.4228747    1
## 5      GRAD 9435   0   0.1622682      0   0.3687162    1
## 6  employed 9435   0   0.5775305      1   0.4939786    1
## 7      male 9435   0   0.4569157      0   0.4981667    1
## 8     child 9435   0   0.2960254      0   0.4565265    1
## 9   leisure 9435   0 342.0162162    305 229.0125318 1300
## 10      age 9435  15  51.0865925     51  18.1744901   85

Weighted summary statistics

# select variables to summarise
selected_vars <-dplyr::select(ATUS,LTHS, HS,SC, BA, GRAD, employed, male,child, leisure, age)
# Create a survey design object, from survey package
sv_obj <- svydesign(id = ~1, weights = ~wt06, data = ATUS) # id: formula specifying cluster id; ~1 indicate that there is no cluster; weights: formula specifying the sampling weights

# Function to calculate weighted mean and standard error
summary_stats <-function(x,y) { # supply function iterate through the selected variables x while using the weights in the sv_object (y)
  c(
    obs =sum(!is.na(x)),
    mean=svymean(x,y, na.rm=TRUE),#calculate the weighted mean
    Std.dev = sqrt(svyvar(x,y, na.rm = TRUE)),# calculate the weighted SD
    min = min(x, na.rm = TRUE),
    max =max(x, na.rm = TRUE)
   # quant=svyquantile(x,y, 0.50, na.rm=TRUE)
  )
}
sum_table<-sapply(selected_vars, summary_stats, sv_obj) # use the sapply function here to the whole created function to create a vector-type data
#sum_table
# Convert to a data frame for better presentation
results_df <- as.data.frame(t(sum_table))
# Print the results
print(results_df)

##           obs        mean     Std.dev min  max
## LTHS     9435   0.1432380   0.3503339   0    1
## HS       9435   0.2748641   0.4464694   0    1
## SC       9435   0.2417681   0.4281772   0    1
## BA       9435   0.2075264   0.4055572   0    1
## GRAD     9435   0.1326034   0.3391636   0    1
## employed 9435   0.6045367   0.4889759   0    1
## male     9435   0.4843585   0.4997818   0    1
## child    9435   0.2557646   0.4363132   0    1
## leisure  9435 311.1453426 221.0180079   0 1300
## age      9435  46.1374554  18.9997780  15   85

3. Estimating Regression

m1<-lm(leisure~HS+ SC+ BA +GRAD, data = ATUS)
#summary(m1)

m2<-lm(leisure~HS+SC+ BA+ GRAD, data =ATUS, weights = wt06) # non robust regression
#summary(m2)

#robust regression
m3<-lm(leisure~HS+SC+ BA+ GRAD, data =ATUS, weights = wt06) 

#rob<-coeftest(m3, vcov = vcovHC(m3, "HC1"))    # robust; HC1 (Stata default)
rob_se3<-sqrt(diag(vcovHC(m3,"HC1"))) #calculate robust standard error
#rob_se3

stargazer( m1,m2,m3,
          se =list(NULL, NULL, rob_se3),
          type = "text",
          title = "Table 1",
          out = "table1.text",
          column.labels = c("Regular", "Weighted", "Robust SE 1"),
          notes = "Significance level"
          
          )

## 
## Table 1
## ==================================================================
##                                        Dependent variable:        
##                                 ----------------------------------
##                                              leisure              
##                                  Regular    Weighted   Robust SE 1
##                                    (1)         (2)         (3)    
## ------------------------------------------------------------------
## HS                              -17.862**    -0.331      -0.331   
##                                  (8.653)     (7.355)    (11.437)  
##                                                                   
## SC                              -43.569*** -30.079***  -30.079*** 
##                                  (8.457)     (7.525)    (11.249)  
##                                                                   
## BA                              -82.241*** -58.748***  -58.748*** 
##                                  (8.635)     (7.753)    (10.762)  
##                                                                   
## GRAD                            -98.076*** -75.240***  -75.240*** 
##                                  (9.207)     (8.601)    (10.911)  
##                                                                   
## Constant                        392.905*** 340.677***  340.677*** 
##                                  (7.158)     (5.963)     (9.430)  
##                                                                   
## ------------------------------------------------------------------
## Observations                      9,435       9,435       9,435   
## R2                                0.022       0.017       0.017   
## Adjusted R2                       0.021       0.016       0.016   
## Residual Std. Error (df = 9430)  226.578   699,720.800 699,720.800
## F Statistic (df = 4; 9430)      51.960***   39.932***   39.932*** 
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01
##                                                 Significance level

4. Estimating the regressions using robust option

m1<-lm(leisure~HS + SC + BA + GRAD, data =ATUS, weights = wt06)
r_se1<-sqrt(diag(vcovHC(m1,"HC1")))

m2<-lm(leisure~HS + SC + BA + GRAD + employed, data =ATUS, weights = wt06)
r_se2<-sqrt(diag(vcovHC(m2,"HC1")))

m3<-lm(leisure~HS + SC + BA + GRAD + employed + male, data =ATUS, weights = wt06)
r_se3<-sqrt(diag(vcovHC(m3,"HC1")))

m4<-lm(leisure~HS + SC + BA + GRAD + employed + male + child, data =ATUS, weights = wt06)
r_se4<-sqrt(diag(vcovHC(m4,"HC1")))

m5<-lm(leisure~HS + SC + BA + GRAD + employed + male + child + age, data =ATUS, weights = wt06)
r_se5<-sqrt(diag(vcovHC(m5,"HC1")))

stargazer(m1, m2, m3, m4,m5,
          se=list(r_se1, r_se2, r_se3, r_se4, r_se5),
          type = "text",
          title = "Table 2: Weighted Regression with Robust Standard Error.",
          out="table2.txt",
          notes.append = TRUE)

## 
## Table 2: Weighted Regression with Robust Standard Error.
## ====================================================================================================================================================
##                                                                           Dependent variable:                                                       
##                     --------------------------------------------------------------------------------------------------------------------------------
##                                                                                 leisure                                                             
##                               (1)                       (2)                       (3)                       (4)                       (5)           
## ----------------------------------------------------------------------------------------------------------------------------------------------------
## HS                           -0.331                  28.668***                 30.404***                 30.436***                   5.966          
##                             (11.437)                 (10.959)                  (10.863)                  (10.925)                  (10.984)         
##                                                                                                                                                     
## SC                         -30.079***                  9.687                    15.483                    15.315                    -4.164          
##                             (11.249)                 (11.145)                  (11.180)                  (11.290)                  (11.371)         
##                                                                                                                                                     
## BA                         -58.748***                 -7.716                    -2.924                     0.552                   -21.531**        
##                             (10.762)                 (10.547)                  (10.525)                  (10.630)                  (10.630)         
##                                                                                                                                                     
## GRAD                       -75.240***                -20.940**                  -15.439                   -9.249                  -39.215***        
##                             (10.911)                 (10.633)                  (10.595)                  (10.753)                  (10.880)         
##                                                                                                                                                     
## employed                                            -161.779***               -170.667***               -156.435***               -138.178***       
##                                                       (6.119)                   (6.211)                   (6.242)                   (6.762)         
##                                                                                                                                                     
## male                                                                           61.444***                 57.906***                 58.051***        
##                                                                                 (5.762)                   (5.738)                   (5.627)         
##                                                                                                                                                     
## child                                                                                                   -75.699***                -64.990***        
##                                                                                                           (5.686)                   (6.022)         
##                                                                                                                                                     
## age                                                                                                                                1.584***         
##                                                                                                                                     (0.183)         
##                                                                                                                                                     
## Constant                   340.677***               403.103***                375.113***                386.073***                319.120***        
##                             (9.430)                   (9.564)                   (9.846)                   (9.943)                  (13.123)         
##                                                                                                                                                     
## ----------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                 9,435                     9,435                     9,435                     9,435                     9,435          
## R2                           0.017                     0.139                     0.158                     0.179                     0.194          
## Adjusted R2                  0.016                     0.138                     0.157                     0.178                     0.193          
## Residual Std. Error 699,720.800 (df = 9430)   654,876.200 (df = 9429)   647,687.500 (df = 9428)   639,544.700 (df = 9427)   633,621.700 (df = 9426) 
## F Statistic         39.932*** (df = 4; 9430) 303.814*** (df = 5; 9429) 294.074*** (df = 6; 9428) 293.181*** (df = 7; 9427) 283.610*** (df = 8; 9426)
## ====================================================================================================================================================
## Note:                                                                                                                    *p<0.1; **p<0.05; ***p<0.01

5. Hypothesis testing

#b
m1<-lm(leisure~ HS + SC + BA + GRAD + employed + age + male + child, data = ATUS, weights = wt06)
r_seM1<-sqrt(diag(vcovHC.default(m1, "HC1")))
summary(m1)

## 
## Call:
## lm(formula = leisure ~ HS + SC + BA + GRAD + employed + age + 
##     male + child, data = ATUS, weights = wt06)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -4739440  -356313    -7432   378327  5288153 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  319.1197     7.8162  40.828  < 2e-16 ***
## HS             5.9661     6.9511   0.858   0.3908    
## SC            -4.1645     7.0585  -0.590   0.5552    
## BA           -21.5313     7.3484  -2.930   0.0034 ** 
## GRAD         -39.2150     8.2495  -4.754 2.03e-06 ***
## employed    -138.1779     4.6237 -29.885  < 2e-16 ***
## age            1.5844     0.1187  13.344  < 2e-16 ***
## male          58.0507     4.1397  14.023  < 2e-16 ***
## child        -64.9902     4.8814 -13.314  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 633600 on 9426 degrees of freedom
## Multiple R-squared:  0.194,  Adjusted R-squared:  0.1933 
## F-statistic: 283.6 on 8 and 9426 DF,  p-value: < 2.2e-16

r_seM1

## (Intercept)          HS          SC          BA        GRAD    employed 
##  13.1227179  10.9839615  11.3708467  10.6299208  10.8803269   6.7622559 
##         age        male       child 
##   0.1833738   5.6266617   6.0219273

t1<-linearHypothesis(m1, "BA =GRAD")
print(t1)

## Linear hypothesis test
## 
## Hypothesis:
## BA - GRAD = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F  Pr(>F)  
## 1   9427 3.7867e+15                               
## 2   9426 3.7843e+15  1 2.4139e+12 6.0126 0.01422 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#c
t2<-linearHypothesis(m1, c("HS", "SC", "BA", "GRAD"))
print(t2)

## Linear hypothesis test
## 
## Hypothesis:
## HS = 0
## SC = 0
## BA = 0
## GRAD = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F    Pr(>F)    
## 1   9430 3.8057e+15                                   
## 2   9426 3.7843e+15  4 2.1406e+13 13.329 7.841e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6. More testing

#a
linearHypothesis(m1, c("child =0", "employed=0"))

## Linear hypothesis test
## 
## Hypothesis:
## child = 0
## employed = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq     F    Pr(>F)    
## 1   9428 4.2703e+15                                  
## 2   9426 3.7843e+15  2 4.8603e+14 605.3 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

linearHypothesis(m1, "child-employed=0") # or linearHypothesis(m1, "child=employed =0)

## Linear hypothesis test
## 
## Hypothesis:
## - employed  + child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F    Pr(>F)    
## 1   9427 3.8259e+15                                   
## 2   9426 3.7843e+15  1 4.1573e+13 103.55 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

linearHypothesis(m1, "2*child =employed")

## Linear hypothesis test
## 
## Hypothesis:
## - employed  + 2 child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq     F Pr(>F)
## 1   9427 3.7845e+15                           
## 2   9426 3.7843e+15  1 2.0795e+11 0.518 0.4717

#b
linearHypothesis(m1, c("HS", "SC", "BA", "GRAD", "employed", "age", "male", "child "))

## Linear hypothesis test
## 
## Hypothesis:
## HS = 0
## SC = 0
## BA = 0
## GRAD = 0
## employed = 0
## age = 0
## male = 0
## child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df Sum of Sq      F    Pr(>F)    
## 1   9434 4.6952e+15                                  
## 2   9426 3.7843e+15  8 9.109e+14 283.61 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7. Age and Leisure, plots

#c
ggplot(ATUS, aes(age, leisure))+
  geom_point()+
  labs(
    title = "scatter plot of age and Leisure"
  )

#Scatter plot is not helpful here, we can try something else
sv_ob <- svydesign(id = ~1, weights = ~wt06, data = ATUS) 

 sum<-ATUS%>%
   group_by(age)%>%
   summarise(mean_leisure = weighted.mean(leisure, wt06),
             n=n())
 #sum
 
 #generate a variable that is the average leisure by age
ATUS <-ATUS%>%
   group_by(age)%>%
   mutate(leisure_byAge = mean(leisure))
  
 ggplot(ATUS, aes(age, leisure_byAge))+
   geom_line()+
   labs(
     title = "Leisure by age and Age"
   )

 # regress leisure on age
 m1<-lm(leisure~age, data = ATUS, weights = wt06)
 summary(m1)

## 
## Call:
## lm(formula = leisure ~ age, data = ATUS, weights = wt06)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -2798926  -435886   -44055   403995  4575406 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 183.7885     5.8054   31.66   <2e-16 ***
## age           2.7604     0.1164   23.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 685400 on 9433 degrees of freedom
## Multiple R-squared:  0.05631,    Adjusted R-squared:  0.05621 
## F-statistic: 562.9 on 1 and 9433 DF,  p-value: < 2.2e-16

 #robust
 rm1<-coeftest(m1,vcov=vcovHC(m1,"HC1")) #robust coefficient and se
 se_m1<-sqrt(diag(vcovHC(m1, "HC1")))

 
 #atus19$pred_leisure<-predict(m1)
 ATUS$fitted_values<-fitted(m1)
 
ggplot(ATUS, aes(age, leisure_byAge))+
  geom_line(aes(color ="Leisure_byAge"))+
  geom_line(aes(y= fitted_values, color ="fitted_values"))+
   labs(
     title = "Predicted values"
   )

The following steps are ones we will explore in class next week ……

#d
ATUS<-ATUS%>%
  mutate(age2 =age^2)

for (i in 2:5){
  new_val_name <- paste0("age", i)
  ATUS[[new_val_name]]<-ATUS$age^i
  
}

Run Regressions and generate predicted values

m1<-lm(leisure~ age, weights = wt06, data = ATUS)
ATUS$pred_yht1<-predict(m1)

m1se<-sqrt(diag(vcovHC.default(m1, "HC1")))

m2<-lm(leisure~ age+ age2, weights = wt06, data = ATUS)
ATUS$pred_yht2<-predict(m2)

m2se<-sqrt(diag(vcovHC.default(m2, "HC1")))

m3<-lm(leisure~ age + age2 + age3, weights = wt06, data = ATUS)
ATUS$pred_yht3<-predict(m3)

m3se<-sqrt(diag(vcovHC.default(m3, "HC1")))

m4<-lm(leisure~ age + age2 + age3 + age4, weights = wt06, data = ATUS)
ATUS$pred_yht4<-predict(m4)

m4se<-sqrt(diag(vcovHC.default(m4,"HC1")))

stargazer(m1, m2, m3, m4,
          se =list(m1se, m2se, m3se, m4se),
          type = "text",
          title = "Table 3.",
          out="table3.txt",
          notes.append = TRUE)

## 
## Table 3.
## ===========================================================================================================================
##                                                               Dependent variable:                                          
##                     -------------------------------------------------------------------------------------------------------
##                                                                     leisure                                                
##                                (1)                       (2)                       (3)                       (4)           
## ---------------------------------------------------------------------------------------------------------------------------
## age                         2.760***                  -9.975***                -24.276***                   6.979          
##                              (0.163)                   (0.851)                   (3.090)                  (10.123)         
##                                                                                                                            
## age2                                                  0.134***                  0.460***                   -0.670*         
##                                                        (0.008)                   (0.066)                   (0.346)         
##                                                                                                                            
## age3                                                                            -0.002***                 0.014***         
##                                                                                 (0.0004)                   (0.005)         
##                                                                                                                            
## age4                                                                                                     -0.0001***        
##                                                                                                           (0.00002)        
##                                                                                                                            
## Constant                   183.789***                438.048***                619.588***                329.830***        
##                              (8.763)                  (20.252)                  (44.685)                  (101.663)        
##                                                                                                                            
## ---------------------------------------------------------------------------------------------------------------------------
## Observations                  9,435                     9,435                     9,435                     9,435          
## R2                            0.056                     0.100                     0.105                     0.107          
## Adjusted R2                   0.056                     0.100                     0.104                     0.106          
## Residual Std. Error  685,358.600 (df = 9433)   669,183.900 (df = 9432)   667,644.500 (df = 9431)   666,942.400 (df = 9430) 
## F Statistic         562.858*** (df = 1; 9433) 526.458*** (df = 2; 9432) 367.441*** (df = 3; 9431) 281.377*** (df = 4; 9430)
## ===========================================================================================================================
## Note:                                                                                           *p<0.1; **p<0.05; ***p<0.01

Plot the Predicted Values

ggplot(ATUS, aes(age, leisure_byAge, color = "leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, color ="pred_yht1"))+
   labs(
     title = "Predicted values"
   )

ggplot(ATUS, aes(age, leisure_byAge, color = "leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, color ="pred_yht1"))+
  geom_line(aes(y =pred_yht2, color ="pred_yht2"))+
  geom_line(aes(y =pred_yht3, color ="pred_yht3"))+
  geom_line(aes(y =pred_yht4, color ="pred_yht4"))+
   labs(
     title = "Predicted values"
   )

Final Chart

ggplot(ATUS, aes(age, leisure_byAge, linetype = "leisure_byAge", color="leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, linetype ="linear",color = "linear"))+
  geom_line(aes(y =pred_yht2,linetype ="quadratic",color="quadratic"))+
  geom_line(aes(y =pred_yht3, linetype ="cubic", color="cubic"))+
  geom_line(aes(y =pred_yht4, linetype ="quartic",color="quartic"))+
  scale_linetype_manual(values = c("linear"="dashed", "quadratic"="dotted", "cubic"="twodash", "quartic"="dotdash", "leisure_byAge"="solid"))+
  scale_color_manual(values = c("linear"="blue", "quadratic"="green", "cubic"="purple", "quartic"="red", "leisure_byAge"="black"))+
   labs(
     title = "Predicted values",
     linetype ="Model", # line type will merge with color type to created an annotated, colored legend based on linetype and color type
     color ="Model",
     y="Leisure by Age"
   )+
  theme_minimal()

Regression Analysis (Econ 0211), Lab6: Hypothesis Testing and Motivating Nonlinear Regression

Noe Vyizigiro

April, 2024