This is a second version of Lab 6 in which we use the dataset imported from the Stata version of the lab. Using this dataset corrects the slight mismatches between the R and Stata results.

Important packages (libraries) for this lab:

  1. tidyverse: contains packages such as dplyr and ggplot2, which are instrumental here
  2. stargazer: helpful in creating multi-model regression tables
  3. sandwich: helpful in estimating robust (heteroskedasticity-consistent) standard errors
  4. lmtest: helpful in hypothesis testing
  5. car: helpful in model testing
  6. survey: useful in running weighted statistical tests

#Load libraries
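
A minimal sketch of this step (assuming the packages listed above are installed):

library(tidyverse)  # dplyr, ggplot2, and friends
library(stargazer)  # multi-model regression tables
library(sandwich)   # robust variance estimators such as vcovHC()
library(lmtest)     # coeftest() and related hypothesis tests
library(car)        # linearHypothesis()
library(survey)     # svydesign(), svymean(), and other weighted tools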

1. Read Dataset (.csv file)

ATUS<-read.csv("IPUMS_ATUS_2019.csv")

source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R") #load NV.R to access nv_sum() function for quick summary statistic table
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

Generate numeric values for educ

ATUS<-ATUS%>%
  mutate(codeduc=match(educ, unique(educ))) # assign a numeric code to each educ level in order of first appearance

View the labels created and encode

ATUS%>%
  group_by(educ, codeduc)%>%
  summarise(mean_l = mean(leisure))
## `summarise()` has grouped output by 'educ'. You can override using the
## `.groups` argument.
## # A tibble: 17 × 3
## # Groups:   educ [17]
##    educ                                           codeduc                 mean_l
##    <chr>                                            <int>                  <dbl>
##  1 10th grade                                           1                   407.
##  2 11th grade                                           4                   395.
##  3 12th grade - no diploma                              5                   401.
##  4 1st, 2nd, 3rd, or 4th grade                         15                   358.
##  5 5th or 6th grade                                     8                   362.
##  6 7th or 8th grade                                     2                   431.
##  7 9th grade                                            3                   367.
##  8 associate degree - academic program                  9                   328.
##  9 associate degree - occupational vocational          13                   341.
## 10 bachelor's degree (ba, ab, bs, etc.)                11                   311.
## 11 doctoral degree (phd, edd, etc.)                    10                   302.
## 12 high school graduate - diploma                       6                   373.
## 13 high school graduate - ged                          12                   394.
## 14 less than 1st grade                                 17                   350.
## 15 master's degree (ma, ms, meng, med, msw, etc.)      14                   293.
## 16 professional school degree (md, dds, dvm, etc…      16                   295.
## 17 some college but no degree                           7                   360.
#encode: map each codeduc level to the corresponding years of schooling (edyear)
ATUS<-ATUS%>%
  mutate(edyearF=factor(codeduc, c(1,4,5,15,8,2,3,9,13,11,10,6,12,17,14,16,7),
                        labels=c("10","11","11", "3", "6", "7", "9", "14","14","16", "20","12", "12","0", "18","18", "14")),
         edyear=as.numeric(as.character(edyearF)))
#summarize to check the match
ATUS%>%
  group_by(educ, edyear)%>%
  summarise(mean_l = mean(leisure))
## `summarise()` has grouped output by 'educ'. You can override using the
## `.groups` argument.
## # A tibble: 17 × 3
## # Groups:   educ [17]
##    educ                                            edyear mean_l
##    <chr>                                            <dbl>  <dbl>
##  1 10th grade                                          10   407.
##  2 11th grade                                          11   395.
##  3 12th grade - no diploma                             11   401.
##  4 1st, 2nd, 3rd, or 4th grade                          3   358.
##  5 5th or 6th grade                                     6   362.
##  6 7th or 8th grade                                     7   431.
##  7 9th grade                                            9   367.
##  8 associate degree - academic program                 14   328.
##  9 associate degree - occupational vocational          14   341.
## 10 bachelor's degree (ba, ab, bs, etc.)                16   311.
## 11 doctoral degree (phd, edd, etc.)                    20   302.
## 12 high school graduate - diploma                      12   373.
## 13 high school graduate - ged                          12   394.
## 14 less than 1st grade                                  0   350.
## 15 master's degree (ma, ms, meng, med, msw, etc.)      18   293.
## 16 professional school degree (md, dds, dvm, etc.)     18   295.
## 17 some college but no degree                          14   360.
 #check employment
ATUS%>%
  group_by(empstat)%>%
  summarise(mean_emp=mean(leisure))
## # A tibble: 5 × 2
##   empstat                mean_emp
##   <chr>                     <dbl>
## 1 employed - absent          362.
## 2 employed - at work         272.
## 3 not in labor force         448.
## 4 unemployed - looking       381.
## 5 unemployed - on layoff     403.
#check kidund18
ATUS%>%
  group_by(kidund18)%>%
  summarise(mean_kid=mean(leisure))
## # A tibble: 2 × 2
##   kidund18 mean_kid
##   <chr>       <dbl>
## 1 no           381.
## 2 yes          249.

2. Generate variables

ATUS<-ATUS%>%
  mutate(LTHS =ifelse(edyear<12,1,0),
        HS = ifelse(edyear==12,1,0),
        SC = ifelse(edyear>12 & edyear<16, 1, 0),
        BA = ifelse(edyear==16,1,0),
        GRAD=ifelse(edyear>16,1,0),
        employed =ifelse(empstat=="employed - at work",1,0),
        male =ifelse(sex=="male",1,0),
        child=ifelse(kidund18=="yes",1,0))
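
A quick sanity check (a sketch, not part of the original lab): the five education dummies should be mutually exclusive and exhaustive, so their row sum should equal 1 for every observation. The variable name ed_sum is hypothetical.

ATUS%>%
  mutate(ed_sum=LTHS+HS+SC+BA+GRAD)%>% # ed_sum is a hypothetical check variable
  count(ed_sum)                        # expect a single row with ed_sum = 1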

#summarize the 10 variables using nv_sum()

nv_sum(ATUS, LTHS, HS, SC, BA, GRAD, employed, male, child, leisure, age, weight = FALSE)
##    variable  Obs min        mean median      st.dev  max
## 1      LTHS 9435   0   0.1062003      0   0.3081102    1
## 2        HS 9435   0   0.2301007      0   0.4209194    1
## 3        SC 9435   0   0.2682565      0   0.4430753    1
## 4        BA 9435   0   0.2331744      0   0.4228747    1
## 5      GRAD 9435   0   0.1622682      0   0.3687162    1
## 6  employed 9435   0   0.5775305      1   0.4939786    1
## 7      male 9435   0   0.4569157      0   0.4981667    1
## 8     child 9435   0   0.2960254      0   0.4565265    1
## 9   leisure 9435   0 342.0162162    305 229.0125318 1300
## 10      age 9435  15  51.0865925     51  18.1744901   85

Weighted summary statistics

# select variables to summarise
selected_vars <-dplyr::select(ATUS,LTHS, HS,SC, BA, GRAD, employed, male,child, leisure, age)
# Create a survey design object, from survey package
sv_obj <- svydesign(id = ~1, weights = ~wt06, data = ATUS) # id: formula specifying the cluster id; ~1 indicates no clustering; weights: formula specifying the sampling weights

# Function to calculate weighted mean and standard error
summary_stats <-function(x,y) { # helper function: x is one selected variable, y is the survey design object (sv_obj) supplying the weights
  c(
    obs =sum(!is.na(x)),
    mean=svymean(x,y, na.rm=TRUE),#calculate the weighted mean
    Std.dev = sqrt(svyvar(x,y, na.rm = TRUE)),# calculate the weighted SD
    min = min(x, na.rm = TRUE),
    max =max(x, na.rm = TRUE)
   # quant=svyquantile(x,y, 0.50, na.rm=TRUE)
  )
}
sum_table<-sapply(selected_vars, summary_stats, sv_obj) # apply summary_stats() to each selected variable; sapply() returns the results as a matrix
#sum_table
# Convert to a data frame for better presentation
results_df <- as.data.frame(t(sum_table))
# Print the results
print(results_df)
##           obs        mean     Std.dev min  max
## LTHS     9435   0.1432380   0.3503339   0    1
## HS       9435   0.2748641   0.4464694   0    1
## SC       9435   0.2417681   0.4281772   0    1
## BA       9435   0.2075264   0.4055572   0    1
## GRAD     9435   0.1326034   0.3391636   0    1
## employed 9435   0.6045367   0.4889759   0    1
## male     9435   0.4843585   0.4997818   0    1
## child    9435   0.2557646   0.4363132   0    1
## leisure  9435 311.1453426 221.0180079   0 1300
## age      9435  46.1374554  18.9997780  15   85
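
The same weighted means and their standard errors can also be obtained through the survey package's formula interface, which serves as a cross-check on the hand-rolled function above; a sketch using the sv_obj design created earlier:

svymean(~leisure + age + employed, sv_obj, na.rm = TRUE) # weighted means with design-based standard errors
svyvar(~leisure, sv_obj, na.rm = TRUE)                   # weighted variance of leisure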

3. Estimating Regressions

m1<-lm(leisure~HS+ SC+ BA +GRAD, data = ATUS)
#summary(m1)

m2<-lm(leisure~HS+SC+ BA+ GRAD, data =ATUS, weights = wt06) # weighted regression, classical (non-robust) standard errors
#summary(m2)

#weighted regression again; robust standard errors are computed next
m3<-lm(leisure~HS+SC+ BA+ GRAD, data =ATUS, weights = wt06) 

#rob<-coeftest(m3, vcov = vcovHC(m3, "HC1"))    # robust; HC1 (Stata default)
rob_se3<-sqrt(diag(vcovHC(m3,"HC1"))) #calculate robust standard error
#rob_se3

stargazer( m1,m2,m3,
          se =list(NULL, NULL, rob_se3),
          type = "text",
          title = "Table 1",
          out = "table1.text",
          column.labels = c("Regular", "Weighted", "Robust SE 1"),
          notes = "Significance level"
          
          )
## 
## Table 1
## ==================================================================
##                                        Dependent variable:        
##                                 ----------------------------------
##                                              leisure              
##                                  Regular    Weighted   Robust SE 1
##                                    (1)         (2)         (3)    
## ------------------------------------------------------------------
## HS                              -17.862**    -0.331      -0.331   
##                                  (8.653)     (7.355)    (11.437)  
##                                                                   
## SC                              -43.569*** -30.079***  -30.079*** 
##                                  (8.457)     (7.525)    (11.249)  
##                                                                   
## BA                              -82.241*** -58.748***  -58.748*** 
##                                  (8.635)     (7.753)    (10.762)  
##                                                                   
## GRAD                            -98.076*** -75.240***  -75.240*** 
##                                  (9.207)     (8.601)    (10.911)  
##                                                                   
## Constant                        392.905*** 340.677***  340.677*** 
##                                  (7.158)     (5.963)     (9.430)  
##                                                                   
## ------------------------------------------------------------------
## Observations                      9,435       9,435       9,435   
## R2                                0.022       0.017       0.017   
## Adjusted R2                       0.021       0.016       0.016   
## Residual Std. Error (df = 9430)  226.578   699,720.800 699,720.800
## F Statistic (df = 4; 9430)      51.960***   39.932***   39.932*** 
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01
##                                                 Significance level
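
Column (3) pairs a weighted lm() fit with HC1 standard errors computed afterwards. A design-based alternative (a sketch, not required by the lab) is to fit the same model through the survey package, which builds the sampling weights and robust standard errors in from the start; m3_svy is a hypothetical object name:

m3_svy<-svyglm(leisure~HS+ SC+ BA+ GRAD, design = sv_obj)
summary(m3_svy) # coefficients with design-based (robust) standard errors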

4. Estimating the regressions using the robust option

m1<-lm(leisure~HS + SC + BA + GRAD, data =ATUS, weights = wt06)
r_se1<-sqrt(diag(vcovHC(m1,"HC1")))

m2<-lm(leisure~HS + SC + BA + GRAD + employed, data =ATUS, weights = wt06)
r_se2<-sqrt(diag(vcovHC(m2,"HC1")))

m3<-lm(leisure~HS + SC + BA + GRAD + employed + male, data =ATUS, weights = wt06)
r_se3<-sqrt(diag(vcovHC(m3,"HC1")))

m4<-lm(leisure~HS + SC + BA + GRAD + employed + male + child, data =ATUS, weights = wt06)
r_se4<-sqrt(diag(vcovHC(m4,"HC1")))

m5<-lm(leisure~HS + SC + BA + GRAD + employed + male + child + age, data =ATUS, weights = wt06)
r_se5<-sqrt(diag(vcovHC(m5,"HC1")))

stargazer(m1, m2, m3, m4,m5,
          se=list(r_se1, r_se2, r_se3, r_se4, r_se5),
          type = "text",
          title = "Table 2: Weighted Regression with Robust Standard Error.",
          out="table2.txt",
          notes.append = TRUE)
## 
## Table 2: Weighted Regression with Robust Standard Error.
## ====================================================================================================================================================
##                                                                           Dependent variable:                                                       
##                     --------------------------------------------------------------------------------------------------------------------------------
##                                                                                 leisure                                                             
##                               (1)                       (2)                       (3)                       (4)                       (5)           
## ----------------------------------------------------------------------------------------------------------------------------------------------------
## HS                           -0.331                  28.668***                 30.404***                 30.436***                   5.966          
##                             (11.437)                 (10.959)                  (10.863)                  (10.925)                  (10.984)         
##                                                                                                                                                     
## SC                         -30.079***                  9.687                    15.483                    15.315                    -4.164          
##                             (11.249)                 (11.145)                  (11.180)                  (11.290)                  (11.371)         
##                                                                                                                                                     
## BA                         -58.748***                 -7.716                    -2.924                     0.552                   -21.531**        
##                             (10.762)                 (10.547)                  (10.525)                  (10.630)                  (10.630)         
##                                                                                                                                                     
## GRAD                       -75.240***                -20.940**                  -15.439                   -9.249                  -39.215***        
##                             (10.911)                 (10.633)                  (10.595)                  (10.753)                  (10.880)         
##                                                                                                                                                     
## employed                                            -161.779***               -170.667***               -156.435***               -138.178***       
##                                                       (6.119)                   (6.211)                   (6.242)                   (6.762)         
##                                                                                                                                                     
## male                                                                           61.444***                 57.906***                 58.051***        
##                                                                                 (5.762)                   (5.738)                   (5.627)         
##                                                                                                                                                     
## child                                                                                                   -75.699***                -64.990***        
##                                                                                                           (5.686)                   (6.022)         
##                                                                                                                                                     
## age                                                                                                                                1.584***         
##                                                                                                                                     (0.183)         
##                                                                                                                                                     
## Constant                   340.677***               403.103***                375.113***                386.073***                319.120***        
##                             (9.430)                   (9.564)                   (9.846)                   (9.943)                  (13.123)         
##                                                                                                                                                     
## ----------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                 9,435                     9,435                     9,435                     9,435                     9,435          
## R2                           0.017                     0.139                     0.158                     0.179                     0.194          
## Adjusted R2                  0.016                     0.138                     0.157                     0.178                     0.193          
## Residual Std. Error 699,720.800 (df = 9430)   654,876.200 (df = 9429)   647,687.500 (df = 9428)   639,544.700 (df = 9427)   633,621.700 (df = 9426) 
## F Statistic         39.932*** (df = 4; 9430) 303.814*** (df = 5; 9429) 294.074*** (df = 6; 9428) 293.181*** (df = 7; 9427) 283.610*** (df = 8; 9426)
## ====================================================================================================================================================
## Note:                                                                                                                    *p<0.1; **p<0.05; ***p<0.01

5. Hypothesis testing

#b
m1<-lm(leisure~ HS + SC + BA + GRAD + employed + age + male + child, data = ATUS, weights = wt06)
r_seM1<-sqrt(diag(vcovHC.default(m1, "HC1")))
summary(m1)
## 
## Call:
## lm(formula = leisure ~ HS + SC + BA + GRAD + employed + age + 
##     male + child, data = ATUS, weights = wt06)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -4739440  -356313    -7432   378327  5288153 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  319.1197     7.8162  40.828  < 2e-16 ***
## HS             5.9661     6.9511   0.858   0.3908    
## SC            -4.1645     7.0585  -0.590   0.5552    
## BA           -21.5313     7.3484  -2.930   0.0034 ** 
## GRAD         -39.2150     8.2495  -4.754 2.03e-06 ***
## employed    -138.1779     4.6237 -29.885  < 2e-16 ***
## age            1.5844     0.1187  13.344  < 2e-16 ***
## male          58.0507     4.1397  14.023  < 2e-16 ***
## child        -64.9902     4.8814 -13.314  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 633600 on 9426 degrees of freedom
## Multiple R-squared:  0.194,  Adjusted R-squared:  0.1933 
## F-statistic: 283.6 on 8 and 9426 DF,  p-value: < 2.2e-16
r_seM1
## (Intercept)          HS          SC          BA        GRAD    employed 
##  13.1227179  10.9839615  11.3708467  10.6299208  10.8803269   6.7622559 
##         age        male       child 
##   0.1833738   5.6266617   6.0219273
t1<-linearHypothesis(m1, "BA =GRAD")
print(t1)
## Linear hypothesis test
## 
## Hypothesis:
## BA - GRAD = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F  Pr(>F)  
## 1   9427 3.7867e+15                               
## 2   9426 3.7843e+15  1 2.4139e+12 6.0126 0.01422 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#c
t2<-linearHypothesis(m1, c("HS", "SC", "BA", "GRAD"))
print(t2)
## Linear hypothesis test
## 
## Hypothesis:
## HS = 0
## SC = 0
## BA = 0
## GRAD = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F    Pr(>F)    
## 1   9430 3.8057e+15                                   
## 2   9426 3.7843e+15  4 2.1406e+13 13.329 7.841e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
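
Both F-tests above use the classical variance matrix. To make them robust, the HC1 variance matrix can be supplied through linearHypothesis()'s vcov. argument; a sketch:

linearHypothesis(m1, "BA = GRAD", vcov. = vcovHC(m1, "HC1"))                 # robust version of t1
linearHypothesis(m1, c("HS", "SC", "BA", "GRAD"), vcov. = vcovHC(m1, "HC1")) # robust version of t2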

6. More testing

#a
linearHypothesis(m1, c("child =0", "employed=0"))
## Linear hypothesis test
## 
## Hypothesis:
## child = 0
## employed = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq     F    Pr(>F)    
## 1   9428 4.2703e+15                                  
## 2   9426 3.7843e+15  2 4.8603e+14 605.3 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
linearHypothesis(m1, "child-employed=0") # or linearHypothesis(m1, "child=employed =0)
## Linear hypothesis test
## 
## Hypothesis:
## - employed  + child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq      F    Pr(>F)    
## 1   9427 3.8259e+15                                   
## 2   9426 3.7843e+15  1 4.1573e+13 103.55 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
linearHypothesis(m1, "2*child =employed")
## Linear hypothesis test
## 
## Hypothesis:
## - employed  + 2 child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df  Sum of Sq     F Pr(>F)
## 1   9427 3.7845e+15                           
## 2   9426 3.7843e+15  1 2.0795e+11 0.518 0.4717
#b
linearHypothesis(m1, c("HS", "SC", "BA", "GRAD", "employed", "age", "male", "child "))
## Linear hypothesis test
## 
## Hypothesis:
## HS = 0
## SC = 0
## BA = 0
## GRAD = 0
## employed = 0
## age = 0
## male = 0
## child = 0
## 
## Model 1: restricted model
## Model 2: leisure ~ HS + SC + BA + GRAD + employed + age + male + child
## 
##   Res.Df        RSS Df Sum of Sq      F    Pr(>F)    
## 1   9434 4.6952e+15                                  
## 2   9426 3.7843e+15  8 9.109e+14 283.61 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
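
The point estimate and standard error of a tested linear combination can also be read off directly with car::deltaMethod; a sketch (passing the robust variance matrix is optional):

deltaMethod(m1, "child - employed", vcov. = vcovHC(m1, "HC1")) # estimate and SE of the difference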

7. Age and Leisure, plots

#c
ggplot(ATUS, aes(age, leisure))+
  geom_point()+
  labs(
    title = "scatter plot of age and Leisure"
  )

#The scatter plot is not very informative here; we can try something else
sv_ob <- svydesign(id = ~1, weights = ~wt06, data = ATUS) 

 sum<-ATUS%>%
   group_by(age)%>%
   summarise(mean_leisure = weighted.mean(leisure, wt06),
             n=n())
 #sum
 
 #generate a variable that is the (unweighted) average leisure by age
ATUS <-ATUS%>%
   group_by(age)%>%
   mutate(leisure_byAge = mean(leisure))
  
 ggplot(ATUS, aes(age, leisure_byAge))+
   geom_line()+
   labs(
     title = "Leisure by age and Age"
   )

 # regress leisure on age
 m1<-lm(leisure~age, data = ATUS, weights = wt06)
 summary(m1)
## 
## Call:
## lm(formula = leisure ~ age, data = ATUS, weights = wt06)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -2798926  -435886   -44055   403995  4575406 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 183.7885     5.8054   31.66   <2e-16 ***
## age           2.7604     0.1164   23.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 685400 on 9433 degrees of freedom
## Multiple R-squared:  0.05631,    Adjusted R-squared:  0.05621 
## F-statistic: 562.9 on 1 and 9433 DF,  p-value: < 2.2e-16
 #robust
 rm1<-coeftest(m1,vcov=vcovHC(m1,"HC1")) #robust coefficient and se
 se_m1<-sqrt(diag(vcovHC(m1, "HC1")))

 
 #atus19$pred_leisure<-predict(m1)
 ATUS$fitted_values<-fitted(m1)
 
ggplot(ATUS, aes(age, leisure_byAge))+
  geom_line(aes(color ="Leisure_byAge"))+
  geom_line(aes(y= fitted_values, color ="fitted_values"))+
   labs(
     title = "Predicted values"
   )
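
As a quicker alternative to storing the fitted values by hand, ggplot can fit and draw the weighted regression line itself; a sketch, where the weight aesthetic passes wt06 to the underlying lm() call:

ggplot(ATUS, aes(age, leisure))+
  geom_line(aes(y = leisure_byAge, color ="Leisure_byAge"))+
  geom_smooth(aes(weight = wt06, color ="fitted line"), method = "lm", se = FALSE)+
  labs(title = "Weighted linear fit via geom_smooth")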

The following steps are ones we will explore in class next week…

#d
ATUS<-ATUS%>%
  mutate(age2 =age^2)

for (i in 2:5){ # create age2 through age5 as raw powers of age (age2 is re-created here)
  new_val_name <- paste0("age", i)
  ATUS[[new_val_name]]<-ATUS$age^i
}
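
An equivalent way to get the polynomial terms (a sketch, shown only as an alternative to the loop above) is to put poly() with raw = TRUE directly in the formula, which avoids creating the age2-age5 columns by hand; m4_poly is a hypothetical name:

m4_poly<-lm(leisure~poly(age, 4, raw = TRUE), weights = wt06, data = ATUS) # same fit as leisure ~ age + age2 + age3 + age4
#summary(m4_poly)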

Run Regressions and generate predicted values

m1<-lm(leisure~ age, weights = wt06, data = ATUS)
ATUS$pred_yht1<-predict(m1)

m1se<-sqrt(diag(vcovHC.default(m1, "HC1")))

m2<-lm(leisure~ age+ age2, weights = wt06, data = ATUS)
ATUS$pred_yht2<-predict(m2)

m2se<-sqrt(diag(vcovHC.default(m2, "HC1")))

m3<-lm(leisure~ age + age2 + age3, weights = wt06, data = ATUS)
ATUS$pred_yht3<-predict(m3)

m3se<-sqrt(diag(vcovHC.default(m3, "HC1")))

m4<-lm(leisure~ age + age2 + age3 + age4, weights = wt06, data = ATUS)
ATUS$pred_yht4<-predict(m4)

m4se<-sqrt(diag(vcovHC.default(m4,"HC1")))

stargazer(m1, m2, m3, m4,
          se =list(m1se, m2se, m3se, m4se),
          type = "text",
          title = "Table 3.",
          out="table3.txt",
          notes.append = TRUE)
## 
## Table 3.
## ===========================================================================================================================
##                                                               Dependent variable:                                          
##                     -------------------------------------------------------------------------------------------------------
##                                                                     leisure                                                
##                                (1)                       (2)                       (3)                       (4)           
## ---------------------------------------------------------------------------------------------------------------------------
## age                         2.760***                  -9.975***                -24.276***                   6.979          
##                              (0.163)                   (0.851)                   (3.090)                  (10.123)         
##                                                                                                                            
## age2                                                  0.134***                  0.460***                   -0.670*         
##                                                        (0.008)                   (0.066)                   (0.346)         
##                                                                                                                            
## age3                                                                            -0.002***                 0.014***         
##                                                                                 (0.0004)                   (0.005)         
##                                                                                                                            
## age4                                                                                                     -0.0001***        
##                                                                                                           (0.00002)        
##                                                                                                                            
## Constant                   183.789***                438.048***                619.588***                329.830***        
##                              (8.763)                  (20.252)                  (44.685)                  (101.663)        
##                                                                                                                            
## ---------------------------------------------------------------------------------------------------------------------------
## Observations                  9,435                     9,435                     9,435                     9,435          
## R2                            0.056                     0.100                     0.105                     0.107          
## Adjusted R2                   0.056                     0.100                     0.104                     0.106          
## Residual Std. Error  685,358.600 (df = 9433)   669,183.900 (df = 9432)   667,644.500 (df = 9431)   666,942.400 (df = 9430) 
## F Statistic         562.858*** (df = 1; 9433) 526.458*** (df = 2; 9432) 367.441*** (df = 3; 9431) 281.377*** (df = 4; 9430)
## ===========================================================================================================================
## Note:                                                                                           *p<0.1; **p<0.05; ***p<0.01

Plot the Predicted Values

ggplot(ATUS, aes(age, leisure_byAge, color = "leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, color ="pred_yht1"))+
   labs(
     title = "Predicted values"
   )

ggplot(ATUS, aes(age, leisure_byAge, color = "leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, color ="pred_yht1"))+
  geom_line(aes(y =pred_yht2, color ="pred_yht2"))+
  geom_line(aes(y =pred_yht3, color ="pred_yht3"))+
  geom_line(aes(y =pred_yht4, color ="pred_yht4"))+
   labs(
     title = "Predicted values"
   )

Final Chart

ggplot(ATUS, aes(age, leisure_byAge, linetype = "leisure_byAge", color="leisure_byAge"))+
  geom_line()+
  geom_line(aes(y =pred_yht1, linetype ="linear",color = "linear"))+
  geom_line(aes(y =pred_yht2,linetype ="quadratic",color="quadratic"))+
  geom_line(aes(y =pred_yht3, linetype ="cubic", color="cubic"))+
  geom_line(aes(y =pred_yht4, linetype ="quartic",color="quartic"))+
  scale_linetype_manual(values = c("linear"="dashed", "quadratic"="dotted", "cubic"="twodash", "quartic"="dotdash", "leisure_byAge"="solid"))+
  scale_color_manual(values = c("linear"="blue", "quadratic"="green", "cubic"="purple", "quartic"="red", "leisure_byAge"="black"))+
   labs(
     title = "Predicted values",
     linetype ="Model", # line type will merge with color type to created an annotated, colored legend based on linetype and color type
     color ="Model",
     y="Leisure by Age"
   )+
  theme_minimal()