Important packages (libraries) for this lab:

  1. tidyverse
  2. stargazer

Read data

# Load the birthweight/smoking data set from the working directory.
bws <- read_csv(file = "Birthweight_Smoking.csv")
## Rows: 3000 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (12): nprevist, alcohol, tripre1, tripre2, tripre3, tripre0, birthweight...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview the first rows and the column types.
bws
## # A tibble: 3,000 × 12
##    nprevist alcohol tripre1 tripre2 tripre3 tripre0 birthweight smoker unmarried
##       <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>       <dbl>  <dbl>     <dbl>
##  1       12       0       1       0       0       0        4253      1         1
##  2        5       0       0       1       0       0        3459      0         0
##  3       12       0       1       0       0       0        2920      1         0
##  4       13       0       1       0       0       0        2600      0         0
##  5        9       0       1       0       0       0        3742      0         0
##  6       11       0       1       0       0       0        3420      0         0
##  7       12       0       1       0       0       0        2325      1         0
##  8       10       0       1       0       0       0        4536      0         0
##  9       13       0       1       0       0       0        2850      0         0
## 10       10       0       1       0       0       0        2948      0         0
## # ℹ 2,990 more rows
## # ℹ 3 more variables: educ <dbl>, age <dbl>, drinks <dbl>
# Load NV.R to access the nv_sum() function for quick summary-statistic tables.
# NOTE(review): this is an absolute path on a mounted volume, so the script
# only runs on machines where that volume is available.
source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R")
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

1. Summary

# Per-column min, quartiles, median, mean, and max for every variable in bws.
# Alternative kept for reference: as.data.frame(summary(bws))
summary(bws)
##     nprevist        alcohol           tripre1         tripre2     
##  Min.   : 0.00   Min.   :0.00000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 9.00   1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.000  
##  Median :12.00   Median :0.00000   Median :1.000   Median :0.000  
##  Mean   :10.99   Mean   :0.01933   Mean   :0.804   Mean   :0.153  
##  3rd Qu.:13.00   3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:0.000  
##  Max.   :35.00   Max.   :1.00000   Max.   :1.000   Max.   :1.000  
##     tripre3         tripre0      birthweight       smoker        unmarried     
##  Min.   :0.000   Min.   :0.00   Min.   : 425   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.00   1st Qu.:3062   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.000   Median :0.00   Median :3420   Median :0.000   Median :0.0000  
##  Mean   :0.033   Mean   :0.01   Mean   :3383   Mean   :0.194   Mean   :0.2267  
##  3rd Qu.:0.000   3rd Qu.:0.00   3rd Qu.:3750   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :1.000   Max.   :1.00   Max.   :5755   Max.   :1.000   Max.   :1.0000  
##       educ            age            drinks        
##  Min.   : 0.00   Min.   :14.00   Min.   : 0.00000  
##  1st Qu.:12.00   1st Qu.:23.00   1st Qu.: 0.00000  
##  Median :12.00   Median :27.00   Median : 0.00000  
##  Mean   :12.91   Mean   :26.89   Mean   : 0.05833  
##  3rd Qu.:14.00   3rd Qu.:31.00   3rd Qu.: 0.00000  
##  Max.   :17.00   Max.   :44.00   Max.   :21.00000

3. Regress birthweight on smoker

# Model 1: bivariate OLS of birthweight on the 0/1 smoker variable.
m1 <- lm(formula = birthweight ~ smoker, data = bws)
summary(m1)
## 
## Call:
## lm(formula = birthweight ~ smoker, data = bws)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3007.06  -313.06    26.94   366.94  2322.94 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3432.06      11.87 289.115   <2e-16 ***
## smoker       -253.23      26.95  -9.396   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 583.7 on 2998 degrees of freedom
## Multiple R-squared:  0.0286, Adjusted R-squared:  0.02828 
## F-statistic: 88.28 on 1 and 2998 DF,  p-value: < 2.2e-16

4. Correlation: Is there OVB due to other behaviors?

# Covariance matrix of the smoker and alcohol columns (complete rows only).
cov(
  x = bws[c("smoker", "alcohol")],
  use = "complete.obs"
)
##              smoker     alcohol
## smoker  0.156416139 0.006584862
## alcohol 0.006584862 0.018965878
# Correlation matrix of the smoker and alcohol columns (complete rows only).
cor(
  x = bws[c("smoker", "alcohol")],
  use = "complete.obs"
)
##            smoker   alcohol
## smoker  1.0000000 0.1208981
## alcohol 0.1208981 1.0000000

#5 Regress birthweight on smoker and alcohol

# Model 2: add alcohol as a control alongside smoker.
m2 <- lm(formula = birthweight ~ smoker + alcohol, data = bws)
summary(m2)
## 
## Call:
## lm(formula = birthweight ~ smoker + alcohol, data = bws)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3007.7  -313.7    26.3   366.3  2322.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3432.70      11.90 288.373   <2e-16 ***
## smoker       -250.80      27.15  -9.237   <2e-16 ***
## alcohol       -57.60      77.98  -0.739     0.46    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 583.8 on 2997 degrees of freedom
## Multiple R-squared:  0.02878,    Adjusted R-squared:  0.02813 
## F-statistic: 44.41 on 2 and 2997 DF,  p-value: < 2.2e-16

#6 Regress birthweight on smoker, alcohol, and nprevist

# Model 3: control for alcohol and nprevist in addition to smoker.
m3 <- lm(
  formula = birthweight ~ smoker + alcohol + nprevist,
  data = bws
)
summary(m3)
## 
## Call:
## lm(formula = birthweight ~ smoker + alcohol + nprevist, data = bws)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2733.53  -307.57    21.42   358.09  2192.70 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3051.249     34.016  89.701  < 2e-16 ***
## smoker      -217.580     26.680  -8.155 5.07e-16 ***
## alcohol      -30.491     76.234  -0.400    0.689    
## nprevist      34.070      2.855  11.933  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 570.5 on 2996 degrees of freedom
## Multiple R-squared:  0.07285,    Adjusted R-squared:  0.07192 
## F-statistic: 78.47 on 3 and 2996 DF,  p-value: < 2.2e-16

#7 Predict

# Store the model-3 fitted value for every row of bws. predict.lm() has no
# `data` argument (the argument for new observations is `newdata`), so the
# previous `data = m3$fitted.values` was silently absorbed by `...` and had no
# effect. Calling predict() with the model alone returns the same in-sample
# fitted values without implying that new data is being supplied.
bws$pred_birthwt <- predict(m3)

# Count the rows with smoker == 1, alcohol == 0 and nprevist == 8, and report
# their mean fitted value. All such rows share the same regressors, so the mean
# equals the model's prediction for that profile.
bws %>%
  filter(smoker == 1 & alcohol == 0 & nprevist == 8) %>%
  summarise(
    Obs = n(),
    mean = mean(pred_birthwt)
  )
## # A tibble: 1 × 2
##     Obs  mean
##   <int> <dbl>
## 1    39 3106.

Summary statistics for the tripre dummy variables

# Summary table for the four tripre columns using nv_sum(), the helper loaded
# from NV.R. NOTE(review): `weight = FALSE` presumably requests unweighted
# statistics — confirm against NV.R.
nv_sum(bws, tripre1,tripre2, tripre3,tripre0, weight = FALSE)
## # A tibble: 4 × 7
##   variable   Obs   min  mean median st.dev   max
##   <chr>    <int> <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 tripre1   3000     0 0.804      1 0.397      1
## 2 tripre2   3000     0 0.153      0 0.360      1
## 3 tripre3   3000     0 0.033      0 0.179      1
## 4 tripre0   3000     0 0.01       0 0.0995     1

9. Let’s include the dummy variables

# Model 4 includes all four tripre dummies together with the intercept. Their
# means sum to 1, so the dummies are perfectly collinear with the constant
# (the dummy-variable trap): lm() reports a singularity and leaves the tripre0
# coefficient as NA. Because of this multicollinearity, the model is not
# included in the regression table.
m4<-lm(birthweight~smoker+alcohol+tripre1+tripre2+tripre3+tripre0, data =bws)
summary(m4)
## 
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre1 + tripre2 + 
##     tripre3 + tripre0, data = bws)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3029.55  -307.55    31.35   372.45  2401.29 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2756.58     106.69  25.838  < 2e-16 ***
## smoker       -228.85      27.16  -8.424  < 2e-16 ***
## alcohol       -15.10      77.54  -0.195    0.846    
## tripre1       697.97     106.88   6.531 7.66e-11 ***
## tripre2       597.13     109.42   5.457 5.23e-08 ***
## tripre3       561.01     120.88   4.641 3.61e-06 ***
## tripre0           NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared:  0.04647,    Adjusted R-squared:  0.04487 
## F-statistic: 29.18 on 5 and 2994 DF,  p-value: < 2.2e-16
# Model 5: leave tripre1 out so it serves as the reference category; the
# remaining tripre coefficients are differences relative to the tripre1 group.
m5 <- lm(
  formula = birthweight ~ smoker + alcohol + tripre2 + tripre3 + tripre0,
  data = bws
)
summary(m5)
## 
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre2 + tripre3 + 
##     tripre0, data = bws)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3029.55  -307.55    31.35   372.45  2401.29 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3454.55      12.65 273.077  < 2e-16 ***
## smoker       -228.85      27.16  -8.424  < 2e-16 ***
## alcohol       -15.10      77.54  -0.195 0.845613    
## tripre2      -100.84      29.62  -3.404 0.000672 ***
## tripre3      -136.96      59.58  -2.299 0.021595 *  
## tripre0      -697.97     106.88  -6.531 7.66e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared:  0.04647,    Adjusted R-squared:  0.04487 
## F-statistic: 29.18 on 5 and 2994 DF,  p-value: < 2.2e-16
# Model 6: same specification with tripre2 as the omitted reference category,
# so the tripre coefficients are now differences relative to the tripre2 group.
m6 <- lm(
  formula = birthweight ~ smoker + alcohol + tripre1 + tripre3 + tripre0,
  data = bws
)
summary(m6)
## 
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre1 + tripre3 + 
##     tripre0, data = bws)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3029.55  -307.55    31.35   372.45  2401.29 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3353.71      28.05 119.561  < 2e-16 ***
## smoker       -228.85      27.16  -8.424  < 2e-16 ***
## alcohol       -15.10      77.54  -0.195 0.845613    
## tripre1       100.84      29.62   3.404 0.000672 ***
## tripre3       -36.12      64.17  -0.563 0.573601    
## tripre0      -597.13     109.42  -5.457 5.23e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared:  0.04647,    Adjusted R-squared:  0.04487 
## F-statistic: 29.18 on 5 and 2994 DF,  p-value: < 2.2e-16

Regression table

# Side-by-side regression table for models 1-3, 5 and 6, printed as text and
# also written to table1.txt. Change `type` to "html" for an HTML table.
stargazer(
  m1, m2, m3, m5, m6,
  type = "text",
  title = "Table 1. Birthweight and characteristic of the mother",
  out = "table1.txt",
  notes = "Significance level"
)
## 
## Table 1. Birthweight and characteristic of the mother
## ================================================================================================================================================
##                                                                         Dependent variable:                                                     
##                     ----------------------------------------------------------------------------------------------------------------------------
##                                                                             birthweight                                                         
##                               (1)                      (2)                      (3)                      (4)                      (5)           
## ------------------------------------------------------------------------------------------------------------------------------------------------
## smoker                    -253.228***              -250.803***              -217.580***              -228.848***              -228.848***       
##                             (26.951)                 (27.153)                 (26.680)                 (27.165)                 (27.165)        
##                                                                                                                                                 
## alcohol                                              -57.601                  -30.491                  -15.100                  -15.100         
##                                                      (77.977)                 (76.234)                 (77.541)                 (77.541)        
##                                                                                                                                                 
## nprevist                                                                     34.070***                                                          
##                                                                               (2.855)                                                           
##                                                                                                                                                 
## tripre2                                                                                              -100.837***                                
##                                                                                                        (29.619)                                 
##                                                                                                                                                 
## tripre1                                                                                                                        100.837***       
##                                                                                                                                 (29.619)        
##                                                                                                                                                 
## tripre3                                                                                               -136.955**                -36.118         
##                                                                                                        (59.581)                 (64.173)        
##                                                                                                                                                 
## tripre0                                                                                              -697.969***              -597.131***       
##                                                                                                       (106.876)                (109.421)        
##                                                                                                                                                 
## Constant                  3,432.060***             3,432.703***             3,051.249***             3,454.549***             3,353.712***      
##                             (11.871)                 (11.904)                 (34.016)                 (12.650)                 (28.050)        
##                                                                                                                                                 
## ------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                 3,000                    3,000                    3,000                    3,000                    3,000          
## R2                           0.029                    0.029                    0.073                    0.046                    0.046          
## Adjusted R2                  0.028                    0.028                    0.072                    0.045                    0.045          
## Residual Std. Error   583.730 (df = 2998)      583.774 (df = 2997)      570.471 (df = 2996)      578.724 (df = 2994)      578.724 (df = 2994)   
## F Statistic         88.279*** (df = 1; 2998) 44.406*** (df = 2; 2997) 78.470*** (df = 3; 2996) 29.179*** (df = 5; 2994) 29.179*** (df = 5; 2994)
## ================================================================================================================================================
## Note:                                                                                                                *p<0.1; **p<0.05; ***p<0.01
##                                                                                                                               Significance level