# Read the birth-weight / smoking data set into a tibble.
bws <- read_csv(file = "Birthweight_Smoking.csv")
## Rows: 3000 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (12): nprevist, alcohol, tripre1, tripre2, tripre3, tripre0, birthweight...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the tibble to inspect the first rows and the column types.
bws
## # A tibble: 3,000 × 12
## nprevist alcohol tripre1 tripre2 tripre3 tripre0 birthweight smoker unmarried
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12 0 1 0 0 0 4253 1 1
## 2 5 0 0 1 0 0 3459 0 0
## 3 12 0 1 0 0 0 2920 1 0
## 4 13 0 1 0 0 0 2600 0 0
## 5 9 0 1 0 0 0 3742 0 0
## 6 11 0 1 0 0 0 3420 0 0
## 7 12 0 1 0 0 0 2325 1 0
## 8 10 0 1 0 0 0 4536 0 0
## 9 13 0 1 0 0 0 2850 0 0
## 10 10 0 1 0 0 0 2948 0 0
## # ℹ 2,990 more rows
## # ℹ 3 more variables: educ <dbl>, age <dbl>, drinks <dbl>
# Load NV.R to access the nv_sum() function (quick summary-statistics table).
# NOTE(review): this is a hard-coded absolute path to a mounted class volume;
# the script will fail on machines where that volume is not mounted.
source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R")
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
# Five-number summary plus mean for every column of the data set.
# (Alternative kept for reference: as.data.frame(summary(bws)))
summary(object = bws)
## nprevist alcohol tripre1 tripre2
## Min. : 0.00 Min. :0.00000 Min. :0.000 Min. :0.000
## 1st Qu.: 9.00 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.000
## Median :12.00 Median :0.00000 Median :1.000 Median :0.000
## Mean :10.99 Mean :0.01933 Mean :0.804 Mean :0.153
## 3rd Qu.:13.00 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :35.00 Max. :1.00000 Max. :1.000 Max. :1.000
## tripre3 tripre0 birthweight smoker unmarried
## Min. :0.000 Min. :0.00 Min. : 425 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:3062 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.000 Median :0.00 Median :3420 Median :0.000 Median :0.0000
## Mean :0.033 Mean :0.01 Mean :3383 Mean :0.194 Mean :0.2267
## 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:3750 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.00 Max. :5755 Max. :1.000 Max. :1.0000
## educ age drinks
## Min. : 0.00 Min. :14.00 Min. : 0.00000
## 1st Qu.:12.00 1st Qu.:23.00 1st Qu.: 0.00000
## Median :12.00 Median :27.00 Median : 0.00000
## Mean :12.91 Mean :26.89 Mean : 0.05833
## 3rd Qu.:14.00 3rd Qu.:31.00 3rd Qu.: 0.00000
## Max. :17.00 Max. :44.00 Max. :21.00000
# Model 1: simple regression of birth weight on the smoker indicator.
m1 <- lm(formula = birthweight ~ smoker, data = bws)
summary(m1)
##
## Call:
## lm(formula = birthweight ~ smoker, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3007.06 -313.06 26.94 366.94 2322.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3432.06 11.87 289.115 <2e-16 ***
## smoker -253.23 26.95 -9.396 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 583.7 on 2998 degrees of freedom
## Multiple R-squared: 0.0286, Adjusted R-squared: 0.02828
## F-statistic: 88.28 on 1 and 2998 DF, p-value: < 2.2e-16
# Covariance matrix of the smoker and alcohol indicators (complete rows only).
cov(x = bws[c("smoker", "alcohol")], use = "complete.obs")
## smoker alcohol
## smoker 0.156416139 0.006584862
## alcohol 0.006584862 0.018965878
# Correlation matrix of the smoker and alcohol indicators (complete rows only).
cor(x = bws[c("smoker", "alcohol")], use = "complete.obs")
## smoker alcohol
## smoker 1.0000000 0.1208981
## alcohol 0.1208981 1.0000000
#5 Model 2: regress birthweight on smoker and alcohol
m2 <- lm(formula = birthweight ~ smoker + alcohol, data = bws)
summary(m2)
##
## Call:
## lm(formula = birthweight ~ smoker + alcohol, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3007.7 -313.7 26.3 366.3 2322.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3432.70 11.90 288.373 <2e-16 ***
## smoker -250.80 27.15 -9.237 <2e-16 ***
## alcohol -57.60 77.98 -0.739 0.46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 583.8 on 2997 degrees of freedom
## Multiple R-squared: 0.02878, Adjusted R-squared: 0.02813
## F-statistic: 44.41 on 2 and 2997 DF, p-value: < 2.2e-16
#6 Model 3: regress birthweight on smoker, alcohol, and nprevist
m3 <- lm(
  formula = birthweight ~ smoker + alcohol + nprevist,
  data = bws
)
summary(m3)
##
## Call:
## lm(formula = birthweight ~ smoker + alcohol + nprevist, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2733.53 -307.57 21.42 358.09 2192.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3051.249 34.016 89.701 < 2e-16 ***
## smoker -217.580 26.680 -8.155 5.07e-16 ***
## alcohol -30.491 76.234 -0.400 0.689
## nprevist 34.070 2.855 11.933 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 570.5 on 2996 degrees of freedom
## Multiple R-squared: 0.07285, Adjusted R-squared: 0.07192
## F-statistic: 78.47 on 3 and 2996 DF, p-value: < 2.2e-16
#7 Predict birth weight from model 3 for a mother who smoked, did not drink
#  alcohol, and had 8 prenatal visits.
#
# predict.lm() has no `data` argument: the previous `data = m3$fitted.values`
# was silently absorbed by `...`, so the call only worked because predict()
# falls back to the in-sample fitted values when `newdata` is missing.
# Passing the data explicitly through `newdata` makes the intent unambiguous.
bws$pred_birthwt <- predict(m3, newdata = bws)

# Every row matching the filter has identical regressors, so the mean of the
# predictions equals the single predicted value for that profile; `Obs` reports
# how many mothers in the sample share it.
bws %>%
  filter(smoker == 1 & alcohol == 0 & nprevist == 8) %>%
  summarise(
    Obs = n(),
    mean = mean(pred_birthwt)
  )
## # A tibble: 1 × 2
## Obs mean
## <int> <dbl>
## 1 39 3106.
# Unweighted summary statistics for the four prenatal-care timing indicators,
# using the nv_sum() helper loaded from NV.R.
nv_sum(
  bws,
  tripre1, tripre2, tripre3, tripre0,
  weight = FALSE
)
## # A tibble: 4 × 7
## variable Obs min mean median st.dev max
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 tripre1 3000 0 0.804 1 0.397 1
## 2 tripre2 3000 0 0.153 0 0.360 1
## 3 tripre3 3000 0 0.033 0 0.179 1
## 4 tripre0 3000 0 0.01 0 0.0995 1
# Model 4 includes all four tripre* indicators together with the intercept.
# This model is perfectly multicollinear (lm() reports one coefficient as
# "not defined because of singularities"), so it is not included in the table.
m4 <- lm(
  formula = birthweight ~ smoker + alcohol +
    tripre1 + tripre2 + tripre3 + tripre0,
  data = bws
)
summary(m4)
##
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre1 + tripre2 +
## tripre3 + tripre0, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3029.55 -307.55 31.35 372.45 2401.29
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2756.58 106.69 25.838 < 2e-16 ***
## smoker -228.85 27.16 -8.424 < 2e-16 ***
## alcohol -15.10 77.54 -0.195 0.846
## tripre1 697.97 106.88 6.531 7.66e-11 ***
## tripre2 597.13 109.42 5.457 5.23e-08 ***
## tripre3 561.01 120.88 4.641 3.61e-06 ***
## tripre0 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared: 0.04647, Adjusted R-squared: 0.04487
## F-statistic: 29.18 on 5 and 2994 DF, p-value: < 2.2e-16
# Model 5: same specification with tripre1 left out, so tripre1 becomes the
# omitted reference category for the remaining tripre* indicators.
m5 <- lm(
  formula = birthweight ~ smoker + alcohol + tripre2 + tripre3 + tripre0,
  data = bws
)
summary(m5)
##
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre2 + tripre3 +
## tripre0, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3029.55 -307.55 31.35 372.45 2401.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3454.55 12.65 273.077 < 2e-16 ***
## smoker -228.85 27.16 -8.424 < 2e-16 ***
## alcohol -15.10 77.54 -0.195 0.845613
## tripre2 -100.84 29.62 -3.404 0.000672 ***
## tripre3 -136.96 59.58 -2.299 0.021595 *
## tripre0 -697.97 106.88 -6.531 7.66e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared: 0.04647, Adjusted R-squared: 0.04487
## F-statistic: 29.18 on 5 and 2994 DF, p-value: < 2.2e-16
# Model 6: same specification with tripre2 left out, so tripre2 becomes the
# omitted reference category for the remaining tripre* indicators.
m6 <- lm(
  formula = birthweight ~ smoker + alcohol + tripre1 + tripre3 + tripre0,
  data = bws
)
summary(m6)
##
## Call:
## lm(formula = birthweight ~ smoker + alcohol + tripre1 + tripre3 +
## tripre0, data = bws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3029.55 -307.55 31.35 372.45 2401.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3353.71 28.05 119.561 < 2e-16 ***
## smoker -228.85 27.16 -8.424 < 2e-16 ***
## alcohol -15.10 77.54 -0.195 0.845613
## tripre1 100.84 29.62 3.404 0.000672 ***
## tripre3 -36.12 64.17 -0.563 0.573601
## tripre0 -597.13 109.42 -5.457 5.23e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 578.7 on 2994 degrees of freedom
## Multiple R-squared: 0.04647, Adjusted R-squared: 0.04487
## F-statistic: 29.18 on 5 and 2994 DF, p-value: < 2.2e-16
# Regression table for models 1, 2, 3, 5 and 6 (m4 is left out because of its
# perfect multicollinearity). The table is printed as text and also written to
# table1.txt; switch `type` to "html" for an HTML table instead.
# NOTE(review): `notes` adds the literal line "Significance level" beneath the
# default p-value legend - confirm that this is the intended footnote.
stargazer(
  m1, m2, m3, m5, m6,
  # type = "html",
  type = "text",
  title = "Table 1. Birthweight and characteristic of the mother",
  out = "table1.txt",
  notes = "Significance level"
)
##
## Table 1. Birthweight and characteristic of the mother
## ================================================================================================================================================
## Dependent variable:
## ----------------------------------------------------------------------------------------------------------------------------
## birthweight
## (1) (2) (3) (4) (5)
## ------------------------------------------------------------------------------------------------------------------------------------------------
## smoker -253.228*** -250.803*** -217.580*** -228.848*** -228.848***
## (26.951) (27.153) (26.680) (27.165) (27.165)
##
## alcohol -57.601 -30.491 -15.100 -15.100
## (77.977) (76.234) (77.541) (77.541)
##
## nprevist 34.070***
## (2.855)
##
## tripre2 -100.837***
## (29.619)
##
## tripre1 100.837***
## (29.619)
##
## tripre3 -136.955** -36.118
## (59.581) (64.173)
##
## tripre0 -697.969*** -597.131***
## (106.876) (109.421)
##
## Constant 3,432.060*** 3,432.703*** 3,051.249*** 3,454.549*** 3,353.712***
## (11.871) (11.904) (34.016) (12.650) (28.050)
##
## ------------------------------------------------------------------------------------------------------------------------------------------------
## Observations 3,000 3,000 3,000 3,000 3,000
## R2 0.029 0.029 0.073 0.046 0.046
## Adjusted R2 0.028 0.028 0.072 0.045 0.045
## Residual Std. Error 583.730 (df = 2998) 583.774 (df = 2997) 570.471 (df = 2996) 578.724 (df = 2994) 578.724 (df = 2994)
## F Statistic 88.279*** (df = 1; 2998) 44.406*** (df = 2; 2997) 78.470*** (df = 3; 2996) 29.179*** (df = 5; 2994) 29.179*** (df = 5; 2994)
## ================================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Significance level