Important packages (libraries) for this lab:

  1. tidyverse
  2. sandwich: Helpful in estimating robust regression models
  3. ggrepel
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.5.1     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
if(!require("sandwich")) install.packages("sandwich") # for robust regression
## Loading required package: sandwich
library(sandwich)
if(!require("ggrepel")) install.packages("ggrepel") # for robust regression
## Loading required package: ggrepel
library(ggrepel)

rm(list = ls())

#Read data

inflation<-read_csv("FRED_inflation.csv")
## Rows: 66 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): year, inflation
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
unemployment<-read_csv("FRED_unemployment.csv")
## Rows: 75 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): year, unemployment
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

3. Merge the data and keep the dataset from 1960

infl_unemp<-unemployment%>%
  left_join(inflation, by="year")%>%
  filter(year>=1960)

4. Graphing

# line graph
ggplot(infl_unemp, aes( year))+
  geom_line(aes(y=inflation, color ="inflation"))+
  geom_line(aes(y = unemployment, color ="unemployment"))+
  scale_linetype_manual(values = c("inflation"="dash", "unemployment"="solid"))+
  scale_x_continuous(breaks = seq(1960, 2020, by =10))+
  labs(
    title = "Evidence for  the Phillips Curve",
    subtitle= "FRED: 1960-2022",
    x = "Years", y="Rates",
    color = "Legend"
  )+
  theme_minimal()
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's linetype values.

# scatter plot
ggplot(infl_unemp, aes(unemployment, inflation))+
  geom_point()+
  labs(
    title = "Evidence for  the Phillipss Curve",
    subtitle= "FRED: 1960-2022",
    x = "Unemployment", y="Inflation",
    color =""
  )

5. Estimating phillips curve by regressing inflation on unemployment

phillips<-lm(inflation~unemployment, data = infl_unemp)
p_se1<-sqrt(diag(vcovHC(phillips,"HC1"))) #robust se
summary(phillips)
## 
## Call:
## lm(formula = inflation ~ unemployment, data = infl_unemp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8466 -1.5626 -0.7031  0.9438  8.3032 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.8474     1.2006   1.539    0.129
## unemployment   0.3124     0.1946   1.605    0.114
## 
## Residual standard error: 2.46 on 61 degrees of freedom
## Multiple R-squared:  0.04054,    Adjusted R-squared:  0.02481 
## F-statistic: 2.577 on 1 and 61 DF,  p-value: 0.1136
infl_unemp$yhat<-predict(phillips)

ggplot(infl_unemp, aes(unemployment, inflation))+
  geom_point()+
  geom_line(aes(y=yhat, color ="yhat"))+
  labs(
    title = "Evidence for  the Phillips Curve",
    subtitle= "FRED: 1960-2022",
    x = "Unemployment", y="Inflation",
    color ="Legend "
  )

6. Regraph

ggplot(infl_unemp, aes(unemployment, inflation))+
  geom_point()+
  ggplot2::geom_text(aes(label = year), size = 3, vjust = -0.5, hjust = -0.5) +
  scale_color_viridis_d() +
  labs(
    title = "Evidence for  the Phillips curve",
    subtitle= "FRED: 1960-2022",
    x = "Unemployment", y="Inflation",
    color ="Legend"
    
  )

# to make results more clear
# Create a new variable to identify the subsets
infl_unemp$period <- with(infl_unemp,
                          ifelse(year >= 1974 & year <= 1983, "1974-1983",
                                  ifelse(year >= 2020, "2020+", 
                                         "Others")))



# Create the plot
ggplot(infl_unemp, aes(x = unemployment, y = inflation, color = period, label = year)) +
  geom_point() +
  geom_text_repel(size = 3) +
  scale_color_manual(values = c("1974-1983" = "red", "2020+" = "green", "Others" = "blue")) +
  labs(
    title = "Evidence for the Phillips Curve",
    subtitle = "FRED data 1960-2021",
    x = "Unemployment",
    y = "Inflation",
    color = "Legend"
  ) 
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

7. Investigate non-linearity

#creating a dummy variable 
infl_unemp<-infl_unemp%>%
  mutate(oilyears =ifelse(year >= 1974 & year <= 1983, 1,0))

# run a regression
phillips2<-lm(inflation~unemployment+oilyears, data = infl_unemp)
p_se2<-sqrt(diag(vcovHC(phillips2,"HC1"))) #robust se
summary(phillips2)
## 
## Call:
## lm(formula = inflation ~ unemployment + oilyears, data = infl_unemp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7564 -1.1164 -0.4024  1.2132  4.1681 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.2295     0.7882   5.366 1.36e-06 ***
## unemployment  -0.2398     0.1339  -1.791   0.0784 .  
## oilyears       5.7291     0.5836   9.817 4.29e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.536 on 60 degrees of freedom
## Multiple R-squared:  0.6318, Adjusted R-squared:  0.6196 
## F-statistic: 51.49 on 2 and 60 DF,  p-value: 9.569e-14
infl_unemp$yhat2<-predict(phillips2)
# Create separate variables for oil and non-oil years
infl_unemp <- infl_unemp %>%
  mutate(
    p_oil = ifelse(oilyears == 1, yhat2, 0),
    p_non_oil = ifelse(oilyears == 0, yhat2, 0)
  )

# Create the plot
ggplot(infl_unemp) +
  geom_point(aes(x = unemployment, y = inflation), color = "black") +
  geom_point(aes(x = unemployment, y = p_oil, color = "Oil Years"), linetype = "solid", group =1) +
  geom_point(aes(x = unemployment, y = p_non_oil, color = "Non-Oil Years"), linetype = "dashed", group =1) +
 scale_color_manual(values = c("Oil Years" = "red", "Non-Oil Years" = "blue")) +
  labs(
    title = "Scatter Plot with Predicted Lines for Oil and Non-Oil Years",
    x = "Unemployment",
    y = "Inflation",
    color = "Legend"
  ) 
## Warning in geom_point(aes(x = unemployment, y = p_oil, color = "Oil Years"), :
## Ignoring unknown parameters: `linetype`
## Warning in geom_point(aes(x = unemployment, y = p_non_oil, color = "Non-Oil
## Years"), : Ignoring unknown parameters: `linetype`

  #theme_minimal()
# plotting with conditions
ggplot(infl_unemp, aes(x = unemployment, y = inflation, color = period, label = year))+
  geom_point()+
  geom_line(data = subset(infl_unemp, year >= 1974 & year <= 1983), aes(x=unemployment,y=p_oil, colour = period))+
  geom_line(data= subset(infl_unemp, year<1974 | year>1983), aes(x=unemployment,y=p_non_oil, colour = period))+
  geom_text_repel(size = 3) +
  scale_color_manual(values = c("1974-1983" = "red", "2020+" = "blue", "Others" = "blue")) +
  
  
  labs(
    title = "Evidence for the Phillips curve",
    subtitle = "FRED data: 1960=2021"
  )
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

8)Introduce an Interaction Term

## generate the interaction variable on your own
infl_unemp<-infl_unemp%>%
  mutate(oilyearsXunemployment =oilyears*unemployment)
# run a regression
phillips3<-lm(inflation~unemployment+oilyears+oilyearsXunemployment, data = infl_unemp)
p_se3<-sqrt(diag(vcovHC(phillips3,"HC1"))) #robust se
summary(phillips3)
## 
## Call:
## lm(formula = inflation ~ unemployment + oilyears + oilyearsXunemployment, 
##     data = infl_unemp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9436 -1.0660 -0.5469  1.1376  4.0519 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             3.8663     0.8438   4.582 2.44e-05 ***
## unemployment           -0.1758     0.1441  -1.220  0.22751    
## oilyears                8.9952     2.8296   3.179  0.00235 ** 
## oilyearsXunemployment  -0.4511     0.3825  -1.179  0.24294    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.531 on 59 degrees of freedom
## Multiple R-squared:  0.6403, Adjusted R-squared:  0.622 
## F-statistic: 35.01 on 3 and 59 DF,  p-value: 3.974e-13
infl_unemp$yhat3 =predict(phillips3) # predict yhat3
infl_unemp<-infl_unemp%>%
  mutate(
    p2_oil =ifelse(oilyears==1,yhat3,0),
    p2_non_oil =ifelse(oilyears==0,yhat3,0)
    )

# create a graph
interactions<-ggplot(infl_unemp) +
  geom_point(aes(x = unemployment, y = inflation), color = "black") +
  geom_point(aes(x = unemployment, y = p2_oil, color = "Oil Years")) +
  geom_point(aes(x = unemployment, y = p2_non_oil, color = "Non-Oil Years")) +
  scale_color_manual(values = c("Oil Years" = "red", "Non-Oil Years" = "blue")) +
  labs(
    title = "Scatter Plot with Predicted Lines for Oil and Non-Oil Years",
    x = "Unemployment",
    y = "Inflation",
    color = "Legend"
  ) 
  #theme_minimal()
print(interactions)

9) Let’s see what the graph looks like now!

fancy_scatter<-ggplot(infl_unemp, aes(x = unemployment, y = inflation, color = period, label = year))+
  geom_point()+
  geom_line(data = subset(infl_unemp, year >= 1974 & year <= 1983), aes(x=unemployment,y=p2_oil, colour = period))+
  geom_line(data= subset(infl_unemp, year<1974 | year>1983), aes(x=unemployment,y=p2_non_oil, colour = period))+
  geom_text_repel(size = 3) +
  scale_color_manual(values = c("1974-1983" = "red", "2020+" = "blue", "Others" = "blue")) +
  
  
  labs(
    title = "Evidence for the Phillips Curve?",
    subtitle = "FRED data: 1960=2021"
  )
print(fancy_scatter)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave(filename = "fancy_scatter.png", plot = fancy_scatter)
## Saving 7 x 5 in image
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps