From 47df29e0ba861c6bdfe1815e74dbd20771ff24d9 Mon Sep 17 00:00:00 2001 From: Andrew Baker Date: Mon, 17 Jan 2022 14:00:32 -0800 Subject: [PATCH] Upload all code and updated README file --- .DS_Store | Bin 0 -> 6148 bytes .Rprofile | 1 + .gitignore | 4 + Code/1a. Simulations - Save Data.R | 85 ++ Code/1b. TWFE + binary DiD.R | 733 ++++++++++ Code/1c. Alternative Estimators.R | 69 + Code/1d. Goodman-Bacom Decomp.R | 409 ++++++ Code/1e. Alternative Event Study Estimators.R | 242 ++++ Code/1f. Rejections.R | 190 +++ ...res With Event Studies and Heterogeneity.R | 321 ++++ Code/2. BLL.R | 616 ++++++++ Code/3. FHLT.R | 629 ++++++++ JFE_DID.Rproj | 13 + README.html | 234 +++ README.md | 25 +- renv.lock | 1285 +++++++++++++++++ renv/.gitignore | 5 + renv/activate.R | 668 +++++++++ renv/settings.dcf | 8 + 19 files changed, 5536 insertions(+), 1 deletion(-) create mode 100644 .DS_Store create mode 100644 .Rprofile create mode 100644 .gitignore create mode 100644 Code/1a. Simulations - Save Data.R create mode 100644 Code/1b. TWFE + binary DiD.R create mode 100644 Code/1c. Alternative Estimators.R create mode 100644 Code/1d. Goodman-Bacom Decomp.R create mode 100644 Code/1e. Alternative Event Study Estimators.R create mode 100644 Code/1f. Rejections.R create mode 100644 Code/1g. Failures With Event Studies and Heterogeneity.R create mode 100644 Code/2. BLL.R create mode 100644 Code/3. FHLT.R create mode 100644 JFE_DID.Rproj create mode 100644 README.html create mode 100644 renv.lock create mode 100644 renv/.gitignore create mode 100644 renv/activate.R create mode 100644 renv/settings.dcf diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..951c84fd97b15e93cb17d9f0880a2355624dacb3 GIT binary patch literal 6148 zcmeHK%Wl&^6ur~BU{fAJLZU2?X2UK*NrNi7sAN*JV23b*1&`LoR^rO>Shhn1A(ejv zet2I!<|QiM`>VH1Zb`_bH?{IcaGzk3;>Y+IO+nl0Kmq^(K?6S3Bv8X zEy#+}Ttp;1M>^>GFW4ww@H7g;fMMWoWPpFW7TkmZBv9$!@9;y@He#L^rYuCAz4htx z(fOk<#2IhQd0|`v7t%fWZNd$528Hgy%y|Zr4hv8q|6l;U&B02%Bs`(Mpjm>wKHdJ$FZFY&NF|=CVmxE6(m_)h~OlO(dFsmB#YRj&&F9)iafqFh>7-E z8`J6L?d>-0b$hcmoo;V-+Voa$Yc_N2t2eqk_x4AxUQgf54&U+>;69BTwUA2+kKr8- z<_pwcfpD4InxyY|@4wG~2#TC0%_rG)V)fiF{US?QHbCuhox;I%r@?@t*j_)4u<0xC3z|n%*#A7|C`N0mHz5 z#Q?E9-|OR&)Y-bQIDXbrZ2Q=_aKBoj2*FMr$6~;b;tgy<&_?+H(N$PWL=B4hBOqun KonhdQGVlWwKB*-D literal 0 HcmV?d00001 diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 0000000..81b960f --- /dev/null +++ b/.Rprofile @@ -0,0 +1 @@ +source("renv/activate.R") diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/Code/1a. Simulations - Save Data.R b/Code/1a. Simulations - Save Data.R new file mode 100644 index 0000000..6c783ea --- /dev/null +++ b/Code/1a. Simulations - Save Data.R @@ -0,0 +1,85 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# source my passwords +source('/Users/andrew/Box Sync/Projects/Passwords/Password.R') + +# Connect to WRDS Server -------------------------------------------------- +wrds <- dbConnect(Postgres(), + host = 'wrds-pgdata.wharton.upenn.edu', + port = 9737, + user = user, + password = password, + dbname = 'wrds', + sslmode = 'require') + +# download compustat data +comp <- tbl(wrds, sql("SELECT gvkey, fyear, oibdp, at, indfmt, datafmt, popsrc, consol, fic, sich FROM comp.funda")) %>% + # filter the data - between 1979 and 2015, non-missing assets, and in the US + filter(indfmt == 'INDL' & datafmt == 'STD' & popsrc == 'D' & consol == 'C' & !is.na(fyear) & + fyear %>% between(1979, 2015) & !is.na(at) & at > 0 & fic == "USA") %>% + # make ROA variable + group_by(gvkey) %>% + mutate(roa = oibdp / lag(at), + gvkey = as.numeric(gvkey)) %>% + # drop missing ROA + filter(!is.na(roa)) %>% + # drop 1979 observations - just needed lagged assets + filter(fyear >= 1980) %>% + ungroup() %>% + collect() + +# download comp header which has more info on location etc +comp_header <- tbl(wrds, sql("SELECT * FROM crsp.comphead")) %>% + mutate(gvkey = as.numeric(gvkey)) %>% + collect() + +# merge in state of incorporation and industry information +comp <- comp %>% + left_join(comp_header %>% select(gvkey, incorp, sic)) %>% + # drop if state of incorporation is missing or not 50 states + filter(!is.na(incorp) & incorp %in% state.abb) %>% + # clean up SIC code - use historical sic code if available, if not use header sic + mutate(sich = coalesce(sich, sic)) %>% + # drop financial firms + filter(!(sich %in% c(6000:6999))) + +# make sure that each firm has at least 5 observations +comp <- comp %>% + group_by(gvkey) %>% + add_tally() %>% + filter(n >= 5) %>% + ungroup() + +# winsorize ROA at 99, and censor at -1 +wins <- function(x) { + # winsorize and return + case_when( + is.na(x) ~ NA_real_, + x < -1 ~ -1, + x > quantile(x, 0.99, na.rm = TRUE) ~ quantile(x, 0.99, na.rm = TRUE), + TRUE ~ x + ) +} + +# winsorize ROA by year +comp <- comp %>% + group_by(fyear) %>% + mutate(roa = wins(roa)) %>% + arrange(gvkey, fyear) %>% + ungroup() + +# save +saveRDS(comp, here::here("Data", "simulation_data.rds")) \ No newline at end of file diff --git a/Code/1b. TWFE + binary DiD.R b/Code/1b. TWFE + binary DiD.R new file mode 100644 index 0000000..4cf8725 --- /dev/null +++ b/Code/1b. TWFE + binary DiD.R @@ -0,0 +1,733 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) + +# estimate the fixed effects regression of ROA on firm and year fixed effects +mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = comp) + +# get the moments for the residuals from the baseline model +resid_sd <- sd(mod$residuals) +resid_skew <- skewness(mod$residuals) +resid_kurtosis <- kurtosis(mod$residuals) + +# get firm and years and state of incorporation +shell <- comp %>% select(gvkey, fyear) + +# get the firm and year fes, as well as the standard deviation of ROA +firm_fes <- fixef(mod)$gvkey +n_firm_fes <- length(fixef(mod)$gvkey) +year_fes <- fixef(mod)$fyear +n_year_fes <- length(fixef(mod)$fyear) +sd_roa <- sd(comp$roa) + +# function to run simulation, pull firm and year FE, as well as the residuals from their empirical distributions +# then add in treatment effects following DGP in our six simulations +run_sim <- function(i, p) { + + p() + + # pull firm FEfrom empirical distribution with replacement, + # also uniformly assign state of incorporation + sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) + ) + + # pull year FE from the empirical distribution with replacement + sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) + ) + + # merge in the FE to the firm/year observations and add in residuals from the + # empirical distribution. ROA is the linear combination of the FEs and the residual + data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + + # save the moments of the residuals from this dataset + sim_mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = data) + mom <- c(sd(sim_mod$residuals), skewness(sim_mod$residuals), kurtosis(sim_mod$residuals)) + + # randomly assign the state of incorporation into treatment groups + # put random states into a vector + random_states <- sample(state.abb, length(state.abb), replace = FALSE) + + # now add in the treatment effect - One Treatment Period, Constant Treatment Effects + data1 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:25] ~ "T", + incorp %in% random_states[26:50] ~ "C"), + # add in treatment effects - constant half of a standard deviation of ROA + delta = case_when(fyear >= 1998 & group == "T" ~ 0.5*sd_roa, + TRUE ~ 0), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(group == "T" & fyear >= 1998, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - 1998, + # get a first treat variable for CS + first_treat = if_else(group == "T", 1998, 0)) + + # One Treatment Period, Dynamic Treatment Effects + data2 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:25] ~ "T", + incorp %in% random_states[26:50] ~ "C"), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when(fyear >= 1998 & group == "T" ~ 0.05*sd_roa, + TRUE ~ 0), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - 1998 + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(group == "T" & fyear >= 1998, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - 1998, + # get a first treat variable for CS + first_treat = if_else(group == "T", 1998, 0)) + + # Multiple Treatment Periods and Constant Equal Treatment Effects + data3 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - half a percentage of standard deviation of ROA + delta = case_when( + fyear >= group & group == 1989 ~ .5*sd_roa, + fyear >= group & group == 1998 ~ .5*sd_roa, + fyear >= group & group == 2007 ~ .5*sd_roa, + TRUE ~ 0 + ), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + # Multiple Treatment Periods and Constant Different Treatment Effects + data4 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - varying percentage of standard deviation of ROA + delta = case_when( + fyear >= group & group == 1989 ~ .5*sd_roa, + fyear >= group & group == 1998 ~ .3*sd_roa, + fyear >= group & group == 2007 ~ .1*sd_roa, + TRUE ~ 0 + ), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + # Multiple Treatment Periods and Dynamic Equal Treatment Effects + data5 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - 3% of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .03*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .03*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + # Multiple Treatment Periods and Dynamic Unequal Treatment Effects + data6 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - varying percent of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .05*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .01*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + # make function to get estimates and treatment effects from data for k in 1:6 + get_est <- function(k) { + + # load in k-specific data + dt <- get(paste0("data", k)) + + # get values + # full treatment effect + # observation level + full_te_1 <- dt %>% filter(treat == 1) %>% pull(delta) %>% mean() + # firm level average + full_te_2 <- dt %>% filter(treat == 1) %>% group_by(gvkey) %>% + summarize(m = mean(delta)) %>% pull(m) %>% mean() + + # full treatment effects with no 2007 treatment group + full_te_no_2007_1 <- dt %>% filter(treat == 1 & first_treat < 2007) %>% pull(delta) %>% mean() + full_te_no_2007_2 <- dt %>% filter(treat == 1 & first_treat < 2007) %>% + group_by(gvkey) %>% summarize(m = mean(delta)) %>% pull(m) %>% mean() + + # treatment effect for just years 1 - 5 after treatment + te_5_1 <- dt %>% filter(treat == 1 & rel_year %in% 0:5 & first_treat < 2007) %>% + pull(delta) %>% mean() + te_5_2 <- dt %>% filter(treat == 1 & rel_year %in% 0:5 & first_treat < 2007) %>% + group_by(gvkey) %>% summarize(m = mean(delta)) %>% pull(m) %>% mean() + + # get twfe estimates on full data + twfe <- feols(treat_roa ~ treat | gvkey + fyear, cluster = "incorp", data = dt)$coefficients[1] + + # get CS estimates + # first full the full set of attgts + CS_out <- att_gt(yname = "treat_roa", + data = dt, + gname = "first_treat", + idname = "gvkey", + tname = "fyear", + bstrap = F, + cband = F, + est_method = "reg", + control_group = "notyettreated", + print_details = F, + panel = TRUE, + allow_unbalanced_panel = TRUE) + + # aggregate by group and then take average over number in groups + cs_aggte_group <- aggte(CS_out, type = "group", min_e = 0, max_e = 5, bstrap = FALSE, cband = FALSE) + + cs <- weighted.mean( + cs_aggte_group$att.egt, + dt %>% + filter(treat == 1 & rel_year %in% 0:5 & first_treat < 2007) %>% + group_by(first_treat) %>% + summarize(n = length(unique(gvkey))) %>% + arrange(first_treat) %>% + pull(n) + ) + + # Stacked regressions + # first make the stacked datasets + # get the treatment cohorts + cohorts <- dt %>% + # drop never treateds, and also 2007 when everyone is treated + filter(!(first_treat %in% c(0, 2007))) %>% + pull(first_treat) %>% + unique() + + # make formula to create the dataset + getdata <- function(j) { + + #keep what we need + dt %>% + # keep treated units and all units not treated within -5 to 5 + filter(first_treat == j | first_treat == 0 | first_treat > j + 5) %>% + # keep just year -5 to 5 + filter(fyear >= j - 5 & fyear <= j + 5) %>% + # create an indicator for the dataset + mutate(df = j) + } + + # get data stacked + stacked_data <- map_df(cohorts, getdata) + + # get stacked value + stacked <- feols(treat_roa ~ treat | gvkey^df + fyear^df, data = stacked_data)$coefficients[1] + + # finally get the sun and abraham value + # need to make a dataset without observations more than 5 years after treatment + sa_data <- dt %>% + filter(treat == 0 | rel_year <= 5) %>% + filter(fyear < 2007) + + # run SA model through the fixest package + sun_ab <- feols(treat_roa ~ 1 + sunab(first_treat, fyear) | gvkey + fyear, sa_data) + # get the overall att + sa <- summary(sun_ab, agg = "att")$coeftable[1] + + tibble(sim = i, dt = k, full_te_1 = full_te_1, full_te_2 = full_te_2, full_te_no_2007_1 = full_te_no_2007_1, + full_te_no_2007_2 = full_te_no_2007_2, te_5_1 = te_5_1, te_5_2 = te_5_2, + twfe = twfe, cs = cs, stacked = stacked, sa = sa) + } + + # run it for our six simulations and store results in a dataset + estimates <- map_dfr(1:6, get_est) + + # get moments into a tibble as well + moments <- tibble( + moment = 1:3, + value = mom + ) %>% + mutate(sim = i) + + # output a list of both objects that we want + list(moments = moments, + estimates = estimates) + +} + +# parallelize and do 500 simulations +x <- 1:500 +options(future.globals.maxSize= 891289600) +set.seed(28101695) +plan(multisession, workers = 6) +with_progress({ + p <- progressor(steps = length(x)) + out <- future_map( + .x = x, + .f = run_sim, + p = p, + .options = furrr_options(globals = c("mod", "shell", "firm_fes", "n_firm_fes", + "year_fes", "n_year_fes", "sd_roa"), + packages = c("tidyverse", "fixest", "e1071", "did", "fastDummies"), + seed = TRUE) + )}) + +# unpack the two datasets +moments <- do.call(function(...) mapply(bind_rows, ..., SIMPLIFY=F), args = out)$moments +estimates <- do.call(function(...) mapply(bind_rows, ..., SIMPLIFY=F), args = out)$estimates + +# save a version +saveRDS(moments, here::here("Data", "moments.rds")) +saveRDS(estimates, here::here("Data", "estimates.rds")) + +## Next - do one pull of the simulation to plot the group means +# pull firm FE from empirical distribution with replacement +set.seed(28101695) +sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) +) + +# pull year FE from the empirical distribution with replacement +sim_year_fe <- tibble( + fyear = unique(comp$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) +) + +# merge in the FE to the firm/year/state observations and add in residuals from the +# empirical distribution. ROA is the linear combination of the FEs and the residual +data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + +# randomly assign the state of incorporation into treatment groups +random_states <- sample(state.abb, length(state.abb), replace = FALSE) + +# One Treatment Period, Constant Treatment Effects +data1 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:25] ~ "T", + incorp %in% random_states[26:50] ~ "C"), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta = case_when(fyear >= 1998 & group == "T" ~ 0.5*sd_roa, + TRUE ~ 0), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(group == "T" & fyear >= 1998, 1, 0), + dt = "Simulation 1") + +# One Treatment Period, Dynamic Treatment Effects +data2 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:25] ~ "T", + incorp %in% random_states[26:50] ~ "C"), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when(fyear >= 1998 & group == "T" ~ 0.05*sd_roa, + TRUE ~ 0), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - 1998 + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(group == "T" & fyear >= 1998, 1, 0), + dt = "Simulation 2") + +# Multiple Treatment Periods and Constant Equal Treatment Effects +data3 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta = case_when( + fyear >= group & group == 1989 ~ .5*sd_roa, + fyear >= group & group == 1998 ~ .5*sd_roa, + fyear >= group & group == 2007 ~ .5*sd_roa, + TRUE ~ 0 + ), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + group = as.character(group), + dt = "Simulation 3") + +# Multiple Treatment Periods and Constant Different Treatment Effects +data4 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta = case_when( + fyear >= group & group == 1989 ~ .5*sd_roa, + fyear >= group & group == 1998 ~ .3*sd_roa, + fyear >= group & group == 2007 ~ .1*sd_roa, + TRUE ~ 0 + ), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# Multiple Treatment Periods and Dynamic Equal Treatment Effects +data5 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .03*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .03*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# Multiple Treatment Periods and Unequal Dynamic Treatment Effects +data6 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .05*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .01*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# plot for simulation 1 +sim1_means <- data1 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "ROA", color = "Group") + + geom_vline(xintercept = 1997.5, color = '#4B5F6C', + linetype = "dashed", size = 1) + + scale_y_continuous(limits = c(-0.5*sd_roa, 1.5*sd_roa)) + + scale_color_manual(values = c("#A7473A", "#4B5F6C")) + + ggtitle("Simulation 1") + + labs(subtitle = expression(paste("Not Staggered + Constant ", delta))) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot for simulation 2 +sim2_means <- data2 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "", color = "Group") + + geom_vline(xintercept = 1997.5, color = '#4B5F6C', + linetype = "dashed", size = 1) + + scale_color_manual(values = c("#A7473A", "#4B5F6C")) + + scale_y_continuous(limits = c(-0.5*sd_roa, 1.5*sd_roa)) + + ggtitle("Simulation 2") + + labs(subtitle = expression(paste("Not Staggered + Dynamic ", delta))) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot for simulation 3 +sim3_means <- data3 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "", color = "Group") + + scale_y_continuous(limits = c(-0.5*sd_roa, 1.5*sd_roa)) + + geom_vline(xintercept = 1988.5, color = "#A7473A", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 1997.5, color = "#4B5F6C", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 2006.5, color = "#51806a", + linetype = "dashed", size = 1) + + scale_color_manual(values = c("#A7473A", "#4B5F6C", "#51806a")) + + ggtitle("Simulation 3") + + labs(subtitle = expression(paste("Staggered + Constant/Equal ", delta))) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot for simulation 4 +sim4_means <- data4 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "ROA", color = "Group") + + scale_y_continuous(limits = c(-.5*sd_roa, 1.5*sd_roa)) + + geom_vline(xintercept = 1988.5, color = "#A7473A", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 1997.5, color = "#4B5F6C", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 2006.5, color = "#51806a", + linetype = "dashed", size = 1) + + scale_color_manual(values = c("#A7473A", "#4B5F6C", "#51806a")) + + ggtitle("Simulation 4") + + labs(subtitle = expression(paste("Staggered + Constant/Unequal ", delta))) + + theme(legend.position = 'none', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot for simulation 5 +sim5_means <- data5 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "", color = "Group") + + scale_y_continuous(limits = c(-.5*sd_roa, 1.5*sd_roa)) + + geom_vline(xintercept = 1988.5, color = "#A7473A", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 1997.5, color = "#4B5F6C", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 2006.5, color = "#51806a", + linetype = "dashed", size = 1) + + scale_color_manual(values = c("#A7473A", "#4B5F6C", "#51806a")) + + ggtitle("Simulation 5") + + labs(subtitle = expression(paste("Staggered + Dynamic/Equal ", delta))) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot for simulation 6 +sim6_means <- data6 %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/30, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = factor(group), + color = factor(group)), size = 1) + + labs(x = "", y = "", color = "Group") + + scale_y_continuous(limits = c(-.5*sd_roa, 1.5*sd_roa)) + + geom_vline(xintercept = 1988.5, color = "#A7473A", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 1997.5, color = "#4B5F6C", + linetype = "dashed", size = 1) + + geom_vline(xintercept = 2006.5, color = "#51806a", + linetype = "dashed", size = 1) + + scale_color_manual(values = c("#A7473A", "#4B5F6C", "#51806a")) + + ggtitle("Simulation 6") + + labs(subtitle = expression(paste("Staggered + Dynamic/Unequal ", delta))) + + theme(legend.position = 'none', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# plot treatment paths - sims 1 - 3 +Sims_1_3_trends <- sim1_means + sim2_means + sim3_means + +#save +ggsave(Sims_1_3_trends, filename = here::here("Figs_Tables", "Sims_1_3_trends.png"), dpi = 500, + width = 10, height = 4) + +# plot treatment paths - sims 4 - 6 +Sims_4_6_trends <- sim4_means + sim5_means + sim6_means + +#save +ggsave(Sims_4_6_trends, filename = here::here("Figs_Tables", "Sims_4_6_trends.png"), dpi = 500, + width = 10, height = 4) + +# now plot the distributions for the bottom panels +# make formula to make the plots +make_dist_plot <- function(i, name) { + estimates %>% + filter(dt == i) %>% + ggplot(aes(x = twfe)) + + geom_density(aes(fill = "TWFE Estimates"), alpha = 3/5) + + geom_vline(aes(xintercept = mean(estimates %>% filter(dt == i) %>% pull(full_te_1)), + color = "Observation Average"), + linetype = "dashed", size = 1, alpha = 3/5,) + + geom_vline(aes(xintercept = mean(estimates %>% filter(dt == i) %>% pull(full_te_2)), + color = "Firm Average"), + linetype = "dashed", size = 1, alpha = 3/5) + + ggtitle(paste0("Simulation ", i)) + + labs(subtitle = TeX(paste0(name, "$\\delta$"))) + + labs(y = "", x = if_else(i %in% c(2, 5), expression(widehat(delta^'DD')), expression(""))) + + scale_color_manual(name = "", values = c("Observation Average" = "#A7473A", + "Firm Average" = "#4B5F6C")) + + scale_fill_manual(name = "", values = c("TWFE Estimates" = "#CACFD0")) + + theme(plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + legend.position = if_else(i %in% c(2, 5), "bottom", "none")) +} + +# run all the densities +sim1_estimates <- make_dist_plot(1, "Not Staggered + Constant ") +sim2_estimates <- make_dist_plot(2, "Not Staggered + Dynamic ") +sim3_estimates <- make_dist_plot(3, "Staggered + Constant/Equal ") +sim4_estimates <- make_dist_plot(4, "Staggered + Constant/Unequal ") +sim5_estimates <- make_dist_plot(5, "Staggered + Dynamic/Equal ") +sim6_estimates <- make_dist_plot(6, "Staggered + Dynamic/Unequal ") + +# plot estimates of TWFE DD +Sims_1_3_dist <- sim1_estimates + sim2_estimates + sim3_estimates +Sims_4_6_dist <- sim4_estimates + sim5_estimates + sim6_estimates + +# save +ggsave(Sims_1_3_dist, filename = here::here("Figs_Tables", "Sims_1_3_dist.png"), dpi = 500, + width = 10, height = 4) +ggsave(Sims_4_6_dist, filename = here::here("Figs_Tables", "Sims_4_6_dist.png"), dpi = 500, + width = 10, height = 4) diff --git a/Code/1c. Alternative Estimators.R b/Code/1c. Alternative Estimators.R new file mode 100644 index 0000000..6ed373c --- /dev/null +++ b/Code/1c. Alternative Estimators.R @@ -0,0 +1,69 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) +estimates <- read_rds(here::here("Data", "estimates.rds")) + +## Make plots for alternative estimators +# make function +make_alt_dist_plot <- function(i, name) { + estimates %>% + filter(dt == i) %>% + pivot_longer(cols = c(cs, stacked, sa)) %>% + mutate(name = case_when( + name == "cs" ~ "Callaway & Sant'Anna", + name == "sa" ~ "Sun & Abraham", + TRUE ~ "Stacked" + )) %>% + ggplot(aes(x = value, group = name, fill = name)) + + geom_density(alpha = 1/3) + + scale_fill_manual(values = c("#115896", "#719E56", "#BA2F00")) + + geom_vline(aes(xintercept = mean(estimates %>% filter(dt == i) %>% pull(te_5_1)), + color = "Observation Average"), + linetype = "dashed", size = 1, alpha = 3/5,) + + geom_vline(aes(xintercept = mean(estimates %>% filter(dt == i) %>% pull(te_5_2)), + color = "Firm Average"), + linetype = "dashed", size = 1, alpha = 3/5) + + scale_color_manual(name = "", values = c("Observation Average" = "#A7473A", + "Firm Average" = "#4B5F6C")) + + ggtitle(paste0("Simulation ", i)) + + labs(subtitle = TeX(paste0(name, "$\\delta$"))) + + labs(y = "", x = if_else(i == 5, expression(widehat(delta^'DD')), expression(""))) + + theme(plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + legend.position = if_else(i == 5, "bottom", "none"), + legend.title = element_blank()) +} + +# run all the densities +sim1_alt_estimates <- make_alt_dist_plot(1, "Not Staggered + Constant ") +sim2_alt_estimates <- make_alt_dist_plot(2, "Not Staggered + Dynamic ") +sim3_alt_estimates <- make_alt_dist_plot(3, "Staggered + Constant/Equal ") +sim4_alt_estimates <- make_alt_dist_plot(4, "Staggered + Constant/Unequal ") +sim5_alt_estimates <- make_alt_dist_plot(5, "Staggered + Dynamic/Equal ") +sim6_alt_estimates <- make_alt_dist_plot(6, "Staggered + Dynamic/Unequal ") + +# save +Sims_1_6_alt_dist <- (sim1_alt_estimates + sim2_alt_estimates + sim3_alt_estimates) / + (sim4_alt_estimates + sim5_alt_estimates + sim6_alt_estimates) + +ggsave(Sims_1_6_alt_dist, filename = here::here("Figs_Tables", "Sims_1_6_alt_dist.png"), dpi = 500, + width = 10, height = 8) \ No newline at end of file diff --git a/Code/1d. Goodman-Bacom Decomp.R b/Code/1d. Goodman-Bacom Decomp.R new file mode 100644 index 0000000..5be2698 --- /dev/null +++ b/Code/1d. Goodman-Bacom Decomp.R @@ -0,0 +1,409 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) + +# estimate the fixed effects regression of ROA on firm and year fixed effects +mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = comp) + +# get the moments for the residuals from the baseline model +resid_sd <- sd(mod$residuals) +resid_skew <- skewness(mod$residuals) +resid_kurtosis <- kurtosis(mod$residuals) + +# get firm and years and state of incorporation +shell <- comp %>% select(gvkey, fyear) + +# get the firm and year fes, as well as the standard deviation of ROA +firm_fes <- fixef(mod)$gvkey +n_firm_fes <- length(fixef(mod)$gvkey) +year_fes <- fixef(mod)$fyear +n_year_fes <- length(fixef(mod)$fyear) +sd_roa <- sd(comp$roa) + +## Now do the BG decomposition +# First make a balanced panel dataset of simulation 6 +# merge in the FE to the firm/year/state observations and add in residuals from the +# empirical distribution. ROA is the linear combination of the FEs and the residual +set.seed(28101695) +# pull firm FE from empirical distribution with replacement +sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) +) + +# pull year FE from the empirical distribution with replacement +sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) +) + +# combine all of the data +data <- crossing(gvkey = sample(sim_firm_fe$gvkey, floor(n_firm_fes/10), replace = FALSE), + fyear = sim_year_fe$fyear) %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, floor(n_firm_fes/10) * n_year_fes, replace = TRUE), + roa = firm_fe + year_fe + resid) %>% + left_join(comp %>% select(gvkey, incorp) %>% distinct()) + +# randomly assign the state of incorporation into treatment groups +# put random states into a vector +random_states <- sample(state.abb, length(state.abb), replace = FALSE) + +# Multiple Treatment Periods and Constant Different Treatment Effects +data4 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta = case_when( + fyear >= group & group == 1989 ~ .5*sd_roa, + fyear >= group & group == 1998 ~ .3*sd_roa, + fyear >= group & group == 2007 ~ .1*sd_roa, + TRUE ~ 0 + ), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# Multiple Treatment Periods and Dynamic Equal Treatment Effects +data5 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .03*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .03*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# Multiple Treatment Periods and Dynamic Treatment Effects +data6 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - percentage of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .05*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .01*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0)) + +# Figure - GB Decomposition ------------------------------------------------------------- +# calculate the bacon decomposition without covariates for Sims 4-6 + +bacon_4 <- bacon(treat_roa ~ treat, + data = data4, + id_var = "gvkey", + time_var = "fyear") + +bacon_5 <- bacon(treat_roa ~ treat, + data = data5, + id_var = "gvkey", + time_var = "fyear") + + +bacon_6 <- bacon(treat_roa ~ treat, + data = data6, + id_var = "gvkey", + time_var = "fyear") + +# make data frames with all of the info for the plots +bacon_4_plotdata <- bacon_4 %>% + # fix up the names + mutate(treated = substr(treated, 3, 4), + untreated = substr(untreated, 3, 4), + name = glue::glue("T = '{treated} \n C = '{untreated}")) %>% + # get the true weights and estimates from the simulation + mutate( + weight2 = case_when( + treated == "89" ~ mean(data4$group == 1989)/2, + treated == "98" ~ mean(data4$group == 1998)/2, + treated == "07" ~ mean(data4$group == 2007)/2), + estimate2 = case_when( + treated == "89" & untreated == "98" ~ .5*sd_roa, + treated == "89" & untreated == "07" ~ .5*sd_roa, + treated == "98" & untreated == "07" ~ .3*sd_roa, + treated == "98" & untreated == "89" ~ .3*sd_roa, + treated == "07" & untreated == "98" ~ .1*sd_roa, + treated == "07" & untreated == "89" ~ .1*sd_roa)) %>% + pivot_longer(cols = c(weight, weight2), + names_to = "weight", values_to = "weight_vl") %>% + pivot_longer(cols = c(estimate, estimate2), + names_to = "estimate", values_to = "estimate_vl") %>% + filter(weight == "weight" & estimate == "estimate" | + weight == "weight2" & estimate == "estimate2") %>% + mutate(identifier = case_when( + type == "Later vs Earlier Treated" & weight == "weight" ~ "Later vs. Earlier Treated - DiD Estimate", + type == "Later vs Earlier Treated" & weight == "weight2" ~ "Later vs. Earlier Treated - True Value", + type == "Earlier vs Later Treated" & weight == "weight" ~ "Earlier vs. Later Treated - DiD Estimate", + type == "Earlier vs Later Treated" & weight == "weight2" ~ "Earlier vs. Later Treated - True Value")) + + +# function to get true group level treatment effects +get_true_te <- function(dt, start, end, grp) { + dt %>% + filter(treat == 1 & group == grp & fyear %>% between(start, end)) %>% + group_by(gvkey) %>% + summarize(mdelta = mean(delta)) %>% + pull(mdelta) %>% + mean() +} + +bacon_5_plotdata <- bacon_5 %>% + mutate(treated = substr(treated, 3, 4), + untreated = substr(untreated, 3, 4), + name = glue::glue("T = '{treated} \n C = '{untreated}")) %>% + # get the true weights and estimates from the simulation + mutate( + weight2 = case_when( + treated == "89" ~ mean(data5$group == 1989)/2, + treated == "98" ~ mean(data5$group == 1998)/2, + treated == "07" ~ mean(data5$group == 2007)/2), + estimate2 = case_when( + treated == "89" & untreated == "98" ~ get_true_te(data5, 1989, 1997, 1989), + treated == "89" & untreated == "07" ~ get_true_te(data5, 1989, 2006, 1989), + treated == "98" & untreated == "07" ~ get_true_te(data5, 1998, 2006, 1998), + treated == "98" & untreated == "89" ~ get_true_te(data5, 1998, 2015, 1998), + treated == "07" & untreated == "98" ~ get_true_te(data5, 2007, 2017, 2007), + treated == "07" & untreated == "89" ~ get_true_te(data5, 2007, 2017, 2007))) %>% + pivot_longer(cols = c(weight, weight2), + names_to = "weight", values_to = "weight_vl") %>% + pivot_longer(cols = c(estimate, estimate2), + names_to = "estimate", values_to = "estimate_vl") %>% + filter(weight == "weight" & estimate == "estimate" | + weight == "weight2" & estimate == "estimate2") %>% + mutate(identifier = case_when( + type == "Later vs Earlier Treated" & weight == "weight" ~ "Later vs. Earlier Treated - DiD Estimate", + type == "Later vs Earlier Treated" & weight == "weight2" ~ "Later vs. Earlier Treated - True Value", + type == "Earlier vs Later Treated" & weight == "weight" ~ "Earlier vs. Later Treated - DiD Estimate", + type == "Earlier vs Later Treated" & weight == "weight2" ~ "Earlier vs. Later Treated - True Value")) + +bacon_6_plotdata <- bacon_6 %>% + mutate(treated = substr(treated, 3, 4), + untreated = substr(untreated, 3, 4), + name = glue::glue("T = '{treated} \n C = '{untreated}")) %>% + # get the true weights and estimates from the simulation + mutate( + weight2 = case_when( + treated == "89" ~ mean(data6$group == 1989)/2, + treated == "98" ~ mean(data6$group == 1998)/2, + treated == "07" ~ mean(data6$group == 2007)/2), + estimate2 = case_when( + treated == "89" & untreated == "98" ~ get_true_te(data6, 1989, 1997, 1989), + treated == "89" & untreated == "07" ~ get_true_te(data6, 1989, 2006, 1989), + treated == "98" & untreated == "07" ~ get_true_te(data6, 1998, 2006, 1998), + treated == "98" & untreated == "89" ~ get_true_te(data6, 1998, 2015, 1998), + treated == "07" & untreated == "98" ~ get_true_te(data6, 2007, 2017, 2007), + treated == "07" & untreated == "89" ~ get_true_te(data6, 2007, 2017, 2007))) %>% + pivot_longer(cols = c(weight, weight2), + names_to = "weight", values_to = "weight_vl") %>% + pivot_longer(cols = c(estimate, estimate2), + names_to = "estimate", values_to = "estimate_vl") %>% + filter(weight == "weight" & estimate == "estimate" | + weight == "weight2" & estimate == "estimate2") %>% + mutate(identifier = case_when( + type == "Later vs Earlier Treated" & weight == "weight" ~ "Later vs. Earlier Treated - DiD Estimate", + type == "Later vs Earlier Treated" & weight == "weight2" ~ "Later vs. Earlier Treated - True Value", + type == "Earlier vs Later Treated" & weight == "weight" ~ "Earlier vs. Later Treated - DiD Estimate", + type == "Earlier vs Later Treated" & weight == "weight2" ~ "Earlier vs. Later Treated - True Value")) + +### merge in the true values +# set colors, fills, and shapes for the decomp plot +colors <- c("Earlier vs. Later Treated - DiD Estimate" = "#A7473A", + "Later vs. Earlier Treated - DiD Estimate" = "#4B5F6C", + "Earlier vs. Later Treated - True Value" = "#A7473A", + "Later vs. Earlier Treated - True Value" = "#4B5F6C") + +fills <- c("Earlier vs. Later Treated - DiD Estimate" = "#A7473A", + "Later vs. Earlier Treated - DiD Estimate" = "#4B5F6C", + "Earlier vs. Later Treated - True Value" = "white", + "Later vs. Earlier Treated - True Value" = "white", + "07" = "#51806a") + +fills <- c("Earlier vs. Later Treated - DiD Estimate" = "#A7473A", + "Later vs. Earlier Treated - DiD Estimate" = "#4B5F6C", + "Earlier vs. Later Treated - True Value" = "white", + "Later vs. Earlier Treated - True Value" = "white") + +shapes <- c("Earlier vs. Later Treated - DiD Estimate" = 21, + "Later vs. Earlier Treated - DiD Estimate" = 24, + "Earlier vs. Later Treated - True Value" = 21, + "Later vs. Earlier Treated - True Value" = 24) + +# sim4 plot +sim4 <- bacon_4_plotdata %>% + # jigger things so that they look better + mutate(estimate_vl = if_else(treated == "89" & untreated == "07" | treated == "98" & untreated == "07" | + treated == "07" & untreated == "98", + estimate_vl + 0.01, estimate_vl)) %>% + arrange(desc(weight)) %>% + ggplot(aes(x = weight_vl, y = estimate_vl, shape = identifier, color = identifier, fill = identifier)) + + geom_point(size = 2) + + geom_path(aes(group = name), arrow = arrow(length = unit(0.1, "inches"), ends = "last")) + + geom_hline(yintercept = 0, linetype = "dashed") + + annotate("text", label = "T = '89 \n C = '98", x = .12, y = .125, color = "#A7473A") + + annotate("text", label = "T = '89 \n C = '07", x = .21, y = .15, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '07", x = .16, y = .10, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '89", x = .21, y = .11, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '89", x = .20, y = .05, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '98", x = .12, y = .06, color = "#4B5F6C") + + scale_color_manual(values = colors) + + scale_fill_manual(values = fills) + + scale_shape_manual(values = shapes) + + labs(x = "", y = expression(widehat(delta^'DD'))) + + ggtitle("Simulation 4") + + theme(legend.position = "none", + legend.title = element_blank(), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5), + plot.title = element_text(hjust = 0.5, vjust = 0.5), + panel.grid.major.x = element_blank(), + panel.grid.major.y = element_blank()) + +# sim 5 plot +sim5 <- bacon_5_plotdata %>% + arrange(desc(weight)) %>% + # jigger things so that they look better + mutate(estimate_vl = if_else(treated == "89" & untreated == "07" | treated == "98" & untreated == "07" | + treated == "07" & untreated == "98" & estimate == "estimate2", + estimate_vl + 0.01, estimate_vl)) %>% + ggplot(aes(x = weight_vl, y = estimate_vl, shape = identifier, color = identifier, fill = identifier)) + + geom_point(size = 2) + + geom_path(aes(group = name), arrow = arrow(length = unit(0.1, "inches"), ends = "last"), + show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "dashed") + + annotate("text", label = "T = '89 \n C = '98", x = .11, y = .07, color = "#A7473A") + + annotate("text", label = "T = '89 \n C = '07", x = .22, y = .09, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '07", x = .225, y = .05, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '89", x = .22, y = -.06, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '89", x = .16, y = -.07, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '98", x = .115, y = -.05, color = "#4B5F6C") + + scale_color_manual(values = colors) + + scale_fill_manual(values = fills) + + scale_shape_manual(values = shapes) + + labs(x = "Weight", y = "") + + ggtitle("Simulation 5") + + theme(legend.position = "bottom", + legend.title = element_blank(), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5), + plot.title = element_text(hjust = 0.5, vjust = 0.5), + panel.grid.major.x = element_blank(), + panel.grid.major.y = element_blank()) + + guides(color = guide_legend(nrow = 2), + shape = guide_legend(nrow = 2)) + +# sim 6 plot +sim6 <- bacon_6_plotdata %>% + # jigger things so that they look better + mutate(estimate_vl = if_else(treated == "98" & untreated == "89" & estimate == "estimate2" | + treated == "07" & untreated == "98" & estimate == "estimate2", + estimate_vl + 0.02, estimate_vl)) %>% + arrange(desc(weight)) %>% + ggplot(aes(x = weight_vl, y = estimate_vl, shape = identifier, color = identifier, fill = identifier)) + + geom_point(size = 2) + + geom_path(aes(group = name), arrow = arrow(length = unit(0.1, "inches"), ends = "last"), + show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) + + annotate("text", label = "T = '89 \n C = '98", x = .11, y = 0.15, color = "#A7473A") + + annotate("text", label = "T = '89 \n C = '07", x = .21, y = 0.15, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '07", x = .225, y = 0.05, color = "#A7473A") + + annotate("text", label = "T = '98 \n C = '89", x = .225, y = -.18, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '89", x = .145, y = -.20, color = "#4B5F6C") + + annotate("text", label = "T = '07 \n C = '98", x = .11, y = -0.13, color = "#4B5F6C") + + scale_color_manual(values = colors) + + scale_fill_manual(values = fills) + + scale_shape_manual(values = shapes) + + ylim(c(-0.22, .19)) + + labs(x = "", y = "") + + geom_mark_circle(aes(description = "Bad \n 2x2 Below", filter = treated == "07" & untreated == "89" & weight == "weight"), + con.type = "straight", label.buffer = unit(5, 'mm'), expand = unit(10, "mm"), fill = "#51806a", + label.fontsize = 8, con.arrow = arrow(length = unit(0.1, "inches")), label.fill = "#51806a30", show.legend = FALSE) + + ggtitle("Simulation 6") + + theme(legend.position = "none", + legend.title = element_blank(), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5), + plot.title = element_text(hjust = 0.5, vjust = 0.5), + panel.grid.major.x = element_blank(), + panel.grid.major.y = element_blank()) + +# make subplot showing 2007 treated v. 1989 control +colors2 <- c("Treated" = "#A7473A", "Control" = "#4B5F6C") + +# make subplot +subplot <- data6 %>% + filter((group == 2007 | group == 1989) & fyear >= 1989) %>% + mutate(group = if_else(group == 2007, "Treated", "Control")) %>% + ggplot(aes(x = fyear, y = treat_roa, group = gvkey)) + + # unit specific lines + geom_line(alpha = 1/10, color = "grey") + + # group specific averages + geom_line( + data = . %>% + group_by(group, fyear) %>% + summarize(treat_roa = mean(treat_roa)), + aes(x = fyear, y = treat_roa, group = group, + color = group), size = 1) + + scale_color_manual(values = colors2) + + ylim(c(-.5*sd_roa, 1.5*sd_roa)) + + labs(x = "", y = "ROA") + + geom_vline(xintercept = 2006.5, color = "#A7473A" , + linetype = "dashed", size = 1) + + ggtitle("Biased 2x2 Estimate From Simulation 6") + + labs(subtitle = expression(paste("Treated = ",'G'['2007'], "; Control = ", 'G'['1989']))) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + +# combine and save +GB_decomp_sims <- (sim4 + sim5 + sim6) / (subplot) +ggsave(GB_decomp_sims, filename = here::here("Figs_Tables", "GB_decomp_sims.png"), dpi = 500, + width = 10, height = 8) \ No newline at end of file diff --git a/Code/1e. Alternative Event Study Estimators.R b/Code/1e. Alternative Event Study Estimators.R new file mode 100644 index 0000000..727d4bc --- /dev/null +++ b/Code/1e. Alternative Event Study Estimators.R @@ -0,0 +1,242 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) + +# estimate the fixed effects regression of ROA on firm and year fixed effects +mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = comp) + +# get the moments for the residuals from the baseline model +resid_sd <- sd(mod$residuals) +resid_skew <- skewness(mod$residuals) +resid_kurtosis <- kurtosis(mod$residuals) + +# get firm and years and state of incorporation +shell <- comp %>% select(gvkey, fyear) + +# get the firm and year fes, as well as the standard deviation of ROA +firm_fes <- fixef(mod)$gvkey +n_firm_fes <- length(fixef(mod)$gvkey) +year_fes <- fixef(mod)$fyear +n_year_fes <- length(fixef(mod)$fyear) +sd_roa <- sd(comp$roa) + +## Last Plot - show the alternative methods and how they work over time +run_sim_es <- function(i, p) { + + p() + + # pull firm FE from empirical distribution with replacement and + # randomly assign state of incorporation + sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) + ) + + # pull year FE from the empirical distribution with replacement + sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) + ) + + # merge in the FE to the firm/year/state observations and add in residuals from the + # empirical distribution. ROA is the linear combination of the FEs and the residual + data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + + # randomly assign the state of incorporation into treatment groups + # put random states into a vector + random_states <- sample(state.abb, length(state.abb), replace = FALSE) + + # now add in the treatment effect - Multiple Treatment Periods and Dynamic Treatment Effects + data6 <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[35:50] ~ 2007 + ), + # add in treatment effects - varying percent of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .05*sd_roa, + fyear >= group & group == 1998 ~ .03*sd_roa, + fyear >= group & group == 2007 ~ .01*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + # true treatment effects + dt <- tibble( + sim = rep(i, 11), + t = -5:5, + # get the average of the imputed treatment effects for each relative time period. Drop 2007 cohort bc can't estimate. + true_te = map_dbl(-5:5, function(x) {data6 %>% filter(rel_year == x & first_treat < 2007) %>% pull(delta) %>% mean}) + ) + + # get CS estimates + # first full the full set of attgts + CS_out <- att_gt(yname = "treat_roa", + data = data6, + gname = "first_treat", + idname = "gvkey", + tname = "fyear", + bstrap = F, + cband = F, + est_method = "reg", + control_group = "notyettreated", + print_details = F, + panel = TRUE, + allow_unbalanced_panel = TRUE) + + # get cs estimates aggregated to event time + cs <- aggte(CS_out, type = "dynamic", min_e = -5, max_e = 5, bstrap = FALSE, cband = FALSE) + + # add into the data + dt$cs <- cs$att.egt + + # Stacked regressions + # first make the stacked datasets + # get the treatment cohorts + cohorts <- data6 %>% + # drop never treateds, and also 2007 when everyone is treated + filter(!(first_treat %in% c(0, 2007))) %>% + pull(first_treat) %>% + unique() + + # make formula to create the sub-datasets + getdata <- function(j) { + + #keep what we need + data6 %>% + # keep treated units and all units not treated within -5 to 5 + filter(first_treat == j | first_treat == 0 | first_treat > j + 5) %>% + # keep just year -5 to 5 + filter(fyear >= j - 5 & fyear <= j + 5) %>% + # create an indicator for the dataset + mutate(df = j) + } + + # get data stacked + stacked_data <- map_df(cohorts, getdata) %>% + mutate(rel_year = if_else(df == group, rel_year, NA_real_)) %>% + fastDummies::dummy_cols("rel_year", ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) + + # get stacked value + stacked <- feols(treat_roa ~ `rel_year_-5` + `rel_year_-4` + `rel_year_-3` + + `rel_year_-2` + rel_year_0 + rel_year_1 + rel_year_2 + rel_year_3 + + rel_year_4 + rel_year_5 | gvkey^df + fyear^df, data = stacked_data)$coefficients + + # add in 0 for omitted -1 + stacked <- c(stacked[1:4], 0, stacked[5:10]) + + # add in + dt$stacked <- stacked + + # finally get the sun and abraham value + # need to make a dataset without observations more than 5 years after treatment + sa_data <- data6 %>% + filter(treat == 0 | rel_year <= 5) %>% + filter(fyear < 2007) + + # tidy up sun abraham estimates + sun_ab <- feols(treat_roa ~ 1 + sunab(first_treat, fyear) | gvkey + fyear, sa_data) + sa <- tidy(sun_ab)[14:23, ] %>% pull(estimate) + sa <- c(sa[1:4], 0, sa[5:10]) + dt$sa <- sa + + # export results + dt + +} + +# parallelize and do 500 simulations +x <- 1:500 +options(future.globals.maxSize= 891289600) +set.seed(28101695) +plan(multisession, workers = 6) +with_progress({ + p <- progressor(steps = length(x)) + out <- future_map_dfr( + .x = x, + .f = run_sim_es, + p = p, + .options = furrr_options(globals = c("mod", "shell", "firm_fes", "n_firm_fes", + "year_fes", "n_year_fes", "sd_roa"), + packages = c("tidyverse", "fixest", "e1071", "did", "fastDummies"), + seed = TRUE) + )}) + +## make plots +# function to make plot +make_es_plot <- function(name, title) { + out %>% + group_by(t) %>% + summarize(true_effect = mean(true_te), + est = mean({{name}}), + lower_ci = quantile({{name}}, probs = 0.025), + upper_ci = quantile({{name}}, 0.975)) %>% + # split the error bands by pre-post + mutate(band_groups = case_when( + t < -1 ~ "Pre", + t >= 0 ~ "Post", + t == -1 ~ "" + )) %>% + # plot + ggplot(aes(x = t, y = est)) + + geom_line(aes(x = t, y = true_effect, color = "True Effect"), linetype = "dashed") + + geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci), + color = "lightgrey", alpha = 1/4) + + geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci, color = "Estimated Effect"), show.legend = FALSE) + + geom_hline(yintercept = 0) + + geom_vline(xintercept = -0.5, linetype = "dashed") + + scale_x_continuous(breaks = -5:5) + + labs(x = "Relative Time", y = if_else(title == "Callaway & Sant'Anna", expression(widehat(delta)), expression(" "))) + + scale_color_manual(values = c("#A7473A", "#4B5F6C")) + + ggtitle(title) + + theme(legend.position = if_else(title == "Sun & Abraham", "bottom", "none"), + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5)) +} + +# run plots +cs_es <- make_es_plot(cs, "Callaway & Sant'Anna") +sa_es <- make_es_plot(sa, "Sun & Abraham") +stacked_es <- make_es_plot(stacked, "Stacked Regression") + +# combine and save +new_did_sims <- cs_es + sa_es + stacked_es +ggsave(new_did_sims, filename = here::here("Figs_Tables", "new_did_sims.png"), dpi = 500, + width = 10, height = 4) diff --git a/Code/1f. Rejections.R b/Code/1f. Rejections.R new file mode 100644 index 0000000..484161a --- /dev/null +++ b/Code/1f. Rejections.R @@ -0,0 +1,190 @@ +library(tidyverse) +library(fixest) +library(ggthemes) +library(patchwork) +library(furrr) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) + +# estimate the fixed effects regression of ROA on firm and year fixed effects +mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = comp) + +# get firm and years and state of incorporation +shell <- comp %>% select(gvkey, fyear) + +# get the firm and year fes, as well as the standard deviation of ROA +firm_fes <- fixef(mod)$gvkey +n_firm_fes <- length(fixef(mod)$gvkey) +year_fes <- fixef(mod)$fyear +n_year_fes <- length(fixef(mod)$fyear) +sd_roa <- sd(comp$roa) + +## Show that 0 treatment effects will end up significant with heterogeneity +run_sim <- function(i, p) { + # need this for progress bar to work + p() + + # pull firm FE from empirical distribution with replacement and + # randomly assign state of incorporation + sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) + ) + + # pull year FE from the empirical distribution with replacement + sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) + ) + + # merge in the FE to the firm/year/state observations and add in residuals from the + # empirical distribution. ROA is the linear combination of the FEs and the residual + data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + + # randomly assign the state of incorporation into treatment groups + random_states <- sample(state.abb, length(state.abb), replace = FALSE) + + # function for different trend decay rates + get_t <- function(val) { + + # simulate treatment effects centered at 0 + taus <- rnorm(3, 0, val*sd_roa) + + # put data together + dt <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[35:50] ~ 2007 + ), + # add in treatment effects - varying percent of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ taus[1], + fyear >= group & group == 1998 ~ taus[2], + fyear >= group & group == 2007 ~ taus[3], + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + # vary the power variable which represents the decay rate + delta = if_else(delta_base == 0, 0, delta_base * (fyear - group + 1)), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) + + feols(treat_roa ~ treat | gvkey + fyear, cluster = "incorp", data = dt) %>% + broom::tidy(conf.int = TRUE) %>% + select(statistic) %>% + # add in variables for the size of the break, and the amount of decay + mutate(val = val) +} + + # estimate for a sequence of values + map_dfr(seq(0, 0.1, by = 0.0025), get_t) %>% + mutate(sim = i) +} + +# parallelize and do 500 simulations +x <- 1:500 +options(future.globals.maxSize= 891289600) +set.seed(20210915) +plan(multisession, workers = 6) +with_progress({ + p <- progressor(steps = length(x)) + out <- future_map_dfr( + .x = x, + .f = run_sim, + p = p, + .options = furrr_options(globals = c("mod", "shell", "firm_fes", "n_firm_fes", + "year_fes", "n_year_fes", "sd_roa"), + packages = c("tidyverse", "fixest", "e1071", "did", "fastDummies"), + seed = TRUE)) +}) + +# make a plot that we can highlight under the curve with tstats bw -1.96 and 1.96 +# first get the data for the full curve using ggplot_build +p1data <- ggplot_build( + out %>% + mutate(insig = if_else(abs(statistic) < 1.96, 1, 0)) %>% + filter(val == 0.03) %>% + ggplot(aes(x = statistic)) + geom_density() +)$data[[1]] + +# get percent insignificant +percent_insig <- out %>% + filter(val == 0.03) %>% + mutate(insig = if_else(statistic %>% between(-1.96, 1.96), 1, 0)) %>% + summarize(m = mean(insig)) %>% + pull(m) + +# make label for the plot +plotlabel <- glue::glue(scales::label_percent(accuracy = .1)(percent_insig), + " insignificant \n t-Stats") + +# now make the plot - gray for the whole curve, red for between +# -1.96 and 1.96 +p1 <- p1data %>% + ggplot(aes(x = x, y = y)) + + geom_area(fill = "#4B5F6C", color = "darkgray", alpha = 1/3) + + geom_area( + data = p1data %>% filter(x %>% between(-1.96, 1.96)), + aes(x = x, y = y), + fill = "#A7473A", + alpha = 3/4 + ) + + annotate("label", x = 14, y = 0.045, label = plotlabel, + size = 2) + + geom_curve(aes(x = 14, y = .05, xend = 0, yend = 0.055), + color = "#A7473A", + arrow = arrow(length = unit(0.03, "npc"))) + + scale_x_continuous(breaks = c(-20, -10, -1.96, 0, 1.96, 10, 20), + labels = c(-20, -10, -1.96, 0, 1.96, 10, 20)) + + scale_y_continuous(limits = c(0, 0.06)) + + labs(x = "t-Statistic", y = "Density") + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360, size = 8), + axis.title.x = element_text(size = 8), + axis.text = element_text(size = 5)) + +# plot 2 +p2data <- out %>% + mutate(insig = if_else(abs(statistic) < 1.96, 1, 0)) %>% + group_by(val) %>% + summarize(insig = mean(insig)) + +p2 <- p2data %>% + ggplot(aes(x = val, y = insig)) + + geom_line() + + geom_hline(yintercept = 0.95, color = "#A7473A", linetype = "dashed") + + labs(x = "Percent of ROA Standard Deviation", y = "Percent \n Insignificant") + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360, + size = 8), + axis.title.x = element_text(size = 8), + axis.text = element_text(size = 5)) + + scale_x_continuous(breaks = seq(0, .1, length.out = 5), + labels = scales::percent) + + scale_y_continuous(limits = c(0, 1), + labels = scales::percent) + +# save +ggsave(p1, filename = here::here("Figs_Tables", "rejection_plots_1.png"), + dpi = 800, width = 4, height = 3) +ggsave(p2, filename = here::here("Figs_Tables", "rejection_plots_2.png"), + dpi = 800, width = 4, height = 3) \ No newline at end of file diff --git a/Code/1g. Failures With Event Studies and Heterogeneity.R b/Code/1g. Failures With Event Studies and Heterogeneity.R new file mode 100644 index 0000000..6b25387 --- /dev/null +++ b/Code/1g. Failures With Event Studies and Heterogeneity.R @@ -0,0 +1,321 @@ +library(tidyverse) +library(RPostgres) +library(fixest) +library(e1071) +library(kableExtra) +library(ggthemes) +library(patchwork) +library(did) +library(furrr) +library(latex2exp) +library(bacondecomp) +library(ggforce) +library(fastDummies) +library(progressr) + +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) + +# load in compustat data +comp <- read_rds(here::here("Data", "simulation_data.rds")) + +# estimate the fixed effects regression of ROA on firm and year fixed effects +mod <- feols(roa ~ 1 | gvkey + fyear, cluster = "incorp", data = comp) + +# get the moments for the residuals from the baseline model +resid_sd <- sd(mod$residuals) +resid_skew <- skewness(mod$residuals) +resid_kurtosis <- kurtosis(mod$residuals) + +# get firm and years and state of incorporation +shell <- comp %>% select(gvkey, fyear) + +# get the firm and year fes, as well as the standard deviation of ROA +firm_fes <- fixef(mod)$gvkey +n_firm_fes <- length(fixef(mod)$gvkey) +year_fes <- fixef(mod)$fyear +n_year_fes <- length(fixef(mod)$fyear) +sd_roa <- sd(comp$roa) + +# function to run simulation, pull firm and year FE, as well as the residuals from their empirical distributions +# then add in treatment effects following our six simulations +run_sim <- function(i, p) { + + p() + + # pull firm FE from empirical distribution with replacement + sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) + ) + + # pull year FE from the empirical distribution with replacement + sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) + ) + + # merge in the FE to the firm/year/state observations and add in residuals from the + # empirical distribution. ROA is the linear combination of the FEs and the residual + data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + + # randomly assign the state of incorporation into treatment groups + # put random states into a vector + random_states <- sample(state.abb, length(state.abb), replace = FALSE) + + # now add in the treatment effect - Multiple Treatment Periods and Dynamic Equal Treatment Effects + dt <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - 3% of standard deviation of ROA added per year + delta_base = case_when( + fyear >= group & group == 1989 ~ .1*sd_roa, + fyear >= group & group == 1998 ~ .05*sd_roa, + fyear >= group & group == 2007 ~ .01*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (fyear - group + 1), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) %>% + # make dummy cols + mutate(Pre = ifelse(rel_year < -5, 1, 0), + `rel_year_-5` = if_else(rel_year == -5, 1, 0), + `rel_year_-4` = if_else(rel_year == -4, 1, 0), + `rel_year_-3` = if_else(rel_year == -3, 1, 0), + `rel_year_-2` = if_else(rel_year == -2, 1, 0), + rel_year_0 = if_else(rel_year == 0, 1, 0), + rel_year_1 = if_else(rel_year == 1, 1, 0), + rel_year_2 = if_else(rel_year == 2, 1, 0), + rel_year_3 = if_else(rel_year == 3, 1, 0), + rel_year_4 = if_else(rel_year == 4, 1, 0), + rel_year_5 = if_else(rel_year == 5, 1, 0), + Post = ifelse(rel_year > 5, 1, 0)) + + # put time indicators into vector + indicators <- c("Pre", paste0("`", "rel_year_", c(-5:-2, 0:5), "`"), "Post") + + # estimate model + mod <- feols(treat_roa ~ .[indicators] | gvkey + fyear, data = dt, cluster = "incorp") + + # export results we need + broom::tidy(mod) %>% + # drop the binned indicators + filter(!(term %in% c("Pre", "Post"))) %>% + # add in a time variable + mutate(t = c(-5:-2, 0:5)) %>% + select(t, estimate) %>% + # add in omitted category + bind_rows(tibble(t = -1, estimate = 0)) %>% + arrange(t) %>% + mutate(sim = i) %>% + # get true effect + mutate(true_te = map_dbl(c(-5:5), function(x) {dt %>% filter(rel_year == x) %>% pull(delta) %>% mean})) +} + +# parallelize and do 500 simulations +x <- 1:500 +options(future.globals.maxSize= 891289600) +set.seed(28101695) +plan(multisession, workers = 6) +with_progress({ + p <- progressor(steps = length(x)) + out <- future_map_dfr( + .x = x, + .f = run_sim, + p = p, + .options = furrr_options(globals = c("mod", "shell", "firm_fes", "n_firm_fes", + "year_fes", "n_year_fes", "sd_roa"), + packages = c("tidyverse", "fixest", "fastDummies", "broom"), + seed = TRUE) + )}) + +# plot +p1 <- out %>% + group_by(t) %>% + summarize(est = mean(estimate), + true_effect = mean(true_te), + lower_ci = quantile(estimate, probs = 0.025), + upper_ci = quantile(estimate, probs = 0.975)) %>% + # split the error bands by pre-post + mutate(band_groups = case_when( + t < -1 ~ "Pre", + t >= 0 ~ "Post", + t == -1 ~ "" + )) %>% + # plot + ggplot(aes(x = t, y = est)) + + geom_line(aes(x = t, y = true_effect, color = "True Effect"), linetype = "dashed", size = 2) + + geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci), + color = "lightgrey", alpha = 1/4) + + geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci, color = "Estimated Effect"), show.legend = FALSE) + + geom_hline(yintercept = 0) + + geom_vline(xintercept = -0.5, linetype = "dashed") + + scale_x_continuous(breaks = -5:5) + + labs(x = "Relative Time", y = "Estimate") + + scale_color_manual(values = c("#A7473A", "#4B5F6C")) + + ggtitle("No Real Pre-Trends") + + ylim(c(-0.04, 0.075)) + + theme(legend.position = "bottom", + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5)) + +# Do the opposite - real pre trends but looks like none +# function to run simulation, pull firm and year FE, as well as the residuals from their empirical distributions +# then add in treatment effects following our six simulations +run_sim_2 <- function(i, p) { + + p() + + # pull firm FE from empirical distribution with replacement + sim_firm_fe <- tibble( + gvkey = unique(shell$gvkey), + firm_fe = sample(firm_fes, n_firm_fes, replace = TRUE), + incorp = sample(state.abb, n_firm_fes, replace = TRUE) + ) + + # pull year FE from the empirical distribution with replacement + sim_year_fe <- tibble( + fyear = unique(shell$fyear), + year_fe = sample(year_fes, n_year_fes, replace = TRUE) + ) + + # merge in the FE to the firm/year/state observations and add in residuals from the + # empirical distribution. ROA is the linear combination of the FEs and the residual + data <- shell %>% + left_join(sim_firm_fe, by = "gvkey") %>% + left_join(sim_year_fe, by = "fyear") %>% + mutate(resid = sample(mod$residuals, length(mod$residuals), replace = TRUE), + roa = firm_fe + year_fe + resid) + + # randomly assign the state of incorporation into treatment groups + # put random states into a vector + random_states <- sample(state.abb, length(state.abb), replace = FALSE) + + # now add in the treatment effect - Multiple Treatment Periods and Dynamic Equal Treatment Effects + dt <- data %>% + mutate( + # figure out treatment group based on random ordering of states of incorporation + group = case_when( + incorp %in% random_states[1:17] ~ 1989, + incorp %in% random_states[18:35] ~ 1998, + incorp %in% random_states[36:50] ~ 2007 + ), + # add in treatment effects - 3% of standard deviation of ROA added per year + delta_base = case_when( + fyear < group & group == 1989 ~ -0.01*sd_roa, + fyear < group & group == 1998 ~ -0.035*sd_roa, + fyear < group & group == 2007 ~ -0.035*sd_roa, + TRUE ~ 0 + ), + # true treatment effect is the cumulative sum of this - dynamic trend break treatment effect + delta = delta_base * (group - fyear), + # new ROA is the sum of the old ROA and the treatment effect + treat_roa = roa + delta, + # make indicator variable for obs when treatment is turned on for the TWFE regs + treat = ifelse(fyear >= group, 1, 0), + # make a relative-to-treatment year variable + rel_year = fyear - group, + # get a first treat variable for CS + first_treat = group) %>% + # make dummy cols + mutate(Pre = ifelse(rel_year < -5, 1, 0), + `rel_year_-5` = if_else(rel_year == -5, 1, 0), + `rel_year_-4` = if_else(rel_year == -4, 1, 0), + `rel_year_-3` = if_else(rel_year == -3, 1, 0), + `rel_year_-2` = if_else(rel_year == -2, 1, 0), + rel_year_0 = if_else(rel_year == 0, 1, 0), + rel_year_1 = if_else(rel_year == 1, 1, 0), + rel_year_2 = if_else(rel_year == 2, 1, 0), + rel_year_3 = if_else(rel_year == 3, 1, 0), + rel_year_4 = if_else(rel_year == 4, 1, 0), + rel_year_5 = if_else(rel_year == 5, 1, 0), + Post = ifelse(rel_year > 5, 1, 0)) + + # put time indicators into vector + indicators <- c("Pre", paste0("`", "rel_year_", c(-5:-2, 0:5), "`"), "Post") + + # estimate model + mod <- feols(treat_roa ~ .[indicators] | gvkey + fyear, data = dt, cluster = "incorp") + + broom::tidy(mod) %>% + filter(!(term %in% c("Pre", "Post"))) %>% + mutate(t = c(-5:-2, 0:5)) %>% + select(t, estimate) %>% + bind_rows(tibble(t = -1, estimate = 0)) %>% + arrange(t) %>% + mutate(sim = i) %>% + mutate(true_te = map_dbl(c(-5:5), function(x) {dt %>% filter(rel_year == x) %>% pull(delta) %>% mean})) +} + +# parallelize and do 500 simulations +x <- 1:500 +options(future.globals.maxSize= 891289600) +set.seed(28101695) +plan(multisession, workers = 6) +with_progress({ + p <- progressor(steps = length(x)) + out <- future_map_dfr( + .x = x, + .f = run_sim_2, + p = p, + .options = furrr_options(globals = c("mod", "shell", "firm_fes", "n_firm_fes", + "year_fes", "n_year_fes", "sd_roa"), + packages = c("tidyverse", "fixest", "fastDummies", "broom"), + seed = TRUE) + )}) + +# plot +p2 <- out %>% + group_by(t) %>% + summarize(est = mean(estimate), + true_effect = mean(true_te), + lower_ci = quantile(estimate, probs = 0.025), + upper_ci = quantile(estimate, probs = 0.975)) %>% + # split the error bands by pre-post + mutate(band_groups = case_when( + t < -1 ~ "Pre", + t >= 0 ~ "Post", + t == -1 ~ "" + )) %>% + # plot + ggplot(aes(x = t, y = est)) + + geom_line(aes(x = t, y = true_effect, color = "True Effect"), linetype = "dashed", size = 2) + + geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci), + color = "lightgrey", alpha = 1/4) + + geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci, color = "Estimated Effect"), show.legend = FALSE) + + geom_hline(yintercept = 0) + + geom_vline(xintercept = -0.5, linetype = "dashed") + + scale_x_continuous(breaks = -5:5) + + labs(x = "Relative Time", y = "Estimate") + + scale_color_manual(values = c("#A7473A", "#4B5F6C")) + + ggtitle("Actual Pre-Trends") + + ylim(c(-0.035, 0.02)) + + theme(legend.position = "bottom", + legend.title = element_blank(), + plot.title = element_text(hjust = 0.5), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5)) + +# combine the two plots and save +plot <- p1 + p2 + +ggsave(plot, filename = here::here("Figs_Tables", "bad_event_study_plots.png"), dpi = 500, + width = 8, height = 4) \ No newline at end of file diff --git a/Code/2. BLL.R b/Code/2. BLL.R new file mode 100644 index 0000000..5348793 --- /dev/null +++ b/Code/2. BLL.R @@ -0,0 +1,616 @@ +# load packages +library(tidyverse) +library(patchwork) +library(fastDummies) +library(ggthemes) +library(did) +library(bacondecomp) +library(kableExtra) +library(fixest) + +# set defaults +select <- dplyr::select +# set plot theme +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) +options(knitr.kable.NA = '') + +# set seed for CS bootstrap estimator to be replicable +set.seed(20210215) + +# load data. This is downloaded from https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/15996. +data <- haven::read_dta(here::here("Reps/BLL/bbb/macro_workfile.dta")) + +# make relative year, treatment indicator, and and log gini variables +data <- data %>% + mutate(rel_year = wrkyr - branch_reform, + log_gini = log(gini), + treat = `_intra`) + +# function to get significance stars +make_stars <- function(t, dof) { + if (2 * pt(-t, df=dof) < 0.01) { + ptstar <- "***" + } else if (2 * pt(-t, df=dof) < 0.05) { + ptstar <- "**" + } else if (2 * pt(-t, df=dof) < 0.1) { + ptstar <- "*" + } else { + ptstar <- "" + } + return(ptstar) +} + +# estimate the models +# no controls +mod1 <- feols(log_gini ~ treat | statefip + wrkyr, cluster = "statefip", data = data) + +# controls +mod2 <- feols(log_gini ~ treat + gsp_pc_growth + prop_blacks + prop_dropouts + prop_female_headed + + unemploymentrate | statefip + wrkyr, cluster = "statefip", data = data) + +# Table 4 - Bacon Decomposition ----------------------------------------------------- +# calculate the bacon decomposition without covariates +bacon_out <- bacon(log_gini ~ treat, + data = data, + id_var = "state", + time_var = "wrkyr") %>% + # rename Later vs. Always Treated to Later v. Earlier Treated + # because the code changed and it is accurate + mutate(type = if_else(type == "Later vs Always Treated", "Later vs Earlier Treated", type)) + +# first get the total weight for each group. +total_weights <- bacon_out %>% + group_by(type) %>% + summarize(weight = sum(weight)) + +# get the weighted average within group +group_avg <- bacon_out %>% + group_by(type) %>% + summarize(avg = weighted.mean(estimate, weight), + weights = sum(weight)) + +# make the table +BLL_decomp <- group_avg %>% + kable("latex", digits = 3, align = 'lcc', + booktabs = T, + col.names = c("Type", "Weighted \n Average", "Total \n Weight"), + label = "BLL_decomp") %>% + kable_styling(position = "center", font_size = 8, + latex_options = c("HOLD_position", "scale_down")) + +# save +write_lines(BLL_decomp, file = here::here("Figs_Tables", "BLL_decomp.tex")) + +# Figure 5 - Bacon Decomp ------------------------------------------------- +# first early v late plot +EvL <- bacon_out %>% + filter(type == "Earlier vs Later Treated") %>% + ggplot(aes(x = weight, y = estimate)) + + geom_point(size = 3, alpha = 1/2) + + geom_hline(yintercept = 0, linetype = "dashed") + + geom_hline(yintercept = group_avg$avg[1], color = "darkred", size = 2) + + labs(x = "", y = expression(widehat(delta^'DD'))) + + ggtitle(paste0("Early vs Later Treated \n Total Weight =", scales::percent(total_weights$weight[1]))) + + scale_y_continuous(limits = c(-.12, 0.12)) + + theme(plot.title = element_text(hjust = 0.5), + legend.position = "bottom", + legend.title = element_blank(), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5)) + +# late v early plot +LvE <- bacon_out %>% + filter(type == "Later vs Earlier Treated") %>% + ggplot(aes(x = weight, y = estimate)) + + geom_point(size = 3, alpha = 1/2) + + geom_hline(yintercept = 0, linetype = "dashed") + + geom_hline(yintercept = group_avg$avg[2], color = "darkred", size = 2) + + labs(x = "Weight", y = expression(widehat(delta^'DD'))) + + scale_y_continuous(limits = c(-.12, 0.12)) + + ggtitle(paste0("Later vs Earlier Treated \n Total Weight = ", scales::percent(total_weights$weight[2]))) + + theme(plot.title = element_text(hjust = 0.5), + legend.position = "bottom", + legend.title = element_blank(), + axis.title.y = element_text(angle = 360, hjust = 0.5, vjust = 0.5)) + +# combine the figures +BLL_decomp_plot <- EvL / LvE + +# save +ggsave(BLL_decomp_plot, filename = here::here("Figs_Tables", "BLL_decomp_plot.png"), + dpi = 500, width = 5, height = 20/3) + +# Figure 7 - timing of adoption ------------------------------------------- +BLL_timing <- data %>% + select(state, wrkyr, branch_reform) %>% + mutate(state = fct_reorder(state, rank(desc(state)))) %>% + mutate(post = if_else(wrkyr < branch_reform, "Pre", "Post")) %>% + mutate(post = factor(post, levels = c("Pre", "Post"))) %>% + ggplot(aes(x = wrkyr, y = state)) + + geom_tile(aes(fill = as.factor(post)), alpha = 3/4) + + scale_fill_manual(values = c("#4B5F6C", "#A7473A")) + + theme(legend.position = 'bottom', + legend.title = element_blank(), + panel.grid.minor = element_blank(), + axis.title = element_blank(), + legend.background = element_rect(color = "white")) + +# save +ggsave(BLL_timing, filename = here::here("Figs_Tables", "BLL_timing.png"), + dpi = 500, width = 5, height = 20/3) + +# Figure 8 - Fixed Event Studies ------------------------------------------ +# make dummy variables +data_dummies <- data %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) %>% + # bin end points + mutate(`rel_year_-10` = if_else(rel_year <= -10, 1, 0), + rel_year_15 = if_else(rel_year >= 15, 1, 0)) + +# make the formula to estimate +indicators <- c(paste0("`", "rel_year_", c(-10:-1, 1:15), "`")) + +# estimate model as published +es_published <- feols(log_gini ~ .[indicators] | wrkyr + statefip, + data = data_dummies, cluster = "statefip") + +# plot +ES_1 <- broom::tidy(es_published, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = c(-10:-1, 1:15)) %>% + # substract out the the mean for beta -10 to -1 + mutate(conf.low = conf.low - mean(estimate[t < 0]), + conf.high = conf.high - mean(estimate[t < 0]), + estimate = estimate - mean(estimate[t < 0])) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # make two different periods for the connection + mutate(group = as.factor(case_when( + t < 0 ~ 1, + t >= 0 ~ 2 + ))) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(A)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.04, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# panel B - don't detrend +ES_2 <- broom::tidy(es_published, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = c(-10:-1, 1:15)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # make two different periods for the connection + mutate(group = as.factor(case_when( + t < 0 ~ 1, + t >= 0 ~ 2 + ))) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(B)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.04, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# panel C - don't detrend and also include the full set of relative time indicators +# make dummy variables +data_dummies <- data %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) + +# get the relative year indicators - drop most negative and 0. +yrs <- sort(unique(data_dummies$rel_year)) +yrs <- yrs[which(yrs != min(yrs) & yrs != 0)] + +# make formula +indicators <- c(paste0("`", "rel_year_", yrs, "`")) + +# estimate the model and plot +# estimate the model +ES_fix1 <- feols(log_gini ~ .[indicators] | wrkyr + statefip, + data = data_dummies, cluster = "statefip") + +# plot +ES_3 <- broom::tidy(ES_fix1, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = yrs) %>% + filter(t %>% between(-10, 15)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # make two different periods for the connection + mutate(group = as.factor(case_when( + t < 0 ~ 1, + t >= 0 ~ 2 + ))) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(C)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.06, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# fig 8d - drop firms treated before the panel and all years once everyone is treated. +# make dummy variables +data_dummies <- data %>% + # drop states treated before the sample + filter(branch_reform >= 1977) %>% + # drop observations after which everyone is treated + filter(wrkyr <= 1998) %>% + # remove dummy variables for firms treated in the last year + mutate(rel_year = if_else(branch_reform == max(branch_reform), NA_real_, rel_year)) %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) + +# get the relative year indicators +yrs <- sort(unique(data_dummies$rel_year)) +yrs <- yrs[which(yrs != min(yrs) & yrs != 0)] + +# make formula +indicators <- c(paste0("`", "rel_year_", yrs, "`")) + +# estimate the model and plot +# estimate the model +ES_fix2 <- feols(log_gini ~ .[indicators] | wrkyr + statefip, + data = data_dummies, cluster = "statefip") + +# plot +ES_4 <- broom::tidy(ES_fix2, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = yrs) %>% + filter(t %>% between(-10, 15)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # make two different periods for the connection + mutate(group = as.factor(case_when( + t < 0 ~ 1, + t >= 0 ~ 2 + ))) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(D)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.08, 0.08, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# combine plots +BLL_ES <- (ES_1 + ES_2)/(ES_3 + ES_4) + +#save +ggsave(BLL_ES, filename = here::here("Figs_Tables", "BLL_ES.png"), + dpi = 500, width = 10, height = 20/3) + +# Remedies ---------------------------------------------------------------- +# get treated years that we can estimate +treats <- data %>% + filter(branch_reform >= 1977 & branch_reform < max(branch_reform)) %>% + pull(branch_reform) %>% + unique() %>% + sort() + +# function to get treat-year specific cohorts +make_dt <- function(tyr) { + data %>% + # drop all observations on or after 1999 when everyone is treated + filter(wrkyr < 1999) %>% + # keep firms in the adopt year or those firms in years t + 10 + filter(branch_reform == tyr | branch_reform > tyr + 10) %>% + # keep just years t - 5 to t + 10 + filter(wrkyr %>% between(tyr - 5, tyr + 10)) %>% + # replace adopt year to NA if not in treated year to make dummies + mutate(branch_reform = if_else(branch_reform == tyr, branch_reform, NA_real_), + rel_year = wrkyr - branch_reform) %>% + select(statefip, wrkyr, branch_reform, rel_year, log_gini) %>% + mutate(dt = as.character(tyr)) +} + +# stack the datasets +stacked_data <- map_dfr(treats, make_dt) %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) %>% + # interact cluster with statefip + mutate(cluster = paste0(statefip, "_", dt)) + +# make formula +indicators <- c(paste0("`", "rel_year_", c(-5:-1, 1:10), "`")) + +# estimate the model and plot +# estimate the model +stack1 <- feols(log_gini ~ .[indicators] | wrkyr^dt + statefip^dt, + cluster = "cluster", data = stacked_data) + +# plot +BLL_stack1 <- broom::tidy(stack1, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = c(-5:-1, 1:10)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(A)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.06, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# remake but allowing for more observations to enter +# get treated years that we can estimate +treats <- data %>% + filter(branch_reform >= 1977 & branch_reform < max(branch_reform)) %>% + pull(branch_reform) %>% + unique() %>% + sort() + +# function to get treat-year specific cohorts +make_dt <- function(tyr) { + data %>% + # keep firms in the adopt year or those obs without treatment before t + 10 + filter(branch_reform == tyr | (branch_reform > tyr & wrkyr < branch_reform)) %>% + # keep just years t - 5 to t + 10 + filter(wrkyr %>% between(tyr - 5, tyr + 10)) %>% + # replace adopt year to NA to make dummies + mutate(branch_reform = if_else(branch_reform == tyr, branch_reform, NA_real_), + rel_year = wrkyr - branch_reform, + treat = if_else(is.na(branch_reform) | wrkyr < tyr, 0, 1)) %>% + select(statefip, wrkyr, branch_reform, rel_year, log_gini, treat) %>% + mutate(dt = as.character(tyr)) +} + +# stack the datasets +stacked_data <- map_dfr(treats, make_dt) %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) %>% + mutate(cluster = paste0(statefip, "_", dt)) + +# make formula +indicators <- c(paste0("`", "rel_year_", c(-5:-1, 1:10), "`")) + +# estimate the model and plot +# estimate the model +stack2 <- feols(log_gini ~ .[indicators] | wrkyr^dt + statefip^dt, + cluster = "cluster", data = stacked_data) + +# estimate the model and plot +# estimate the model +BLL_stack2 <- broom::tidy(stack2, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = c(-5:-1, 1:10)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows(tibble(t = 0, estimate = 0, conf.low = 0, conf.high = 0)) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(B)") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.06, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# combine and save +BLL_stack <- BLL_stack1 + BLL_stack2 + +#save +ggsave(BLL_stack, filename = here::here("Figs_Tables", "BLL_stack.png"), + dpi = 500, width = 10, height = 20/6) + +# CS Method --------------------------------------------------------------- +# never treateds only as control states +# make the dataaset - drop states treated before 1977 +data_cs <- data %>% + # drop states treated before data + filter(branch_reform >= 1977) %>% + # keep only observations through 1998 + filter(wrkyr <= 1998) %>% + # set branch reform = 0 for last treated state + mutate(branch_reform = if_else(branch_reform == 1999, 0, branch_reform)) %>% + select(log_gini, wrkyr, statefip, branch_reform) + +# run +out1 <- att_gt(yname = "log_gini", + data = data_cs, + tname = "wrkyr", + idname = "statefip", + gname = "branch_reform", + xformla = NULL, + control_group = "nevertreated", + est_method = "reg", + print_details = FALSE, + bstrap = T, + cband = T, + clustervars = "statefip") + +# make the dynamic event study +es1 <- aggte(out1, type="dynamic", min_e = -5, max_e = 10) + +# plot +BLL_CS1 <- tidy(es1) %>% + as_tibble() %>% + # plot + ggplot(aes(x = event.time, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(A)") + + geom_errorbar(aes(ymin = point.conf.low, ymax = point.conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.06, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +## estimate with `notyettreated' +# make the dataset - drop states treated before 1977 +data_cs <- data %>% + # drop states treated before data + filter(branch_reform >= 1977) %>% + # keep only observations through 1998 + filter(wrkyr <= 1998) %>% + select(statefip, wrkyr, branch_reform, log_gini) + +# run +out2 <- att_gt(yname = "log_gini", + data = data_cs, + tname = "wrkyr", + idname = "statefip", + gname = "branch_reform", + xformla = NULL, + control_group = "notyettreated", + est_method = "reg", + print_details = FALSE, + bstrap = T, + cband = T, + clustervars = "statefip") + +# make the dynamic event study +es2 <- aggte(out2, type="dynamic", min_e = -5, max_e = 10) + +# plot +BLL_CS2 <- tidy(es2) %>% + as_tibble() %>% + # plot + ggplot(aes(x = event.time, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(B)") + + geom_errorbar(aes(ymin = point.conf.low, ymax = point.conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Percent \n Change", x = "Years Relative to Deregulation") + + scale_x_continuous(breaks = seq(-10, 15, by = 5)) + + scale_y_continuous(breaks = seq(-0.06, 0.06, by = 0.02), + label = scales::percent_format(accuracy = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# combine and save +BLL_CS <- BLL_CS1 + BLL_CS2 + +#save +ggsave(BLL_CS, filename = here::here("Figs_Tables", "BLL_CS.png"), + dpi = 500, width = 10, height = 20/6) + +## Make the regression table +# run a static regression on the stacked dataset +# estimate the model +static_stack <- feols(log_gini ~ treat | wrkyr^dt + statefip^dt, + cluster = "cluster", data = stacked_data) + +BLL_table <- bind_rows( + bind_cols( + # get the standard error and estimates into a table for mod1 + broom::tidy(mod1, se = "cluster") %>% + mutate(variable = "Bank deregulation") %>% + select(variable, estimate, std.error) %>% + mutate(t = estimate/std.error, + estimate = paste0(as.character(format(round(estimate, 3), nsmall = 3)), + make_stars(abs(t), 1500)), + std.error = paste0("(", as.character(format(round(std.error, 3), nsmall = 3)), ")")) %>% + select(-t) %>% + pivot_longer(cols = c(estimate, std.error), + names_to = "name", + values_to = "Log Gini"), + + # get the standard error and estimates into a table for mod2 + broom::tidy(mod2, se = "cluster") %>% + filter(term == "treat") %>% + mutate(variable = "Bank deregulation") %>% + select(variable, estimate, std.error) %>% + mutate(t = estimate/std.error, + estimate = paste0(as.character(format(round(estimate, 3), nsmall = 3)), + make_stars(abs(t), 1500)), + std.error = paste0("(", as.character(format(round(std.error, 3), nsmall = 3)), ")")) %>% + select(-t) %>% + pivot_longer(cols = c(estimate, std.error), + names_to = "name", + values_to = "Log Gini") %>% + select(`Log Gini`) + ) %>% set_names(c("variable", "name", "lg1", "lg2")), + # add in adjusted r2 and the number of observations + tibble( + variable = c("Observations", 'Adj. R2'), + name = rep("estimate", 2), + "lg1" = c(as.character(format(round(broom::glance(mod1)$nobs, 0), nsmall = 0)), + as.character(format(round(broom::glance(mod1)$adj.r.squared, 2), nsmall = 2))), + "lg2" = c(as.character(format(round(broom::glance(mod2)$nobs, 0), nsmall = 0)), + as.character(format(round(broom::glance(mod2)$adj.r.squared, 2), nsmall = 2))) + )) %>% + # drop name and make table + mutate(variable = if_else(name == "estimate", variable, NA_character_)) %>% + select(-name) %>% + # add in alternative estimators + bind_rows( + tibble( + variable = c("Callaway & Sant'Anna", NA_character_, "Stacked Regression", NA_character_), + lg1 = c(paste0(as.character(format(round(es2$overall.att, 3), nsmall = 3)), + make_stars(abs(es2$overall.att/es2$overall.se), 1500)), + paste0("(", as.character(format(round(es2$overall.se, 3), nsmall = 3)), ")"), + paste0(as.character(format(round(static_stack$coeftable[1], 3), nsmall = 3)), + make_stars(abs(static_stack$coeftable[1]/static_stack$coeftable[2]), 1500)), + paste0("(", as.character(format(round(static_stack$coeftable[2], 3), nsmall = 3)), ")")), + lg2 = rep(NA_character_, 4)) + ) %>% + kable("latex", escape = F, align = 'lcc', + booktabs = T, + col.names = c(" ", "Log Gini", "Log Gini"), + label = "BLL_table", + caption = "The Impact of Deregulation on Income Inequality") %>% + kable_styling(position = "center", latex_options = c("HOLD_position")) %>% + add_header_above(c(" " = 1, "No \n Controls" = 1, "With \n Controls" = 1)) %>% + pack_rows("Alternative Estimators", 5, 8) + +# save +write_lines(BLL_table, file = here::here("Figs_Tables", "BLL_table.tex")) \ No newline at end of file diff --git a/Code/3. FHLT.R b/Code/3. FHLT.R new file mode 100644 index 0000000..c571e0e --- /dev/null +++ b/Code/3. FHLT.R @@ -0,0 +1,629 @@ +# load packages +library(tidyverse) +library(kableExtra) +library(bacondecomp) +library(ggthemes) +library(did) +library(patchwork) +library(fastDummies) +library(fixest) + +# set themes and output location +select <- dplyr::select +theme_set(theme_clean() + theme(plot.background = element_blank(), + legend.background = element_blank())) +# save out into dropbox folder +options(knitr.kable.NA = '') + +# set seed for CS bootstrap estimator to be replicable +set.seed(20210215) + +# load the data +data <- haven::read_dta(here::here("Reps/FHLT", 'reformdata.dta')) + +# function to get significance stars +make_stars <- function(t, dof) { + if (2 * pt(-t, df=dof) < 0.01) { + ptstar <- "***" + } else if (2 * pt(-t, df=dof) < 0.05) { + ptstar <- "**" + } else if (2 * pt(-t, df=dof) < 0.1) { + ptstar <- "*" + } else { + ptstar <- "" + } + return(ptstar) +} + +# function to get info from models +get_info <- function(est, modelname, type, variable) { + broom::tidy(est) %>% + filter(term == variable) %>% + select(estimate, statistic, std.error) %>% + mutate(mod = modelname, type = type) +} + +# estimate the models +mod1 <- feols(qw ~ post + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data) + +mod2 <- feols(qw ~ post1 + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data) + +# estimate the two models without controls +mod3 <- feols(qw ~ post | year + code, cluster = "ccode", data = data) + +mod4 <- feols(qw ~ post1 | year + code, cluster = "ccode", data = data) + +# Event Study + Timing Graphs --------------------------------------------- +enacts <- bind_rows( + # major reforms by country year + data %>% + group_by(ccode, year) %>% + summarize(reform_type = "Major Reforms", + post = mean(post), + count = n()), + # first reforms by country year + data %>% + group_by(ccode, year) %>% + summarize(reform_type = "First Reforms", + post = mean(post1), + count = n()) +) %>% + mutate(reform_type = factor(reform_type, + levels = c("Major Reforms", "First Reforms"))) + +# make the timing plot +FHLT_TIMING <- enacts %>% + mutate(post = if_else(post == 1, "Post", "Pre"), + post = factor(post, levels = c("Pre", "Post"))) %>% + ggplot(aes(x = year, y = ccode)) + + geom_tile(aes(fill = as.factor(post), alpha = count)) + + scale_alpha(range = c(0.5, 1)) + + scale_fill_manual(values = c("#4B5F6C", "#A7473A")) + + labs(x = "Year", y = "Country") + + theme(legend.position = 'bottom', + legend.title = element_blank(), + panel.grid.minor = element_blank(), + axis.title = element_blank(), + legend.background = element_rect(color = "white")) + + guides(alpha = "none") + + facet_wrap(~reform_type) + +# # make the event study estimates +# # function to estimate the event study DID by reform type and with and without covariates +# run_es <- function(reformtype, title, lastyear) { +# +# # make relative time dummies with data +# dt <- data %>% +# # drop after last treated year +# filter(year < lastyear) %>% +# mutate(rel_year = year - {{reformtype}}, +# rel_year = if_else({{reformtype}} == lastyear, NA_real_, rel_year)) %>% +# # make dummies +# dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, +# ignore_na = TRUE) %>% +# mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) +# +# # get the relative year indicators +# yrs <- sort(unique(dt$rel_year)) +# # drop most negative and time t = -1 +# yrs <- yrs[which(yrs != min(yrs) & yrs != -1)] +# +# # get the indicator names +# indicators <- c(paste0("`", "rel_year_", yrs, "`")) +# +# # estimate the model +# mod <- feols(qw ~ .[indicators] | year + code, cluster = "ccode", data = dt) +# +# # estimate the model and plot +# broom::tidy(mod, conf.int = TRUE) %>% +# # add in the relative time variable +# mutate(t = yrs) %>% +# filter(t %>% between(-5, 5)) %>% +# select(t, estimate, conf.low, conf.high) %>% +# bind_rows( +# tibble( +# t = -1, estimate = 0, conf.low = 0, conf.high = 0 +# ) +# ) %>% +# # plot +# ggplot(aes(x = t, y = estimate)) + +# geom_point(fill = "white", shape = 21) + geom_line() + +# geom_errorbar(aes(ymin = conf.low, ymax = conf.high), +# linetype = "longdash", show.legend = FALSE) + +# geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + +# geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + +# labs(y = "Effect", x = "Years Relative to Reform") + +# scale_x_continuous(breaks = seq(-5, 5, by = 1)) + +# ggtitle(title) + +# theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), +# plot.title = element_text(hjust = 0.5)) +# +# } +# +# # estimate the two event studies +# FHLT_ES1 <- run_es(reform, "(A)", 2007) +# FHLT_ES2 <- run_es(firstreform, "(B)", 2006) +# +# # combine the plots +# FHLT_ES <- FHLT_ES1 + FHLT_ES2 +# +# # combine the timing plot and the event study plots and save +# FHLT_TIMING_ES <- FHLT_TIMING + FHLT_ES + plot_layout(nrow = 2, heights = c(1.5, 1)) + +# save +ggsave(FHLT_TIMING, filename = here::here("Figs_Tables", "FHLT_TIMING.png"), + dpi = 500, width = 8, height = 4) + +# Remedies ---------------------------------------------------------------- +# Callaway Sant'Anna +# make id variable +ids <- tibble( + code = unique(data$code) +) %>% + mutate(firm = 1:n()) + +# bring in id +data_cs <- data %>% + left_join(ids, by = "code") %>% + group_by(ccode) %>% + mutate(ccode = cur_group_id()) %>% + ungroup() %>% + select(qw, year, firm, reform, firstreform, ccode) + +# run estimate +cs1 <- att_gt(yname = "qw", + data = data_cs, + tname = "year", + idname = "firm", + gname = "reform", + clustervars = "ccode", + bstrap = T, + cband = T, + est_method = "reg", + xformla = NULL, + control_group = "notyetreated", + print_details = FALSE, + panel = TRUE, + allow_unbalanced_panel = TRUE) + +# make the dynamic event study +es1 <- aggte(cs1, type="dynamic", na.rm = TRUE, min_e = -5, max_e = 5) + +# plot +FHLT_CS1 <- tidy(es1) %>% + as_tibble() %>% + # plot + ggplot(aes(x = event.time, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(A)") + + geom_errorbar(aes(ymin = point.conf.low, ymax = point.conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Effect", x = "Years Relative to Reform") + + scale_x_continuous(breaks = seq(-5, 5, by = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# first reforms +# run CS estimator +cs2 <- att_gt(yname = "qw", + data = data_cs, + tname = "year", + idname = "firm", + gname = "firstreform", + clustervars = "ccode", + bstrap = T, + cband = T, + est_method = "reg", + xformla = NULL, + control_group = "notyettreated", + print_details = FALSE, + panel = TRUE, + allow_unbalanced_panel = TRUE) + +# make the dynamic event study +es2 <- aggte(cs2, type="dynamic", min_e = -5, max_e =5, na.rm = TRUE) + +# plot +FHLT_CS2 <- broom::tidy(es2) %>% + # plot + ggplot(aes(x = event.time, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + ggtitle("(B)") + + geom_errorbar(aes(ymin = point.conf.low, ymax = point.conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Effect", x = "Years Relative to Reform") + + scale_x_continuous(breaks = seq(-5, 5, by = 1)) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + +# stacked regression with full exclusion +stacked <- function(reformvar, lastyear, title) { + + # get treated years that we can estimate + treats <- data %>% + filter({{reformvar}} < lastyear) %>% + pull({{reformvar}}) %>% + unique() %>% + sort() + + # function to get treat-year specific datasets + make_dt <- function(tyr) { + data %>% + filter(year < lastyear) %>% + # keep firms in the adopt year pre-treatment observations + filter({{reformvar}} == tyr | ({{reformvar}} > tyr & year < {{reformvar}})) %>% + # keep just years t -5 to t + 5 + filter(year %>% between(tyr - 5, min(tyr + 5, lastyear - 1))) %>% + # replace adopt year to NA to make dummies + mutate(newyear = if_else({{reformvar}} == tyr, {{reformvar}}, NA_real_), + rel_year = year - newyear, + treat = if_else(is.na(newyear) | year < newyear, 0, 1)) %>% + select(code, year, ccode, newyear, rel_year, qw, treat) %>% + mutate(dt = as.character(tyr)) + } + + # run over out treated years + stacked_data <- map_dfr(treats, make_dt) %>% + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) %>% + mutate(cluster = paste0(ccode, "_", dt)) + + # make formula + yrs <- sort(unique(stacked_data$rel_year)) + + # drop time t = -1 + yrs <- yrs[which(yrs != -1)] + + # make covariates and formula + indicators <- c(paste0("`", "rel_year_", yrs, "`")) + + # estimate the model + mod <- feols(qw ~ .[indicators] | year^dt + code^dt, cluster = "cluster", data = stacked_data) + + # plot + plot <- broom::tidy(mod, conf.int = TRUE) %>% + # add in the relative time variable + mutate(t = yrs) %>% + filter(t %>% between(-5, 5)) %>% + select(t, estimate, conf.low, conf.high) %>% + bind_rows( + tibble( + t = -1, estimate = 0, conf.low = 0, conf.high = 0 + ) + ) %>% + # plot + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + labs(y = "Effect", x = "Years Relative to Reform") + + scale_x_continuous(breaks = seq(-5, 5, by = 1)) + + ggtitle(title) + + theme(axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360), + plot.title = element_text(hjust = 0.5)) + + # estimate the static reg + static_reg <- feols(qw ~ treat | year^dt + code^dt, cluster = "cluster", data = stacked_data) + + # output both + list(plot = plot, + static_reg = static_reg) +} + +# estimate the two event studies +fhlt_stack_reform <- stacked(reform, 2007, "(A)") +fhlt_stack_firstreform <- stacked(firstreform, 2006, "(B)") + +# estimate the two event studies +FHLT_stack1 <- fhlt_stack_reform$plot + +# do same for plot 2 +FHLT_stack2 <- fhlt_stack_firstreform$plot + +# combine and save +FHLT_CS <- FHLT_CS1 + FHLT_CS2 +FHLT_STACK <- FHLT_stack1 + FHLT_stack2 +ggsave(FHLT_CS, filename = here::here("Figs_Tables", "FHLT_CS.png"), + dpi = 800, width = 10, height = 20/6) +ggsave(FHLT_STACK, filename = here::here("Figs_Tables", "FHLT_STACK.png"), + dpi = 800, width = 10, height = 20/6) + +### Make the regression table +# first get the static stacked regresion estimates +static_stack1 <- fhlt_stack_reform$static_reg +static_stack2 <- fhlt_stack_firstreform$static_reg + +# show the with and without controls side by side +FHLT_table <- bind_rows( + get_info(mod1, "Major Reform", "Controls", "post"), + get_info(mod2, "First Reform", "Controls", "post1"), + get_info(mod3, "Major Reform", "No Controls", "post"), + get_info(mod4, "First Reform", "No Controls", "post1"), +) %>% + rowwise() %>% + # make estimate and statistic into three digits with stars + mutate(estimate = paste0(as.character(format(round(estimate, 3), nsmall = 3)), make_stars(statistic, 10000)), + std.error = paste0("(", as.character(format(round(std.error, 2), nsmall = 2)), ")")) %>% + ungroup() %>% + # push and pull + pivot_longer(cols = c(estimate, std.error), + names_to = "variable", + values_to = "value") %>% + pivot_wider(id_cols = variable, + names_from = c(mod, type), + values_from = c(value)) %>% + select(-variable) %>% + # add in rows for controls and such + bind_rows( + tibble( + `Major Reform_Controls` = c(rep("Yes", 3), "196,016", + as.character(format(round(broom::glance(mod1)$adj.r.squared, 3), nsmall = 3))), + `First Reform_Controls` = c(rep("Yes", 3), "196,016", + as.character(format(round(broom::glance(mod2)$adj.r.squared, 3), nsmall = 3))), + `Major Reform_No Controls` = c("No", rep("Yes", 2), "196,016", + as.character(format(round(broom::glance(mod3)$adj.r.squared, 3), nsmall = 3))), + `First Reform_No Controls` = c("No", rep("Yes", 2), "196,016", + as.character(format(round(broom::glance(mod4)$adj.r.squared, 3), nsmall = 3))) + ) + ) %>% + mutate(Variable = c("Post", NA_character_, "Control variables", "Firm fixed effects", + "Year fixed effects", "Observations", "Adj. R2")) %>% + select(Variable, everything()) %>% + # add in new estimators + bind_rows( + tibble( + Variable = c("Callaway & Sant'Anna", NA_character_, "Stacked Regression", NA_character_), + `Major Reform_Controls` = rep(NA_character_, 4), + `First Reform_Controls` = rep(NA_character_, 4), + `Major Reform_No Controls` = c(paste0(as.character(format(round(es1$overall.att, 3), nsmall = 3)), + make_stars(abs(es1$overall.att/es1$overall.se), 1500)), + paste0("(", as.character(format(round(es1$overall.se, 3), nsmall = 3)), ")"), + paste0(as.character(format(round(static_stack1$coeftable[1], 3), nsmall = 3)), + make_stars(abs(static_stack1$coeftable[1]/static_stack1$coeftable[2]), 1500)), + paste0("(", as.character(format(round(static_stack1$coeftable[2], 3), nsmall = 3)), ")")), + `First Reform_No Controls` = c(paste0(as.character(format(round(es2$overall.att, 3), nsmall = 3)), + make_stars(abs(es2$overall.att/es2$overall.se), 1500)), + paste0("(", as.character(format(round(es2$overall.se, 3), nsmall = 3)), ")"), + paste0(as.character(format(round(static_stack2$coeftable[1], 3), nsmall = 3)), + make_stars(abs(static_stack2$coeftable[1]/static_stack2$coeftable[2]), 1500)), + paste0("(", as.character(format(round(static_stack2$coeftable[2], 3), nsmall = 3)), ")")) + ) + ) %>% + # make and report table + kable("latex", align = 'lcccc', booktabs = T, + col.names = c("Variable", rep(c("Major Reform", "First Reform"), 2)), + label = "FHLT_table", + caption = "The Impact of Board Reforms on Firm Value") %>% + kable_styling(position = "center", latex_options = c("HOLD_position")) %>% + add_header_above(c(" " = 1, "With Covariates" = 2, "Without Covariates" = 2)) %>% + add_header_above(c(" " = 1, "Full Sample" = 4)) %>% + pack_rows("Alternative Estimators", 8, 11) + +# save +write_lines(FHLT_table, file = here::here("Figs_Tables", "FHLT_table.tex")) + +## Finally, plots showing event study design changes +# make a restricted dataset that FHLT use for event studies +data_restricted <- data %>% + filter(nreform <= 5) %>% + # make relative time indicators - they use the year before the indicator + mutate(`rel_year_-4` = if_else(year - reform == -5, 1, 0), + `rel_year_-3` = if_else(year - reform == -4, 1, 0), + `rel_year_-2` = if_else(year - reform == -3, 1, 0), + `rel_year_-1` = if_else(year - reform == -2, 1, 0), + rel_year_0 = if_else(year - reform == -1, 1, 0), + rel_year_1 = if_else(year - reform == 0, 1, 0), + rel_year_2 = if_else(year - reform == 1, 1, 0), + rel_year_3 = if_else(year - reform == 2, 1, 0), + rel_year_4 = if_else(year - reform == 3, 1, 0), + rel_year_5 = if_else(year - reform == 4, 1, 0), + rel_year_6 = if_else(year - reform == 5, 1, 0), + rel_year_2_plus = if_else(year - reform >= 1, 1, 0)) + +# model one - as published +mod1 <- feols(qw ~ `rel_year_-1` + rel_year_0 + rel_year_1 + rel_year_2_plus + + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + ## keep just the variables we need and add in time indicators + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-1, 0, 1, 2), + model = "Model 1") + +# model 2 - drop the binning +mod2 <- feols(qw ~ `rel_year_-3` + `rel_year_-2` + `rel_year_-1` + rel_year_0 + rel_year_1 + + rel_year_2_plus + itenforce + postto + divtax + capgaintax + loggdppc + fdi + + rulelaw + lntaw + logage + debttaw + cashtoaw + ppesalesw + forsale2w + + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + ## keep just the variables we need and add in time indicators + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-3:2), + model = "Model 2") + +# model 3 - fully saturate the model +mod3 <- feols(qw ~ `rel_year_-3` + `rel_year_-2` + `rel_year_-1` + rel_year_1 + rel_year_2 + rel_year_3 + + rel_year_4 + rel_year_5 + rel_year_6 + + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + ## keep just the variables we need and add in time indicators + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-3:-1, 1:6)) %>% + # add in the relative indicator for 0 + bind_rows(tibble( + term = "rel_year_0", estimate = 0, std.error = 0, statistic = 0., + p.value = 0, conf.low = 0, conf.high = 0, t = 0 + )) %>% + mutate(model = "Model 3") + +## do our model event study +# make relative time dummies with data +dt <- data %>% + # drop after last treated year + filter(year < 2007) %>% + mutate(rel_year = year - reform + 1, + rel_year = if_else(reform == 2007, NA_real_, rel_year)) %>% + # make dummies + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) + +# get the relative year indicators +yrs <- sort(unique(dt$rel_year)) +# drop most negative and time t = -1 +yrs <- yrs[which(yrs != min(yrs) & yrs != 0)] + +# get the indicator names +indicators <- c(paste0("`", "rel_year_", yrs, "`")) + +# estimate the model +mod4 <- feols(qw ~ .[indicators] | year + code, cluster = "ccode", data = dt) %>% + broom::tidy(conf.int = TRUE) %>% + filter(str_detect(term, "rel_year")) %>% + mutate(t = yrs) %>% + filter(t %>% between(-4, 6)) %>% + bind_rows(tibble( + term = "rel_year_0", estimate = 0, std.error = 0, statistic = 0., + p.value = 0, conf.low = 0, conf.high = 0, t = 0 + )) %>% + mutate(model = "Model 4") + +# combine plots for major reform +p1 <- bind_rows(mod1, mod2, mod3, mod4) %>% + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) + + scale_x_continuous(breaks = seq(-4, 6, by = 1)) + + scale_color_manual(values = c("#44781E", "#2C3B75", "#B8321A")) + + labs(x = "Relative Year", y = "Estimate") + + theme(legend.position = "bottom", + legend.title = element_blank(), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + + facet_wrap(~model, scales = "free_x", nrow = 1) + +# Now do the same thing for first reforms +# make a restricted dataset +data_restricted <- data %>% + filter(nreform1 <= 5) %>% + mutate(`rel_year_-4` = if_else(year - firstreform == -5, 1, 0), + `rel_year_-3` = if_else(year - firstreform == -4, 1, 0), + `rel_year_-2` = if_else(year - firstreform == -3, 1, 0), + `rel_year_-1` = if_else(year - firstreform == -2, 1, 0), + rel_year_0 = if_else(year - firstreform == -1, 1, 0), + rel_year_1 = if_else(year - firstreform == 0, 1, 0), + rel_year_2 = if_else(year - firstreform == 1, 1, 0), + rel_year_3 = if_else(year - firstreform == 2, 1, 0), + rel_year_4 = if_else(year - firstreform == 3, 1, 0), + rel_year_5 = if_else(year - firstreform == 4, 1, 0), + rel_year_6 = if_else(year - firstreform == 5, 1, 0), + rel_year_2_plus = if_else(year - firstreform >= 1, 1, 0)) + +# model one - as published +mod1 <- feols(qw ~ `rel_year_-1` + rel_year_0 + rel_year_1 + rel_year_2_plus + + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-1, 0, 1, 2), + model = "Model 1") + +# model 2 - drop the binning +mod2 <- feols(qw ~ `rel_year_-3` + `rel_year_-2` + `rel_year_-1` + rel_year_0 + rel_year_1 + + rel_year_2_plus + itenforce + postto + divtax + capgaintax + loggdppc + fdi + + rulelaw + lntaw + logage + debttaw + cashtoaw + ppesalesw + forsale2w + + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-3:2), + model = "Model 2") + +# model 3 - fully saturate the model +mod3 <- feols(qw ~ `rel_year_-3` + `rel_year_-2` + `rel_year_-1` + rel_year_1 + rel_year_2 + rel_year_3 + + rel_year_4 + rel_year_5 + rel_year_6 + + itenforce + postto + divtax + capgaintax + loggdppc + fdi + rulelaw + lntaw + logage + + debttaw + cashtoaw + ppesalesw + forsale2w + rdsales2w + capextaw + ch + cl + iq | year + code, + cluster = "ccode", data = data_restricted) %>% + broom::tidy(conf.int = TRUE) %>% + filter(str_detect(term, "rel_year")) %>% + mutate(t = c(-3:-1, 1:6)) %>% + bind_rows(tibble( + term = "rel_year_0", estimate = 0, std.error = 0, statistic = 0., + p.value = 0, conf.low = 0, conf.high = 0, t = 0 + )) %>% + mutate(model = "Model 3") + +## do our model event study +# make relative time dummies with data +dt <- data %>% + # drop after last treated year + filter(year < 2006) %>% + mutate(rel_year = year - firstreform + 1, + rel_year = if_else(firstreform == 2006, NA_real_, rel_year)) %>% + # make dummies + dummy_cols(select_columns = "rel_year", remove_selected_columns = FALSE, + ignore_na = TRUE) %>% + mutate(across(starts_with("rel_year_"), ~replace_na(., 0))) + +# get the relative year indicators +yrs <- sort(unique(dt$rel_year)) +# drop most negative and time t = -1 +yrs <- yrs[which(yrs != min(yrs) & yrs != 0)] + +# get the indicator names +indicators <- c(paste0("`", "rel_year_", yrs, "`")) + +# estimate the model +mod4 <- feols(qw ~ .[indicators] | year + code, cluster = "ccode", data = dt) %>% + broom::tidy(conf.int = TRUE) %>% + filter(str_detect(term, "rel_year")) %>% + mutate(t = yrs) %>% + filter(t %>% between(-4, 6)) %>% + bind_rows(tibble( + term = "rel_year_0", estimate = 0, std.error = 0, statistic = 0., + p.value = 0, conf.low = 0, conf.high = 0, t = 0 + )) %>% + mutate(model = "Model 4") + +# combine plots for major reform +p2 <- bind_rows(mod1, mod2, mod3, mod4) %>% + ggplot(aes(x = t, y = estimate)) + + geom_point(fill = "white", shape = 21) + geom_line() + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high), + linetype = "longdash", show.legend = FALSE) + + geom_hline(yintercept = 0, linetype = "longdash", color = "gray") + + geom_vline(xintercept = -0.5, linetype = "longdash", color = "gray") + + geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) + + scale_color_manual(values = c("#44781E", "#2C3B75", "#B8321A")) + + scale_x_continuous(breaks = seq(-4, 6, by = 1)) + + labs(x = "Relative Year", y = "Estimate") + + theme(legend.position = "bottom", + legend.title = element_blank(), + axis.title.y = element_text(hjust = 0.5, vjust = 0.5, angle = 360)) + + facet_wrap(~model, scales = "free_x", nrow = 1) + +# save +ggsave(p1, file = here::here("Figs_Tables", "modifed_event_studies_1.png"), + dpi = 800, width = 10, height = 3) +ggsave(p2, file = here::here("Figs_Tables", "modifed_event_studies_2.png"), + dpi = 800, width = 10, height = 3) \ No newline at end of file diff --git a/JFE_DID.Rproj b/JFE_DID.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/JFE_DID.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/README.html b/README.html new file mode 100644 index 0000000..30097cd --- /dev/null +++ b/README.html @@ -0,0 +1,234 @@ + + + + + + + + + + + + + +README + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

JFE_DID

+

This repo contains the code to replicate the analyses in Baker, Larcker, Wang, “How Much Should We Trust Staggered Difference-in-Difference Estimates”, forthcoming in the Journal of Financial Economics. The code here replicates the analyses in the January 2022 version of the paper. For those looking for replication files for a prior version (dated March 2021), these can be found at https://github.com/andrewchbaker/DiD_Codes.

+

Note that I use the package renv to save a snapshot of the versions of the software that are used in all of the codes. This should make the results fully replicable. To the extent you have any questions try reaching out to Grant McDermott, which is what most of us do when we have any kind of coding/version control questions.

+

The structure of the repo (codes in the Code folder) is as follows:

+
    +
  • 1a. Simulations - Save Data.R: this code will pull the data from Compustat, make and winsorize the ROA variable for the simulations, and save a file called simulations_data.rds in a Data folder. Note that it requires a WRDS password and username, which I source from a separate (not included file). You will need to fill in your own username and password.

  • +
  • 1b. TWFE + binary DiD: This code runs the Monte Carlo simulation as described in the Section 3.1.1. of the paper. It also produces the plots for Figures 1 and 2.

  • +
  • 1c. Alternative Estimators: This code takes the simulation results from 1b and makes Figure 6, which provides the static DiD results for the alternative event study estimators.

  • +
  • 1d. Goodman-Bacon Decomp.R: This code produces the Goodman-Bacon decomposition results which feed into Figure 3.

  • +
  • 1e. Alternative Event Study Estimators.R: This code produces the event study estimates using the alternative DiD estimators, as reported in Figure 7.

  • +
  • 1f. Rejections.R - This code produces the results in Figure 4 showing TWFE rejection rates when the true expected ATT is 0.

  • +
  • 1g. Failures with Event Studies and Heterogeneity.R: This code produces the results in Figure 5, showing how standard TWFE estimates (as commonly implemented) can both fail to pick up true pre-trends in the data, or falsely indicate the presence of pre-trends when they don’t exist.

  • +
  • 2. BLL.R: This code produces the figures and tables for the replication and extension of Beck, Levine, and Levkov (2010) text. Note that the replication data for this paper can be found at https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/15996.

  • +
  • 3. FHLT.R: This code produces the figures and tables for some Fauver, Hung, Li, and Taboada (2017) text. Unfortunately the data for this analysis was provided by the authors and cannot be uploaded here.

  • +
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/README.md b/README.md index 85e68b3..420d668 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,25 @@ # JFE_DID -This repo contains the code to replicate the analyses in Baker, Larcker, Wang. + +This repo contains the code to replicate the analyses in Baker, Larcker, Wang, "How Much Should We Trust Staggered Difference-in-Difference Estimates", forthcoming in the **Journal of Financial Economics**. The code here replicates the analyses in the January 2022 version of the paper. For those looking for replication files for a prior version (dated March 2021), these can be found at https://github.com/andrewchbaker/DiD_Codes. + +Note that I use the package `renv` to save a snapshot of the versions of the software that are used in all of the codes. This should make the results fully replicable. To the extent you have any questions try reaching out to Grant McDermott, which is what most of us do when we have any kind of coding/version control questions. + +The structure of the repo (codes in the `Code` folder) is as follows: + +- **1a. Simulations - Save Data.R**: this code will pull the data from Compustat, make and winsorize the ROA variable for the simulations, and save a file called simulations_data.rds in a Data folder. Note that it requires a WRDS password and username, which I source from a separate (not included file). You will need to fill in your own username and password. + +- **1b. TWFE + binary DiD**: This code runs the Monte Carlo simulation as described in the Section 3.1.1. of the paper. It also produces the plots for Figures 1 and 2. + +- **1c. Alternative Estimators**: This code takes the simulation results from 1b and makes Figure 6, which provides the static DiD results for the alternative event study estimators. + +- **1d. Goodman-Bacon Decomp.R**: This code produces the Goodman-Bacon decomposition results which feed into Figure 3. + +- **1e. Alternative Event Study Estimators.R**: This code produces the event study estimates using the alternative DiD estimators, as reported in Figure 7. + +- **1f. Rejections.R** - This code produces the results in Figure 4 showing TWFE rejection rates when the true expected ATT is 0. + +- **1g. Failures with Event Studies and Heterogeneity.R**: This code produces the results in Figure 5, showing how standard TWFE estimates (as commonly implemented) can both fail to pick up true pre-trends in the data, or falsely indicate the presence of pre-trends when they don't exist. + +- **2. BLL.R**: This code produces the figures and tables for the replication and extension of *Beck, Levine, and Levkov (2010)* text. Note that the replication data for this paper can be found at https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/15996. + +- **3. FHLT.R**: This code produces the figures and tables for some *Fauver, Hung, Li, and Taboada (2017)* text. Unfortunately the data for this analysis was provided by the authors and cannot be uploaded here. diff --git a/renv.lock b/renv.lock new file mode 100644 index 0000000..ccdd382 --- /dev/null +++ b/renv.lock @@ -0,0 +1,1285 @@ +{ + "R": { + "Version": "4.0.5", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://cran.rstudio.com" + } + ] + }, + "Packages": { + "BMisc": { + "Package": "BMisc", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d352e84298855efd0074b8e921a29e15" + }, + "DBI": { + "Package": "DBI", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "030aaec5bc6553f35347cbb1e70b1a17" + }, + "DRDID": { + "Package": "DRDID", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5f398fa985f0a40acb60faa98fe65f06" + }, + "Formula": { + "Package": "Formula", + "Version": "1.2-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "cc8c8c4d61346cde1ca60030ff9c241f" + }, + "KernSmooth": { + "Package": "KernSmooth", + "Version": "2.23-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9e703ad8bf0e99f3691f05da32dfe68b" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-53.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4ef21dd0348b9abb7f8bd1d77e4cd0c3" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.3-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ff280503079ad8623d3c4b1519b24ea2" + }, + "MatrixModels": { + "Package": "MatrixModels", + "Version": "0.5-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "366a8838782928e398b8762c932a42a3" + }, + "ModelMetrics": { + "Package": "ModelMetrics", + "Version": "1.2.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "40a55bd0b44719941d103291ac5e9d74" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e031418365a7f7a766181ab5a41a5716" + }, + "RPostgres": { + "Package": "RPostgres", + "Version": "1.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "42834fa9d01b66d5780c374ab94b75b4" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "dab19adae4440ae55aa8a9d238b246bb" + }, + "RcppArmadillo": { + "Package": "RcppArmadillo", + "Version": "0.10.7.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "82b411caa086a4f007ec43a288fba990" + }, + "RcppEigen": { + "Package": "RcppEigen", + "Version": "0.3.3.9.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ddfa72a87fdf4c80466a20818be91d00" + }, + "SQUAREM": { + "Package": "SQUAREM", + "Version": "2021.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0cf10dab0d023d5b46a5a14387556891" + }, + "SparseM": { + "Package": "SparseM", + "Version": "1.81", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2042cd9759cc89a453c4aefef0ce9aae" + }, + "abind": { + "Package": "abind", + "Version": "1.4-5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4f57884290cc75ab22f4af9e9d4ca862" + }, + "askpass": { + "Package": "askpass", + "Version": "1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e8a22846fff485f0be3770c2da758713" + }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "50c838a310445e954bc13f26f26a6ecf" + }, + "backports": { + "Package": "backports", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "00c7752ce37466d4a74774bcca1a507c" + }, + "bacondecomp": { + "Package": "bacondecomp", + "Version": "0.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4a5dcfcb7c0ff3b6ea01ef9592ed89e0" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "543776ae6848fde2f48ff3816d0628bc" + }, + "bit": { + "Package": "bit", + "Version": "4.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f36715f14d94678eea9933af927bc15d" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, + "blob": { + "Package": "blob", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9addc7e2c5954eca5719928131fed98c" + }, + "boot": { + "Package": "boot", + "Version": "1.3-27", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d9778c960792721e8433daaf3db8f16a" + }, + "broom": { + "Package": "broom", + "Version": "0.7.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ddf8bc55ea050f984835dd2d23cd6828" + }, + "callr": { + "Package": "callr", + "Version": "3.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "461aa75a11ce2400245190ef5d3995df" + }, + "car": { + "Package": "car", + "Version": "3.0-12", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "40e15704635050c063b1b26acce2051c" + }, + "carData": { + "Package": "carData", + "Version": "3.0-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ff5c94cec207b3fd9774cfaa5157738" + }, + "caret": { + "Package": "caret", + "Version": "6.0-90", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "42469c788063931bd460be0c4d6044eb" + }, + "cellranger": { + "Package": "cellranger", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f61dbaec772ccd2e17705c1e872e9e7c" + }, + "class": { + "Package": "class", + "Version": "7.3-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "15ef288688a6919417ade6251deea2b3" + }, + "cli": { + "Package": "cli", + "Version": "3.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "66a3834e54593c89d8beefb312347e58" + }, + "clipr": { + "Package": "clipr", + "Version": "0.7.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ebaa97ac99cc2daf04e77eecc7b781d7" + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "019388fc48e48b3da0d3a76ff94608a8" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.0-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6baccb763ee83c0bd313460fdb8b8a84" + }, + "conquer": { + "Package": "conquer", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8225311a3d9cf63a74917bfa8167d868" + }, + "corrplot": { + "Package": "corrplot", + "Version": "0.92", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fcf11a91936fd5047b2ee9bc00595e36" + }, + "cowplot": { + "Package": "cowplot", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b418e8423699d11c7f2087c2bfd07da2" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "70976176dfd7f179f212783aab2547b1" + }, + "crayon": { + "Package": "crayon", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0a6a65d92bd45b47b94b84244b528d17" + }, + "curl": { + "Package": "curl", + "Version": "4.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "022c42d49c28e95d69ca60446dbabf88" + }, + "data.table": { + "Package": "data.table", + "Version": "1.14.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "36b67b5adf57b292923f5659f5f0c853" + }, + "dbplyr": { + "Package": "dbplyr", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1f37fa4ab2f5f7eded42f78b9a887182" + }, + "did": { + "Package": "did", + "Version": "2.0.1.908", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "did", + "RemoteUsername": "bcallaway11", + "RemoteRef": "HEAD", + "RemoteSha": "616d3b116a7679d27d26ceb3efb97e10fa92b7ac", + "Hash": "3c2ee6e276cf799e5a26bb617140ecf1" + }, + "digest": { + "Package": "digest", + "Version": "0.6.28", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "49b5c6e230bfec487b8917d5a0c77cca" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "36f1ae62f026c8ba9f9b5c9a08c03297" + }, + "dreamerr": { + "Package": "dreamerr", + "Version": "1.2.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5b31f43f60613477ee28abec9ab07f1b" + }, + "dtplyr": { + "Package": "dtplyr", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1e14e4c5b2814de5225312394bc316da" + }, + "e1071": { + "Package": "e1071", + "Version": "1.7-9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "32885be243a29301c90d33db37c3aad8" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.14", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7" + }, + "fansi": { + "Package": "fansi", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d447b40982c576a72b779f0a3b3da227" + }, + "farver": { + "Package": "farver", + "Version": "2.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c98eb5133d9cb9e1622b8691487f11bb" + }, + "fastDummies": { + "Package": "fastDummies", + "Version": "1.6.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "15fcf98f57d734f967370f36f4533219" + }, + "fixest": { + "Package": "fixest", + "Version": "0.10.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9a3a6cec29f7c2a49d98d197ac3cc9aa" + }, + "forcats": { + "Package": "forcats", + "Version": "0.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "81c3244cab67468aac4c60550832655d" + }, + "foreach": { + "Package": "foreach", + "Version": "1.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e32cfc0973caba11b65b1fa691b4d8c9" + }, + "foreign": { + "Package": "foreign", + "Version": "0.8-81", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "74628ea7a3be5ee8a7b5bb0a8e84882e" + }, + "fs": { + "Package": "fs", + "Version": "1.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "44594a07a42e5f91fac9f93fda6d0109" + }, + "furrr": { + "Package": "furrr", + "Version": "0.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9f8988c1c716080a968a2949d1fd9af3" + }, + "future": { + "Package": "future", + "Version": "1.23.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7bf6fbed7f00cae876901fd70c04f3a4" + }, + "future.apply": { + "Package": "future.apply", + "Version": "1.8.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f568ce73d3d59582b0f7babd0eb33d07" + }, + "gargle": { + "Package": "gargle", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bc3bc77ea458de0a6efe94e8e7e9c641" + }, + "generics": { + "Package": "generics", + "Version": "0.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3f6bcfb0ee5d671d9fd1893d2faa79cb" + }, + "ggforce": { + "Package": "ggforce", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4c64a588b497f8c088e1d308ddd40bc6" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d7566c471c7b17e095dd023b9ef155ad" + }, + "ggpubr": { + "Package": "ggpubr", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "77089557d374c69db7cb77e65f0d6ab0" + }, + "ggrepel": { + "Package": "ggrepel", + "Version": "0.9.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "08ab869f37e6a7741a64ab9069bcb67d" + }, + "ggsci": { + "Package": "ggsci", + "Version": "2.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "81ccb8213ed592598210afd10c3a5936" + }, + "ggsignif": { + "Package": "ggsignif", + "Version": "0.6.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2e82e829a1c4a6c5d41921c177051e85" + }, + "ggthemes": { + "Package": "ggthemes", + "Version": "4.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "617fb2c300f68e8b1ba21043fba88fd7" + }, + "globals": { + "Package": "globals", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "eca8023ed5ca6372479ebb9b3207f5ae" + }, + "glue": { + "Package": "glue", + "Version": "1.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5ccb956a6d09b4ca448094582f8c7571" + }, + "googledrive": { + "Package": "googledrive", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "79ba5d18133290a69b7c135dc3dfef1a" + }, + "googlesheets4": { + "Package": "googlesheets4", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fd028aa45f6785d7e83c0ed896b45904" + }, + "gower": { + "Package": "gower", + "Version": "0.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "be6a2b3529928bd803d1c437d1d43152" + }, + "gridExtra": { + "Package": "gridExtra", + "Version": "2.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7d7f283939f563670a697165b2cf5560" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ac5c6baf7822ce8732b343f14c072c4d" + }, + "haven": { + "Package": "haven", + "Version": "2.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "10bec8a8264f3eb59531e8c4c0303f96" + }, + "here": { + "Package": "here", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "24b224366f9c2e7534d2344d10d59211" + }, + "highr": { + "Package": "highr", + "Version": "0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" + }, + "hms": { + "Package": "hms", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "af2c2531e55df5cf230c4b5444fc973c" + }, + "httr": { + "Package": "httr", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a525aba14184fec243f9eaec62fbed43" + }, + "ids": { + "Package": "ids", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "99df65cfef20e525ed38c3d2577f7190" + }, + "ipred": { + "Package": "ipred", + "Version": "0.9-12", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8312ebd8121ad2eca1c76441040bee5d" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ab57a6de7f48a8dc84910d1eca42883" + }, + "iterators": { + "Package": "iterators", + "Version": "1.0.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "64778782a89480e9a644f69aad9a2877" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.7.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "98138e0994d41508c7a6b84a0600cfcb" + }, + "kableExtra": { + "Package": "kableExtra", + "Version": "1.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "49b625e6aabe4c5f091f5850aba8ff78" + }, + "knitr": { + "Package": "knitr", + "Version": "1.36", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "46344b93f8854714cdf476433a59ed10" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3d5108641f47470611a32d0bdf357a72" + }, + "latex2exp": { + "Package": "latex2exp", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f96b4c78546d415fa21c99e83629543a" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-41", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fbd9285028b0263d76d18c95ae51a53d" + }, + "lava": { + "Package": "lava", + "Version": "1.6.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4c31a28528978d8689145f5274ce9058" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a6b6d352e3ed897373ab19d8395c98d0" + }, + "listenv": { + "Package": "listenv", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0bde42ee282efb18c7c4e63822f5b4f7" + }, + "lme4": { + "Package": "lme4", + "Version": "1.1-27.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c995b0405ce0894d6fe52b3e08ea9085" + }, + "lubridate": { + "Package": "lubridate", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2ff5eedb6ee38fb1b81205c73be1be5a" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "41287f1ac7d28a92f0a286ed507928d3" + }, + "maptools": { + "Package": "maptools", + "Version": "1.1-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "41e7097a7a02db986344eeca0d8a49ac" + }, + "matrixStats": { + "Package": "matrixStats", + "Version": "0.61.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b8e6221fc11247b12ab1b055a6f66c27" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-34", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bd4a6c4b600f58651d60d381b0e9a397" + }, + "mime": { + "Package": "mime", + "Version": "0.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "26fa77e707223e1ce042b2b5d09993dc" + }, + "minqa": { + "Package": "minqa", + "Version": "1.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "eaee7d2a6f3ed4491df868611cb064cc" + }, + "modelr": { + "Package": "modelr", + "Version": "0.1.8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9fd59716311ee82cba83dc2826fc5577" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6dfe8bf774944bd5595785e3229d8771" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-152", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "35de1ce639f20b5e10f7f46260730c65" + }, + "nloptr": { + "Package": "nloptr", + "Version": "1.2.2.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "22eeb1eb7129a2ca17b8eb70b56a02fe" + }, + "nnet": { + "Package": "nnet", + "Version": "7.3-15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b67ac021b3fb3a4b69d0d3c2bc049e9f" + }, + "numDeriv": { + "Package": "numDeriv", + "Version": "2016.8-1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "df58958f293b166e4ab885ebcad90e02" + }, + "openssl": { + "Package": "openssl", + "Version": "1.4.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f4dbc5a47fd93d3415249884d31d6791" + }, + "pROC": { + "Package": "pROC", + "Version": "1.18.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "417fd0d40479932c19faf2747817c473" + }, + "parallelly": { + "Package": "parallelly", + "Version": "1.29.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b5f399c9ce96977e22ef32c20b6cfe87" + }, + "patchwork": { + "Package": "patchwork", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c446b30cb33ec125ff02588b60660ccb" + }, + "pbapply": { + "Package": "pbapply", + "Version": "1.5-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "11359a5bb73622ab3f4136bf57108b64" + }, + "pbkrtest": { + "Package": "pbkrtest", + "Version": "0.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b304ff5955f37b48bd30518faf582929" + }, + "pillar": { + "Package": "pillar", + "Version": "1.6.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "60200b6aa32314ac457d3efbb5ccbd98" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "plogr": { + "Package": "plogr", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "09eb987710984fc2905c7129c7d85e65" + }, + "plyr": { + "Package": "plyr", + "Version": "1.8.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ec0e5ab4e5f851f6ef32cd1d1984957f" + }, + "polyclip": { + "Package": "polyclip", + "Version": "1.10-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "cb167f328b3ada4ec5cf67a7df4c900a" + }, + "polynom": { + "Package": "polynom", + "Version": "1.4-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c396592ecfe9e75cee1013533efafe34" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" + }, + "processx": { + "Package": "processx", + "Version": "3.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0cbca2bc4d16525d009c4dbba156b37c" + }, + "prodlim": { + "Package": "prodlim", + "Version": "2019.11.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c243bf70db3a6631a0c8783152fb7db9" + }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, + "progressr": { + "Package": "progressr", + "Version": "0.9.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ca0d80ecc29903f7579edbabd91f4199" + }, + "proxy": { + "Package": "proxy", + "Version": "0.4-26", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "50b405c6419e921b9e9360cc9ebbcf2d" + }, + "ps": { + "Package": "ps", + "Version": "1.6.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "32620e2001c1dce1af49c49dccbb9420" + }, + "purrr": { + "Package": "purrr", + "Version": "0.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "97def703420c8ab10d8f0e6c72101e02" + }, + "quantreg": { + "Package": "quantreg", + "Version": "5.86", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "28692dfa3efea8e19d29347d05f5a489" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7cb2c3ecfbc2c6786221d2c0c1f6ed68" + }, + "readxl": { + "Package": "readxl", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "63537c483c2dbec8d9e3183b3735254a" + }, + "recipes": { + "Package": "recipes", + "Version": "0.1.17", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "443951ef5d9e72a96405cbb0157bb1d4" + }, + "rematch": { + "Package": "rematch", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c66b930d20bb6d858cd18e1cebcfae5c" + }, + "rematch2": { + "Package": "rematch2", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "76c9e04c712a05848ae7a23d2f170a40" + }, + "renv": { + "Package": "renv", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "30e5eba91b67f7f4d75d31de14bbfbdc" + }, + "reprex": { + "Package": "reprex", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8482bbeef0c194ac236aef7c51ee375f" + }, + "reshape2": { + "Package": "reshape2", + "Version": "1.4.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb5996d0bd962d214a11140d77589917" + }, + "rlang": { + "Package": "rlang", + "Version": "0.4.12", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0879f5388fe6e4d56d7ef0b7ccb031e5" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f518ba47713f92d0d603eec7c6888faf" + }, + "rpart": { + "Package": "rpart", + "Version": "4.1-15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9787c1fcb680e655d062e7611cadf78e" + }, + "rprojroot": { + "Package": "rprojroot", + "Version": "2.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "249d8cd1e74a8f6a26194a91b47f21d1" + }, + "rstatix": { + "Package": "rstatix", + "Version": "0.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "aa020f8efde649badd0b2b5456e942fe" + }, + "rstudioapi": { + "Package": "rstudioapi", + "Version": "0.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "06c85365a03fdaf699966cc1d3cf53ea" + }, + "rvest": { + "Package": "rvest", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "74b905b0076e1de6e27f540c95ba68d5" + }, + "sandwich": { + "Package": "sandwich", + "Version": "3.0-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "10994501b2ed2262168cae1556b29168" + }, + "scales": { + "Package": "scales", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6f76f71042411426ec8df6c54f34e6dd" + }, + "selectr": { + "Package": "selectr", + "Version": "0.4-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3838071b66e0c566d55cc26bd6e27bf4" + }, + "sp": { + "Package": "sp", + "Version": "1.4-6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ce8613f4e8c84ef4da9eba65b874ebe9" + }, + "stringi": { + "Package": "stringi", + "Version": "1.7.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bba431031d30789535745a9627ac9271" + }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0759e6b6c0957edb1311028a49a35e76" + }, + "survival": { + "Package": "survival", + "Version": "3.2-10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6b7453cd9bb32b12577c78d54eeea56a" + }, + "svglite": { + "Package": "svglite", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8fb6188960bf0f90996ce52f9c2106ac" + }, + "sys": { + "Package": "sys", + "Version": "3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b227d13e29222b4574486cfcbde077fa" + }, + "systemfonts": { + "Package": "systemfonts", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6e899d7c097698d50ec87b1d8e65f246" + }, + "tibble": { + "Package": "tibble", + "Version": "3.1.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8a8f02d1934dfd6431c671361510dd0b" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c8fbdbd9fcac223d6c6fe8e406f368e1" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7243004a708d06d4716717fa1ff5b2fe" + }, + "tidyverse": { + "Package": "tidyverse", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fc4c72b6ae9bb283416bd59a3303bbab" + }, + "timeDate": { + "Package": "timeDate", + "Version": "3043.102", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fde4fc571f5f61978652c229d4713845" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.31", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "25b572f764f3c19fef9aac33b5724f3d" + }, + "trust": { + "Package": "trust", + "Version": "0.1-8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f50614d2145ba1012b62ff75f2129d5b" + }, + "tweenr": { + "Package": "tweenr", + "Version": "1.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6cc663f970a529dbf776f11d5bcd1a2e" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5e069fb033daf2317bd628d3100b75c5" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c9c462b759a5cc844ae25b5942654d13" + }, + "uuid": { + "Package": "uuid", + "Version": "0.1-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e4169eb989a5d03ccb6b628cad1b1b50" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.3.8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ecf749a1b39ea72bd9b51b76292261f1" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "55e157e2aa88161bdb0754218470d204" + }, + "vroom": { + "Package": "vroom", + "Version": "1.5.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9c3b3a3f947c7936cea7485349247e5b" + }, + "webshot": { + "Package": "webshot", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e99d80ad34457a4853674e89d5e806de" + }, + "withr": { + "Package": "withr", + "Version": "2.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ad03909b44677f930fa156d47d7a3aeb" + }, + "xfun": { + "Package": "xfun", + "Version": "0.28", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f7f3a61ab62cd046d307577a8ae12999" + }, + "xml2": { + "Package": "xml2", + "Version": "1.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d4d71a75dd3ea9eb5fa28cc21f9585e2" + }, + "yaml": { + "Package": "yaml", + "Version": "2.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2826c5d9efb0a88f657c7a679c7106db" + }, + "zoo": { + "Package": "zoo", + "Version": "1.8-9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "035d1c7c12593038c26fb1c2fd40c4d2" + } + } +} diff --git a/renv/.gitignore b/renv/.gitignore new file mode 100644 index 0000000..2129631 --- /dev/null +++ b/renv/.gitignore @@ -0,0 +1,5 @@ +library/ +local/ +lock/ +python/ +staging/ diff --git a/renv/activate.R b/renv/activate.R new file mode 100644 index 0000000..304fd90 --- /dev/null +++ b/renv/activate.R @@ -0,0 +1,668 @@ + +local({ + + # the requested version of renv + version <- "0.14.0" + + # the project directory + project <- getwd() + + # allow environment variable to control activation + activate <- Sys.getenv("RENV_ACTIVATE_PROJECT") + if (!nzchar(activate)) { + + # don't auto-activate when R CMD INSTALL is running + if (nzchar(Sys.getenv("R_INSTALL_PKG"))) + return(FALSE) + + } + + # bail if activation was explicitly disabled + if (tolower(activate) %in% c("false", "f", "0")) + return(FALSE) + + # avoid recursion + if (nzchar(Sys.getenv("RENV_R_INITIALIZING"))) + return(invisible(TRUE)) + + # signal that we're loading renv during R startup + Sys.setenv("RENV_R_INITIALIZING" = "true") + on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE) + + # signal that we've consented to use renv + options(renv.consent = TRUE) + + # load the 'utils' package eagerly -- this ensures that renv shims, which + # mask 'utils' packages, will come first on the search path + library(utils, lib.loc = .Library) + + # check to see if renv has already been loaded + if ("renv" %in% loadedNamespaces()) { + + # if renv has already been loaded, and it's the requested version of renv, + # nothing to do + spec <- .getNamespaceInfo(.getNamespace("renv"), "spec") + if (identical(spec[["version"]], version)) + return(invisible(TRUE)) + + # otherwise, unload and attempt to load the correct version of renv + unloadNamespace("renv") + + } + + # load bootstrap tools + bootstrap <- function(version, library) { + + # attempt to download renv + tarball <- tryCatch(renv_bootstrap_download(version), error = identity) + if (inherits(tarball, "error")) + stop("failed to download renv ", version) + + # now attempt to install + status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) + if (inherits(status, "error")) + stop("failed to install renv ", version) + + } + + renv_bootstrap_tests_running <- function() { + getOption("renv.tests.running", default = FALSE) + } + + renv_bootstrap_repos <- function() { + + # check for repos override + repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) + if (!is.na(repos)) + return(repos) + + # if we're testing, re-use the test repositories + if (renv_bootstrap_tests_running()) + return(getOption("renv.tests.repos")) + + # retrieve current repos + repos <- getOption("repos") + + # ensure @CRAN@ entries are resolved + repos[repos == "@CRAN@"] <- getOption( + "renv.repos.cran", + "https://cloud.r-project.org" + ) + + # add in renv.bootstrap.repos if set + default <- c(FALLBACK = "https://cloud.r-project.org") + extra <- getOption("renv.bootstrap.repos", default = default) + repos <- c(repos, extra) + + # remove duplicates that might've snuck in + dupes <- duplicated(repos) | duplicated(names(repos)) + repos[!dupes] + + } + + renv_bootstrap_download <- function(version) { + + # if the renv version number has 4 components, assume it must + # be retrieved via github + nv <- numeric_version(version) + components <- unclass(nv)[[1]] + + methods <- if (length(components) == 4L) { + list( + renv_bootstrap_download_github + ) + } else { + list( + renv_bootstrap_download_cran_latest, + renv_bootstrap_download_cran_archive + ) + } + + for (method in methods) { + path <- tryCatch(method(version), error = identity) + if (is.character(path) && file.exists(path)) + return(path) + } + + stop("failed to download renv ", version) + + } + + renv_bootstrap_download_impl <- function(url, destfile) { + + mode <- "wb" + + # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 + fixup <- + Sys.info()[["sysname"]] == "Windows" && + substring(url, 1L, 5L) == "file:" + + if (fixup) + mode <- "w+b" + + utils::download.file( + url = url, + destfile = destfile, + mode = mode, + quiet = TRUE + ) + + } + + renv_bootstrap_download_cran_latest <- function(version) { + + spec <- renv_bootstrap_download_cran_latest_find(version) + + message("* Downloading renv ", version, " ... ", appendLF = FALSE) + + type <- spec$type + repos <- spec$repos + + info <- tryCatch( + utils::download.packages( + pkgs = "renv", + destdir = tempdir(), + repos = repos, + type = type, + quiet = TRUE + ), + condition = identity + ) + + if (inherits(info, "condition")) { + message("FAILED") + return(FALSE) + } + + # report success and return + message("OK (downloaded ", type, ")") + info[1, 2] + + } + + renv_bootstrap_download_cran_latest_find <- function(version) { + + # check whether binaries are supported on this system + binary <- + getOption("renv.bootstrap.binary", default = TRUE) && + !identical(.Platform$pkgType, "source") && + !identical(getOption("pkgType"), "source") && + Sys.info()[["sysname"]] %in% c("Darwin", "Windows") + + types <- c(if (binary) "binary", "source") + + # iterate over types + repositories + for (type in types) { + for (repos in renv_bootstrap_repos()) { + + # retrieve package database + db <- tryCatch( + as.data.frame( + utils::available.packages(type = type, repos = repos), + stringsAsFactors = FALSE + ), + error = identity + ) + + if (inherits(db, "error")) + next + + # check for compatible entry + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + # found it; return spec to caller + spec <- list(entry = entry, type = type, repos = repos) + return(spec) + + } + } + + # if we got here, we failed to find renv + fmt <- "renv %s is not available from your declared package repositories" + stop(sprintf(fmt, version)) + + } + + renv_bootstrap_download_cran_archive <- function(version) { + + name <- sprintf("renv_%s.tar.gz", version) + repos <- renv_bootstrap_repos() + urls <- file.path(repos, "src/contrib/Archive/renv", name) + destfile <- file.path(tempdir(), name) + + message("* Downloading renv ", version, " ... ", appendLF = FALSE) + + for (url in urls) { + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (identical(status, 0L)) { + message("OK") + return(destfile) + } + + } + + message("FAILED") + return(FALSE) + + } + + renv_bootstrap_download_github <- function(version) { + + enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") + if (!identical(enabled, "TRUE")) + return(FALSE) + + # prepare download options + pat <- Sys.getenv("GITHUB_PAT") + if (nzchar(Sys.which("curl")) && nzchar(pat)) { + fmt <- "--location --fail --header \"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "curl", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { + fmt <- "--header=\"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "wget", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } + + message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) + + url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) + name <- sprintf("renv_%s.tar.gz", version) + destfile <- file.path(tempdir(), name) + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (!identical(status, 0L)) { + message("FAILED") + return(FALSE) + } + + message("OK") + return(destfile) + + } + + renv_bootstrap_install <- function(version, tarball, library) { + + # attempt to install it into project library + message("* Installing renv ", version, " ... ", appendLF = FALSE) + dir.create(library, showWarnings = FALSE, recursive = TRUE) + + # invoke using system2 so we can capture and report output + bin <- R.home("bin") + exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" + r <- file.path(bin, exe) + args <- c("--vanilla", "CMD", "INSTALL", "-l", shQuote(library), shQuote(tarball)) + output <- system2(r, args, stdout = TRUE, stderr = TRUE) + message("Done!") + + # check for successful install + status <- attr(output, "status") + if (is.numeric(status) && !identical(status, 0L)) { + header <- "Error installing renv:" + lines <- paste(rep.int("=", nchar(header)), collapse = "") + text <- c(header, lines, output) + writeLines(text, con = stderr()) + } + + status + + } + + renv_bootstrap_platform_prefix <- function() { + + # construct version prefix + version <- paste(R.version$major, R.version$minor, sep = ".") + prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + + # include SVN revision for development versions of R + # (to avoid sharing platform-specific artefacts with released versions of R) + devel <- + identical(R.version[["status"]], "Under development (unstable)") || + identical(R.version[["nickname"]], "Unsuffered Consequences") + + if (devel) + prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + + # build list of path components + components <- c(prefix, R.version$platform) + + # include prefix if provided by user + prefix <- renv_bootstrap_platform_prefix_impl() + if (!is.na(prefix) && nzchar(prefix)) + components <- c(prefix, components) + + # build prefix + paste(components, collapse = "/") + + } + + renv_bootstrap_platform_prefix_impl <- function() { + + # if an explicit prefix has been supplied, use it + prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) + if (!is.na(prefix)) + return(prefix) + + # if the user has requested an automatic prefix, generate it + auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) + if (auto %in% c("TRUE", "True", "true", "1")) + return(renv_bootstrap_platform_prefix_auto()) + + # empty string on failure + "" + + } + + renv_bootstrap_platform_prefix_auto <- function() { + + prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) + if (inherits(prefix, "error") || prefix %in% "unknown") { + + msg <- paste( + "failed to infer current operating system", + "please file a bug report at https://github.com/rstudio/renv/issues", + sep = "; " + ) + + warning(msg) + + } + + prefix + + } + + renv_bootstrap_platform_os <- function() { + + sysinfo <- Sys.info() + sysname <- sysinfo[["sysname"]] + + # handle Windows + macOS up front + if (sysname == "Windows") + return("windows") + else if (sysname == "Darwin") + return("macos") + + # check for os-release files + for (file in c("/etc/os-release", "/usr/lib/os-release")) + if (file.exists(file)) + return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) + + # check for redhat-release files + if (file.exists("/etc/redhat-release")) + return(renv_bootstrap_platform_os_via_redhat_release()) + + "unknown" + + } + + renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { + + # read /etc/os-release + release <- utils::read.table( + file = file, + sep = "=", + quote = c("\"", "'"), + col.names = c("Key", "Value"), + comment.char = "#", + stringsAsFactors = FALSE + ) + + vars <- as.list(release$Value) + names(vars) <- release$Key + + # get os name + os <- tolower(sysinfo[["sysname"]]) + + # read id + id <- "unknown" + for (field in c("ID", "ID_LIKE")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + id <- vars[[field]] + break + } + } + + # read version + version <- "unknown" + for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + version <- vars[[field]] + break + } + } + + # join together + paste(c(os, id, version), collapse = "-") + + } + + renv_bootstrap_platform_os_via_redhat_release <- function() { + + # read /etc/redhat-release + contents <- readLines("/etc/redhat-release", warn = FALSE) + + # infer id + id <- if (grepl("centos", contents, ignore.case = TRUE)) + "centos" + else if (grepl("redhat", contents, ignore.case = TRUE)) + "redhat" + else + "unknown" + + # try to find a version component (very hacky) + version <- "unknown" + + parts <- strsplit(contents, "[[:space:]]")[[1L]] + for (part in parts) { + + nv <- tryCatch(numeric_version(part), error = identity) + if (inherits(nv, "error")) + next + + version <- nv[1, 1] + break + + } + + paste(c("linux", id, version), collapse = "-") + + } + + renv_bootstrap_library_root_name <- function(project) { + + # use project name as-is if requested + asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") + if (asis) + return(basename(project)) + + # otherwise, disambiguate based on project's path + id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) + paste(basename(project), id, sep = "-") + + } + + renv_bootstrap_library_root <- function(project) { + + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) + if (!is.na(path)) + return(path) + + path <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(path)) { + name <- renv_bootstrap_library_root_name(project) + return(file.path(path, name)) + } + + prefix <- renv_bootstrap_profile_prefix() + paste(c(project, prefix, "renv/library"), collapse = "/") + + } + + renv_bootstrap_validate_version <- function(version) { + + loadedversion <- utils::packageDescription("renv", fields = "Version") + if (version == loadedversion) + return(TRUE) + + # assume four-component versions are from GitHub; three-component + # versions are from CRAN + components <- strsplit(loadedversion, "[.-]")[[1]] + remote <- if (length(components) == 4L) + paste("rstudio/renv", loadedversion, sep = "@") + else + paste("renv", loadedversion, sep = "@") + + fmt <- paste( + "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", + "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", + "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", + sep = "\n" + ) + + msg <- sprintf(fmt, loadedversion, version, remote) + warning(msg, call. = FALSE) + + FALSE + + } + + renv_bootstrap_hash_text <- function(text) { + + hashfile <- tempfile("renv-hash-") + on.exit(unlink(hashfile), add = TRUE) + + writeLines(text, con = hashfile) + tools::md5sum(hashfile) + + } + + renv_bootstrap_load <- function(project, libpath, version) { + + # try to load renv from the project library + if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) + return(FALSE) + + # warn if the version of renv loaded does not match + renv_bootstrap_validate_version(version) + + # load the project + renv::load(project) + + TRUE + + } + + renv_bootstrap_profile_load <- function(project) { + + # if RENV_PROFILE is already set, just use that + profile <- Sys.getenv("RENV_PROFILE", unset = NA) + if (!is.na(profile) && nzchar(profile)) + return(profile) + + # check for a profile file (nothing to do if it doesn't exist) + path <- file.path(project, "renv/local/profile") + if (!file.exists(path)) + return(NULL) + + # read the profile, and set it if it exists + contents <- readLines(path, warn = FALSE) + if (length(contents) == 0L) + return(NULL) + + # set RENV_PROFILE + profile <- contents[[1L]] + if (nzchar(profile)) + Sys.setenv(RENV_PROFILE = profile) + + profile + + } + + renv_bootstrap_profile_prefix <- function() { + profile <- renv_bootstrap_profile_get() + if (!is.null(profile)) + return(file.path("renv/profiles", profile)) + } + + renv_bootstrap_profile_get <- function() { + profile <- Sys.getenv("RENV_PROFILE", unset = "") + renv_bootstrap_profile_normalize(profile) + } + + renv_bootstrap_profile_set <- function(profile) { + profile <- renv_bootstrap_profile_normalize(profile) + if (is.null(profile)) + Sys.unsetenv("RENV_PROFILE") + else + Sys.setenv(RENV_PROFILE = profile) + } + + renv_bootstrap_profile_normalize <- function(profile) { + + if (is.null(profile) || profile %in% c("", "default")) + return(NULL) + + profile + + } + + # load the renv profile, if any + renv_bootstrap_profile_load(project) + + # construct path to library root + root <- renv_bootstrap_library_root(project) + + # construct library prefix for platform + prefix <- renv_bootstrap_platform_prefix() + + # construct full libpath + libpath <- file.path(root, prefix) + + # attempt to load + if (renv_bootstrap_load(project, libpath, version)) + return(TRUE) + + # load failed; inform user we're about to bootstrap + prefix <- paste("# Bootstrapping renv", version) + postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") + header <- paste(prefix, postfix) + message(header) + + # perform bootstrap + bootstrap(version, libpath) + + # exit early if we're just testing bootstrap + if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) + return(TRUE) + + # try again to load + if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { + message("* Successfully installed and loaded renv ", version, ".") + return(renv::load()) + } + + # failed to download or load renv; warn the user + msg <- c( + "Failed to find an renv installation: the project will not be loaded.", + "Use `renv::activate()` to re-initialize the project." + ) + + warning(paste(msg, collapse = "\n"), call. = FALSE) + +}) diff --git a/renv/settings.dcf b/renv/settings.dcf new file mode 100644 index 0000000..fc4e479 --- /dev/null +++ b/renv/settings.dcf @@ -0,0 +1,8 @@ +external.libraries: +ignored.packages: +package.dependency.fields: Imports, Depends, LinkingTo +r.version: +snapshot.type: implicit +use.cache: TRUE +vcs.ignore.library: TRUE +vcs.ignore.local: TRUE