## ======================================
## Appendix A Table 6: Extension to later analysis by Simon et al (2005)
## ======================================

  #Start from an empty global environment so results do not depend on
  #objects left over from an earlier session
  rm(list=ls())
  
  #First attempt: directory of the file this code was sourced from.
  #getSrcDirectory() yields character(0) when no source reference is kept
  #(e.g. Rscript) and "" when run from the console, so neither is usable.
  my.wd <- tryCatch(getSrcDirectory(function(x){x}),
                    error = function(e) character(0))
  
  #Second attempt: ask the RStudio API for the active document's directory.
  #Scalar, length-safe checks (||) so a zero-length result cannot abort `if`.
  if (length(my.wd) != 1 || is.na(my.wd) || !nzchar(my.wd)) {
    my.wd <- tryCatch(dirname(rstudioapi::getActiveDocumentContext()$path),
                      error = function(e) character(0))
  }
  
  #Set working directory only when a usable path was found; otherwise keep
  #the current directory and say so instead of failing inside setwd()
  if (length(my.wd) == 1 && !is.na(my.wd) && nzchar(my.wd)) {
    setwd(my.wd)
  } else {
    warning("Could not determine the script directory; keeping the current ",
            "working directory: ", getwd(), call. = FALSE)
  }

## ======================================
## Load packages and functions
## ======================================

  #Read in project helper functions (paths are relative to the working
  #directory set above):
  #  prepModelData        - builds the modeling datasets used below
  #  installPackageNotFound - called below once per required package
  #  formatSig            - used below to format R2 values and p-values
  source("Functions/prepModelData.R")
  source("Functions/installPackageNotFound.R")
  source("Functions/formatSig.R")
  
  #Libraries for modeling
  #NOTE(review): plm(), vcovHC(), coeftest() and stargazer() are called later
  #without any library() call, so installPackageNotFound() presumably installs
  #the package when missing AND attaches it - confirm in the helper's source.
  installPackageNotFound("data.table")
  installPackageNotFound("plm")
  installPackageNotFound("stargazer")
  installPackageNotFound("lmtest")

## ======================================
## Read in data
## ======================================

  #Hospitalization records; character columns are kept as strings
  hosp.zip3.qtr.1983.2009 <- read.csv(
    file = "../Data/hosp_zip3_quarter_1983_2009.csv",
    stringsAsFactors = FALSE
  )

## ==========================
## Reshape data for modeling
## ==========================

  #Build both modeling datasets with one set of shared arguments. The two
  #calls differ only in control.grp:
  #  did - Simon disease selection (control.grp = FALSE)
  #  ddd - Simon disease selection with appendicitis (control.grp = TRUE)
  simon.panels <- lapply(
    c(did = FALSE, ddd = TRUE),
    function(with.control) {
      prepModelData(dta = hosp.zip3.qtr.1983.2009,
                    fbi.name = "simon.codes",
                    control.name = "append",
                    years = 1993:2000,
                    control.grp = with.control,
                    filter = FALSE)
    }
  )

  dta.simon.did <- simon.panels[["did"]]
  dta.simon.ddd <- simon.panels[["ddd"]]

  #Drop the temporary container so only the two datasets remain
  rm(simon.panels)

## ==========================
## LA treated models
## ==========================  

  #LA treatment: prop.treated is the LA share for every model in this section
  #(the SoCal section later overwrites the same column)
  dta.simon.did$prop.treated <- dta.simon.did$prop.la
  dta.simon.ddd$prop.treated <- dta.simon.ddd$prop.la

  #Model specifications, written once and reused for the panel fit and for
  #the dummy-variable regression that supplies R2
  fml.did <- logCount ~ I(as.numeric(year >= 1998) * prop.treated)
  fml.jl  <- logCount ~ I(as.numeric(year >= 1998) * prop.treated) +
    I(as.numeric(year >= 1998) * prop.treated * disease.type)
  fml.ddd <- logCount ~ I(as.numeric(year >= 1998) * prop.treated) +
    I(as.numeric(year >= 1998) * prop.treated * disease.type) +
    I(as.numeric(year >= 1998) * disease.type)

  #1) DiD LA: two-way within estimator indexed by geo and year.qtr
  did.la <- plm(fml.did,
                data = dta.simon.did,
                index = c("geo", "year.qtr"),
                model = "within",
                effect = "twoways")

  #HC0 covariance clustered on the panel's group dimension
  did.la.se <- coeftest(did.la,
                        vcov = vcovHC(did.la, type = "HC0", cluster = "group"))

  #R2 from the same specification with explicit period and unit dummies
  did.la.r2 <- formatSig(
    summary(lm(update(fml.did, . ~ . + factor(year.qtr) + factor(geo)),
               data = dta.simon.did))$r.squared,
    2)

  #3) JL spec LA: adds the treatment x disease.type interaction
  jl.la <- plm(fml.jl,
               data = dta.simon.ddd,
               index = c("geo.type.pk", "year.qtr"),
               model = "within",
               effect = "twoways")

  jl.la.se <- coeftest(jl.la,
                       vcov = vcovHC(jl.la, type = "HC0", cluster = "group"))

  jl.la.r2 <- formatSig(
    summary(lm(update(fml.jl, . ~ . + factor(year.qtr) + factor(geo.type.pk)),
               data = dta.simon.ddd))$r.squared,
    2)

  #5) DDD spec LA: additionally includes the post-1998 x disease.type term
  ddd.la <- plm(fml.ddd,
                data = dta.simon.ddd,
                index = c("geo.type.pk", "year.qtr"),
                model = "within",
                effect = "twoways")

  ddd.la.se <- coeftest(ddd.la,
                        vcov = vcovHC(ddd.la, type = "HC0", cluster = "group"))

  ddd.la.r2 <- formatSig(
    summary(lm(update(fml.ddd, . ~ . + factor(year.qtr) + factor(geo.type.pk)),
               data = dta.simon.ddd))$r.squared,
    2)

## ==========================
## SoCal treated models
## ==========================  
  #SoCal treatment: prop.treated now holds the SoCal share, replacing the
  #value used by the LA models (which have already been fitted)
  dta.simon.did$prop.treated <- dta.simon.did$prop.socal
  dta.simon.ddd$prop.treated <- dta.simon.ddd$prop.socal

  #Model specifications, written once and reused for the panel fit and for
  #the dummy-variable regression that supplies R2
  fml.did <- logCount ~ I(as.numeric(year >= 1998) * prop.treated)
  fml.jl  <- logCount ~ I(as.numeric(year >= 1998) * prop.treated) +
    I(as.numeric(year >= 1998) * prop.treated * disease.type)
  fml.ddd <- logCount ~ I(as.numeric(year >= 1998) * prop.treated) +
    I(as.numeric(year >= 1998) * prop.treated * disease.type) +
    I(as.numeric(year >= 1998) * disease.type)

  #2) DiD SC: two-way within estimator indexed by geo and year.qtr
  did.soc <- plm(fml.did,
                 data = dta.simon.did,
                 index = c("geo", "year.qtr"),
                 model = "within",
                 effect = "twoways")

  #HC0 covariance clustered on the panel's group dimension
  did.soc.se <- coeftest(did.soc,
                         vcov = vcovHC(did.soc, type = "HC0", cluster = "group"))

  #R2 from the same specification with explicit period and unit dummies
  did.soc.r2 <- formatSig(
    summary(lm(update(fml.did, . ~ . + factor(year.qtr) + factor(geo)),
               data = dta.simon.did))$r.squared,
    2)

  #4) JL spec SOC: adds the treatment x disease.type interaction
  jl.soc <- plm(fml.jl,
                data = dta.simon.ddd,
                index = c("geo.type.pk", "year.qtr"),
                model = "within",
                effect = "twoways")

  jl.soc.se <- coeftest(jl.soc,
                        vcov = vcovHC(jl.soc, type = "HC0", cluster = "group"))

  jl.soc.r2 <- formatSig(
    summary(lm(update(fml.jl, . ~ . + factor(year.qtr) + factor(geo.type.pk)),
               data = dta.simon.ddd))$r.squared,
    2)

  #6) DDD spec SOC: additionally includes the post-1998 x disease.type term
  ddd.soc <- plm(fml.ddd,
                 data = dta.simon.ddd,
                 index = c("geo.type.pk", "year.qtr"),
                 model = "within",
                 effect = "twoways")

  ddd.soc.se <- coeftest(ddd.soc,
                         vcov = vcovHC(ddd.soc, type = "HC0", cluster = "group"))

  ddd.soc.r2 <- formatSig(
    summary(lm(update(fml.ddd, . ~ . + factor(year.qtr) + factor(geo.type.pk)),
               data = dta.simon.ddd))$r.squared,
    2)

## ==========================
## Compare coefficients 
## ==========================  
  #Coefficient names exactly as they appear in the fitted models
  term.treat <- "I(as.numeric(year >= 1998) * prop.treated)"
  term.treat.dis <- "I(as.numeric(year >= 1998) * prop.treated * disease.type)"

  #Each comparison below is LA estimate minus SoCal estimate. The standard
  #error of the difference is sqrt(se.la^2 + se.soc^2) (no covariance term),
  #and the p-value is two-sided from the standard normal distribution.

  #DiD: compares the treatment coefficient
  did.diff <- did.la$coefficients[[term.treat]] -
    did.soc$coefficients[[term.treat]]
  did.diff.se <- sqrt(did.la.se[term.treat, 2]^2 +
                        did.soc.se[term.treat, 2]^2)
  did.diff.p <- 2 * pnorm(abs(did.diff / did.diff.se), lower.tail = FALSE)

  #JL: compares the treatment x disease.type coefficient
  jl.diff <- jl.la$coefficients[[term.treat.dis]] -
    jl.soc$coefficients[[term.treat.dis]]
  jl.diff.se <- sqrt(jl.la.se[term.treat.dis, 2]^2 +
                       jl.soc.se[term.treat.dis, 2]^2)
  jl.diff.p <- 2 * pnorm(abs(jl.diff / jl.diff.se), lower.tail = FALSE)

  #DDD: compares the treatment x disease.type coefficient
  ddd.diff <- ddd.la$coefficients[[term.treat.dis]] -
    ddd.soc$coefficients[[term.treat.dis]]
  ddd.diff.se <- sqrt(ddd.la.se[term.treat.dis, 2]^2 +
                        ddd.soc.se[term.treat.dis, 2]^2)
  ddd.diff.p <- 2 * pnorm(abs(ddd.diff / ddd.diff.se), lower.tail = FALSE)

  #Report the three p-values (DiD, JL, DDD) formatted to 2 significant places
  print(c(formatSig(did.diff.p, 2),
          formatSig(jl.diff.p, 2),
          formatSig(ddd.diff.p, 2)))

## ==========================
## Output: Table
## ==========================  

  #Row labels (in order of first appearance across the models) and caption
  cov.label <- c("Grade Cards",
                 "Grade Cards x Foodborne",
                 "Foodborne x post-1998")
  title <- "Table 6: Replication of Simon analysis with falsification tests substituting Southern CA (excluding LA) as placebo treated units."

  #Models and their coeftest results, in table column order
  tbl.models <- list(did.la, did.soc, jl.la, jl.soc, ddd.la, ddd.soc)
  tbl.tests <- list(did.la.se, did.soc.se, jl.la.se, jl.soc.se,
                    ddd.la.se, ddd.soc.se)

  #Print table: coefficients rounded to 2 places; standard errors (column 2)
  #and p-values (column 4) come from the coeftest results rather than the
  #default plm summaries. The built-in R2 rows are omitted and replaced by
  #the separately computed R2 values via add.lines.
  latex_output <- stargazer(
    did.la, did.soc, jl.la, jl.soc, ddd.la, ddd.soc,
    coef = lapply(tbl.models, function(m) round(m$coefficients, 2)),
    se = lapply(tbl.tests, function(ct) ct[, 2]),
    p = lapply(tbl.tests, function(ct) ct[, 4]),
    covariate.labels = cov.label,
    title = title,
    digits = 2,
    dep.var.labels = "",
    dep.var.caption = "",
    notes.append = FALSE,
    column.sep.width = "1pt",
    no.space = TRUE,
    omit.stat = c("rsq", "adj.rsq", "f"),
    notes.align = "l",
    type = "text",
    add.lines = list(c("R2",
                       did.la.r2, did.soc.r2,
                       jl.la.r2, jl.soc.r2,
                       ddd.la.r2, ddd.soc.r2))
  )
  
