/* Last updated: June, 2019 */

/* This file runs all the regressions reported in the final draft of the paper. Estimates are stored
and plotted in CultAssim_plots.do */

* Session settings: do not pause output, and raise the matrix size limit.
set more off
set matsize 1000
* Directory roots stored as global macros. Only writeup and datadir are
* referenced in the code visible in this file; the others may be used by
* scripts run from here (not verified).
global Kbulk "/disk/bulkw/hkissel/cens1930.work/"
global datamaps "/disk/homedirs/nber/hkissel/cens1930.work/CultAssim/AppendixTables/datamaps"
global writeup "/disk/homedirs/nber/hkissel/cens1930.work/CultAssim/AppendixTables/tables_final"
global datadir "/disk/bulkw/hkissel/cens1930.work/Cultural_Assim_data"
global map "/disk/homedirs/nber/hkissel/cens1930.work/datamaps"


* Load the analysis dataset, replacing whatever is currently in memory.
use  $datadir/CultAssim_data_foranalysis_all.dta, clear

/* --------------------------------------------------------------------------
SETUP: derived variables and sample restrictions used by the regressions below
----------------------------------------------------------------------------- */

* 3-year age bands (1 when the child's age falls inside the band).
* No band is generated for ages 3-5.
cap drop three*
gen three_0_2 = age <= 2
gen three_6_8 = inrange(age, 6, 8)
gen three_9_11 = inrange(age, 9, 11)
gen three_12_15 = inrange(age, 12, 15)

* 5-year age bands (already defined as by_*).
* NOTE(review): by_1900_190 looks like an abbreviated variable name; it only
* resolves while variable abbreviation is on - confirm the full name.
drop by_1900_190

* When using mom's age at birth we keep only children whose mother gave birth
* between ages 20 and 40 (inclusive).
gen mom_aged_birth = birthyear - mother_birthyear
gen flag2 = inrange(mom_aged_birth, 20, 40)
keep if flag2 == 1
* (The former recode of negative mom_aged_birth to missing was removed: after
* the 20-40 filter above no negative value can remain, so it never fired.)

/* Factor variables may not contain noninteger values, so round birth order
   and top-code it at 4. Missing values compare as larger than any number in
   Stata, so the top-code must exclude them explicitly; otherwise a missing
   birth order would silently be recoded to 4. */
gen birth_order_fact = round(birth_order_all, 1)
replace birth_order_fact = 4 if birth_order_fact > 4 & !missing(birth_order_fact)


/* drop those born in south */
drop if inlist(bpl, 100, 500, 1200, 1300, 2100, 2200, 2400, 2800, 3700, 4000, 4500, 4700, 4800, 5100, 5400, 1000, 1100)

/* round mbpl: strip the last two digits of the mother's birthplace code */
replace mbpl = floor(mbpl/100)

/* Appendix Fig 1 - Name foreignness of foreign-born children by mother's age at birth.
Includes only children born outside of the US, so it must run before the
foreign-born kids are dropped below. */

areg  FB_dumb i.mom_aged_birth by_* i.birth_order_fact if native == 0 , absorb(family) coeflegend
estimates store figA1

/* Keep only native-born kids */
drop if native == 0
gen native_mo = foreign_mo == 0

	/* "Auxiliary regression" to find the baseline difference in F-index between
	foreign- and native-born moms. Pools natives and foreign born, with NO mother
	FE. Includes the FB indicator, age indicators, and their interactions. The
	coefficient on the FB indicator is the foreign-native gap at the base age
	category.
	NOTE(review): this is the only specification that uses by* instead of by_*;
	confirm no other variable name starts with "by". */
	reg FB_dumb i.mom_aged_birth##foreign_mo by* i.birth_order_fact, vce(robust)
	local diff =  _b[1.foreign_mo]
	local se = _se[1.foreign_mo]
	* Release a handle left open by a previously aborted run; otherwise
	* file open stops with "file handle N already exists".
	capture file close N
	file open N using "$writeup/baseline_difference.tex", write replace
	file write N "Baseline difference = `diff' (`se') "
	file close N
		
/* --------------------------------------------------------------------------
Figure 1 (and various robustness checks):
F-index by mother's years in the US at birth & by mother's age at birth
----------------------------------------------------------------------------- */
/* Sample restriction: only include moms who have been in the US between 0 and 20 yrs
  at the birth. Since we only keep mothers aged 20-40 at birth, moms who arrived as
  children are mostly excluded, i.e. if you arrive at age 5 and give birth at age 26,
  you have been in the US for > 20 yrs at birth, so you will not be included. */

	gen fig1c_sample = inrange(mom_years_in_us_birth, 0, 20)

/* Fig. 1. Panel A. Mom's Age at Birth: native-born moms (nb), then foreign-born moms (fb) */
	foreach grp in nb fb {
		local fm = ("`grp'" == "fb")
		areg FB_dumb i.mom_aged_birth by_* i.birth_order_fact if foreign_mo == `fm', absorb(family) vce(robust)
		estimates store fig1a_`grp'
	}

	* with mbpl controls - shouldn't kill results:
	*areg  FB_dumb i.mom_aged_birth by_*   i.birth_order_fact i.mbpl if foreign_mo == 1 , absorb(family) vce(robust)
	*estimates store fig1a_fb_mbplcontrols

	* foreign-born moms again, restricted to the years-in-US (Panel C) sample:
	areg FB_dumb i.mom_aged_birth by_* i.birth_order_fact if foreign_mo == 1 & fig1c_sample == 1, absorb(family) vce(robust)
	estimates store fig1a_fb_fig1csamp

	* estimated separately by state: start from clean output files, then append one
	* linear-in-age regression per census_state code
	cap erase $writeup/bystate.xls
	cap erase $writeup/bystate.txt

	local states " 1 4 5 6 8 9 10 12 13 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56"
	foreach s of local states {
		qui: areg FB_dumb mom_aged_birth by_* birth_order_all if native==1 & foreign_mo==1 & flag2 ==1 & census_state==`s', absorb(family)
		* mean and sd of the dependent variable over the estimation sample
		qui summ FB_dumb if e(sample)==1
		outreg2 mom_aged_birth    using $writeup/bystate.xls, append addtext("state", "`s'") addstat(Mean DV, `r(mean)', "Std.Dev.\ DV", `r(sd)') excel dec(5) nocons label br
	}

/* Fig 1. Panel C. Mom's Years in US at Time of Birth */
	areg FB_dumb i.mom_years_in_us_birth by_* i.birth_order_fact if foreign_mo ==1 & fig1c_sample == 1, absorb(family) coeflegend
	estimates store fig1c

		
/* Fig 1a. robustness check: including MOTHER's country of origin x birth order FE */

	* F flags the mbpl countries that we include in the JPE paper;
	* M keeps those country codes and pools every other mbpl into bin 1
	cap drop F
	gen F = inlist(mbpl, 404, 405, 410, 414, 434, 450, 453, 465, 426, 421, 400, 412, 411, 401, 436, 420)
	gen M = cond(F == 0, 1, mbpl)

	* Mbpl x birth order - all countries (indicators for JPE countries, all others grouped in one bin)
	areg FB_dumb i.mom_aged_birth by_* i.birth_order_fact##i.M if foreign_mo == 1, absorb(family) vce(robust)
	estimates store fig1a_fb_FE

	* implied assimilation: age-40 coefficient minus age-20 coefficient, scaled by 100
	local assimilation = 100 * (_b[40.mom_aged_birth] - _b[20.mom_aged_birth])
	file open N using "$writeup/implied_assimilation.tex", write replace
	file write N "Difference in 40 yr coef - 20 yr coef = `assimilation' "
	file close N



	* Mbpl x birth order: countries grouped into bins (anything not listed goes to bin 8)
	gen Mgroup = 8
	replace Mgroup = 1 if inlist(mbpl, 410, 411, 412)
	replace Mgroup = 2 if inlist(mbpl, 400, 401, 404, 405)
	replace Mgroup = 3 if inlist(mbpl, 450, 453)
	replace Mgroup = 4 if inlist(mbpl, 420, 421, 426)
	replace Mgroup = 5 if mbpl == 465
	replace Mgroup = 6 if inlist(mbpl, 434, 436)
	replace Mgroup = 7 if mbpl == 414

	areg FB_dumb i.mom_aged_birth by_* i.birth_order_fact##i.Mgroup if foreign_mo == 1 & F==1, absorb(family) vce(robust)
	estimates store fig1a_fb_FE_grouped

	
/* figA2 robustness check - use F-index based on 20 years prior to child's birth (i.e. "original" FB index).
   The birth-order factor is spelled out in full (birth_order_fact) so these two
   specifications no longer depend on Stata's variable-name abbreviation and match
   every other regression in this file. */
	areg  FB i.mom_aged_birth by_* i.birth_order_fact if foreign_mo == 0 , absorb(family) vce(robust)
	estimates store fig1a_nb_origFindex

	areg  FB i.mom_aged_birth by_* i.birth_order_fact if foreign_mo == 1 , absorb(family) vce(robust)
	estimates store fig1a_fb_origFindex

/* Figure A.6. robustness check - repeat the mother's-age-at-birth specification
   using the 3-yr child age bands (three_*) instead of the by_* bands */
	areg  FB_dumb i.mom_aged_birth three_*   i.birth_order_fact if foreign_mo == 0 , absorb(family) vce(robust)
	estimates store fig1a_nb_3yrband

	areg  FB_dumb i.mom_aged_birth three_*   i.birth_order_fact if foreign_mo == 1 , absorb(family) vce(robust)
	estimates store fig1a_fb_3yrband

/* Figure A.7. robustness check - separate out sons & daughters of foreign-born moms */
	* Sons (sex == 1):
	areg  FB_dumb i.mom_aged_birth by_*   i.birth_order_fact if  sex == 1 & foreign_mo == 1, absorb(family) vce(robust)
	estimates store fig1a_fb_sons

	* Daughters (sex == 2):
	areg  FB_dumb i.mom_aged_birth by_*   i.birth_order_fact if sex == 2 & foreign_mo == 1, absorb(family) vce(robust)
	estimates store fig1a_fb_daughters

/* Fig A.8 robustness check - using second gen F index (FB_mbpl) */
	areg  FB_mbpl i.mom_aged_birth by_*   i.birth_order_fact if foreign_mo == 0, absorb(family) vce(robust)
	estimates store fig1a_nb_mbpl

	areg  FB_mbpl i.mom_aged_birth by_*   i.birth_order_fact if foreign_mo == 1 & sex == 1 , absorb(family) vce(robust)
	estimates store fig1a_fb_sons_mbpl

	areg  FB_mbpl i.mom_aged_birth by_*   i.birth_order_fact if foreign_mo == 1 & sex == 2 , absorb(family) vce(robust)
	estimates store fig1a_fb_daughters_mbpl

/* Fig A.10 robustness check - using only parents to construct foreignness index */
	areg  FB_dumb_parentsonly i.mom_aged_birth by_*   i.birth_order_fact if foreign_mo ==  0 , absorb(family) vce(robust)
	estimates store fig1a_nb_parentsonly

	areg  FB_dumb_parentsonly i.mom_aged_birth by_*   i.birth_order_fact if foreign_mo ==  1 , absorb(family) vce(robust)
	estimates store fig1a_fb_parentsonly
	
/* --------------------------------------------------------------------------
Figure 2. Effect of time in US on name foreignness by sending country
----------------------------------------------------------------------------- */
local bpls "404 405 410 414 434 450 453 465 426 421 400 412 411 401 436 420"

* 3-year bins for mom's age at birth: 20-22, 23-25, ..., 38-40
forvalues lo = 20(3)38 {
	local hi = `lo' + 2
	gen mom_bin_`lo'_`hi' = inrange(mom_aged_birth, `lo', `hi')
}

* 20-22 is the base group, so its indicator is not kept
drop mom_bin_20_22

* Base group = 20-22 yrs old. Report coefficient on constant + coefficient on ==1 if FB + coefficient on relevant age group.

* By mother's place of birth (main specification)
foreach c of local bpls {
	di "`c'"

	* "Auxiliary regression": pool native-born moms with all immigrant moms from
	* country c, no mother FE. Regressors: linear mom_aged_birth fully interacted
	* with the foreign_mo indicator, birth-order indicators and the by_* bands.
	* NOTE(review): the original note describes age indicators for 20-40 and reads
	* the FB coefficient as the F-index at age 20, but the code uses a linear age
	* term - confirm how Findex20_* is evaluated downstream.
	reg FB_dumb c.mom_aged_birth##i.foreign_mo i.birth_order_fact by_* if (mbpl == `c' | native_mo == 1 ), robust
	estimates store Findex20_`c'

	* dummy regression:
	*areg FB_dumb mom_bin* i.birth_order_fact  by_* if mbpl == 100 , absorb(family) vce(robust)

	* Actual regression: linear age slope within family, country c only
	areg FB_dumb mom_aged_birth i.birth_order_fact by_* if mbpl == `c', absorb(family) vce(robust)
	estimates store fig2_`c'

	* Mean F-index (no longer reported)
	*reg FB_dumb if mbpl == 100*`c'
	*estimates store meanFB_`c'
}


/* Figure 2 pooled regression: */
* F may already exist from the Figure 1 section, hence the capture
cap gen F = inlist(mbpl, 404, 405, 410, 414, 434, 450, 453, 465, 426, 421, 400, 412, 411, 401, 436, 420)


* Run it without the main mom_aged_birth effect, so each coefficient is the
* full age slope for one country: build one age x country term per mbpl code
cap drop mom_age_x*
foreach bpl of local bpls {
	gen mom_age_x_`bpl' = mom_aged_birth * (mbpl == `bpl')
}

* with main effect:
*areg FB_dumb c.mom_aged_birth##i.mbpl i.birth_order_fact  by_* if F==1 , absorb(family) vce(robust)  coeflegend

* without main effect:
areg FB_dumb mom_age_x* i.birth_order_fact by_* if F==1, absorb(family) vce(robust) coeflegend

estimates store fig2_pooled

/* Figure pooled regression with birth order x mbpl interactions: */
*areg FB_dumb c.mom_aged_birth##i.mbpl i.birth_order_fact i.birth_order_fact##i.M by_* if foreign_mo == 1 &  F==1 , absorb(family) vce(robust)
*estimates store fig2_pooled_FE
 
 
/* --------------------------------------------------------------------------
Table 1 -  Immigrant name choice with time spent in US, by literacy status and ownership
----------------------------------------------------------------------------- */

local bpls "404 405 410 414 434 450 453 465 426 421 400 412 411 401 436 420"
foreach c in `bpls' {
	di "`c'"
	* raw cell sizes for this country, by mother's literacy code
	count if lit_mom==4 & mbpl == `c'
	count if lit_mom==1 & mbpl == `c'

	* native born, foreign mom, literate (lit_mom == 4 per the original note)
	areg FB_dumb mom_aged_birth by_* i.birth_order_fact if lit_mom==4 & mbpl == `c', absorb(family) vce(robust)
	estimates store table1_`c'_lit
	* areg is an estimation command: its sample size is in e(N). r(N) would
	* not be the regression sample size.
	di "`e(N)'"

	* native born, foreign mom, NOT literate (lit_mom == 1 per the original note)
	areg FB_dumb mom_aged_birth by_* i.birth_order_fact if lit_mom==1 & mbpl == `c', absorb(family) vce(robust)
	estimates store table1_`c'_nonlit
	di "`e(N)'"
}

 
/* --------------------------------------------------------------------------
 Appendix Table 3 & 4 - Cultural assimilation in historical data, robustness to sample restrictions
 Each specification regresses a name-foreignness index on linear mom_aged_birth
 with family absorbed, for children of foreign-born mothers, and stores the
 result as tableA4_*.
----------------------------------------------------------------------------- */  
* Baseline 
 areg  FB_dumb mom_aged_birth by_*  i.birth_order_fact if foreign_mo==1, absorb(family) 
estimates store tableA4_baseline

* Country-specific F-index
	* Girls' file first: bring in the FBindex* columns by first name and tag them
	* with a _girls suffix so the second merge can reuse the FBindex* names.
	* keep(1 3) retains master-only and matched observations.
	merge m:1 firstname using $datadir/US1920_FBIndex_dumb_bybpl_girls, keep(1 3) keepusing(FBindex*)
	rename FBindex* FBindex*_girls
	drop _merge
	
	* Second file, merged the same way without a suffix; used for sex == 1 below.
	merge m:1 firstname using $datadir/US1920_FBIndex_dumb_bybpl, keep(1 3) keepusing(FBindex*)
	drop _merge
		
	local bpls "404 405 410 414 434 450 453 465 426 421 400 412 411 401 436 420"
	* Pick the index column that matches the mother's birthplace and the child's sex.
	gen FB_by_mbpl = . 
	foreach b in `bpls'{
	replace FB_by_mbpl = FBindex_`b' if mbpl == `b' & sex == 1
	replace FB_by_mbpl = FBindex_`b'_girls if mbpl == `b'  & sex == 2
	}
	* The merged-in helper columns are no longer needed.
	drop FBindex*
areg  FB_by_mbpl mom_aged_birth by_*  i.birth_order_fact  if foreign_mo==1 , absorb(family) 
estimates store tableA4_countryspecific


* Adjust names with NYSIIS
	
	* find nysiis of all first names: 
	* (girls' lookup first, renamed; then the second lookup, overwritten with the
	* girls' code where sex == 2)
	merge m:1 firstname using $datadir//nysiis_names_girls, keep(1 3) nogenerate
	rename nysiis_first nysiis_first_girls
	merge m:1 firstname using $datadir//nysiis_names, keep(1 3) nogenerate
	replace nysiis_first = nysiis_first_girls if sex == 2
		
	* Same two-step pattern for the NYSIIS-based index itself.
	merge m:1 nysiis_first using $datadir/US1920_FBIndex_nysiis_dumb_girls, keepusing(FB_dumb_nysiis) keep(1 3) nogenerate
	rename FB_dumb_nysiis FB_dumb_nysiis_girls
	merge m:1 nysiis_first using $datadir/US1920_FBIndex_nysiis_dumb, keepusing(FB_dumb_nysiis) keep(1 3) nogenerate
	replace FB_dumb_nysiis = FB_dumb_nysiis_girls if sex == 2 
	
	
* NOTE(review): this specification controls for linear birth_order_all, whereas the
* other Table A4 specifications use i.birth_order_fact - confirm this is intended.
areg  FB_dumb_nysiis mom_aged_birth by_*  birth_order_all if  foreign_mo==1 , absorb(family) 
estimates store tableA4_nysiis


* Only kids born in California: 
areg  FB_dumb mom_aged_birth by_*   i.birth_order_fact if foreign_mo == 1 &  bpl == 600, absorb(family) 
estimates store tableA4_CA
* Mean/sd of the dependent variable over the estimation sample.
* NOTE(review): summarize returns results in r() and does not replace e(), so the
* store below appears to save the same areg results as tableA4_CA - confirm what
* the plotting script expects from tableA4_CA_N.
summ FB_dumb if e(sample)==1
estimates store tableA4_CA_N

* Only kids aged 0 - 6 
areg  FB_dumb mom_aged_birth by_*   i.birth_order_fact if foreign_mo == 1 & age <= 6, absorb(family) 
estimates store tableA4_06
* NOTE(review): same pattern as the California block above - tableA4_06_N appears
* to duplicate tableA4_06.
summ FB_dumb if e(sample)==1
estimates store tableA4_06_N


* ---- Save sample sizes for table notes -----
* For each index, count observations with the index, mom's age at birth and the
* birth-order factor all non-missing, and write the formatted count to a
* one-line .tex file: foreign-born moms first, then native-born moms.
foreach index in FB_dumb FB FB_mbpl FB_dumb_parentsonly {
	foreach grp in foreign native {
		local fm = ("`grp'" == "foreign")
		count if `index' != . & mom_aged_birth != . & birth_order_fact != . & foreign_mo == `fm'
		local N : display %10.0fc r(N)
		file open N using "$writeup/`index'_N_`grp'.tex", write replace
		file write N "`N'"
		file close N
	}
}

* Separate sons + daughters of foreign-born moms: daughters (sex == 2) first,
* then sons (sex == 1)
foreach index in FB_dumb FB_mbpl {
	foreach kid in daughters sons {
		local sx = cond("`kid'" == "daughters", 2, 1)
		count if `index' != . & mom_aged_birth != . & birth_order_fact != . & foreign_mo ==1 & sex == `sx'
		local N : display %10.0fc r(N)
		file open N using "$writeup/`index'_N_foreign_`kid'.tex", write replace
		file write N "`N'"
		file close N
	}
}


* Run the plotting script on the current data; preserve/restore puts the
* dataset back afterwards
preserve
do "/disk/homedirs/nber/hkissel/cens1930.work/CultAssim/AppendixTables/AER_Historical_Results_plots.do"
restore

   
