* master do-file
* NBER full count 1920 census

* stata -b do "/homes/nber/davidcal/cens1940.work/master_dofile_1920census_full_NBER_20150201.do" &

* ---- session setup ----
* Start from an empty session and turn off -more- pauses so a batch run never waits for input.
clear all
set more off

* ---- directory globals ----
* datadir : cleaned per-state extracts (<state>_clean_20) read by the state loop
* Kbulk   : per-state temp files and the final appended file are saved here
* outdir  : per-state head-of-household files (<state>_headinfo_20.dta)
global datadir "/homes/data/cens1930.work/keriksso/keriksso/Clean_residence"
global Kbulk   "/disk/bulkw/keriksso"
global outdir  "/disk/bulkw/keriksso/JMPMatch"

*** variables needed
* Variables read from each per-state extract by the -use ... using- call in
* the state loop. Each variable is listed exactly once (relate used to appear
* twice). The list is written with /// continuations, so it no longer depends
* on switching the command delimiter with #delimit.
local varset_full ///
	serial pernum stateicp statefip county ///
	namelast namefrst sex age race relate ///
	bpl mbpl ownershp mortgage fbpl ///
	lit yrimmig poploc momloc



*******
* loop around all states
* states exclude: Alaska, Hawaii
* The codes are used to build the per-state file names (<code>_clean_20).
* Every code must appear only once: a repeated code makes the state loop
* rebuild the same temp file a second time (8 used to be listed twice).
* NOTE(review): 11 is not in the list -- if these are state FIPS codes that is
* the District of Columbia; confirm the omission is intended.

local states "56 1 5 6 8 12 13 21 22 24 28 37 40 45 47 48 51 4 9 10 16 19 17 18 20 25 23 26 27 29 30 38 31 33 34 35 32 36 39 41 42 44 46 49 50 53 55 54"




*******

foreach s in `states' {

	* load only the needed variables from this state's cleaned extract
	use `varset_full' using "$datadir//`s'_clean_20", clear


	********
	*** rename variables
	* (pernum becomes pid here, which frees the name pernum for the parent
	*  look-ups further down)
	rename pernum   pid
	rename statefip census_state
	rename county   census_county
	rename namelast name_surname
	rename namefrst name_given


	********
	*** recode gender
	* sex keeps its numeric coding; gender is a string: "M" for 1, "F" for 2,
	* empty for anything else
	gen gender = cond(sex == 1, "M", cond(sex == 2, "F", ""))


	* generate age2
	gen age2 = age*age

	* generate birthyear
	gen birthyear = 1920 - age

	* restrict to records aged 15 or under (missing age is dropped as well)
	drop if age > 15


	********
	*** link each child to the father's record
	* Park the child's own values under a _self suffix so the same variable
	* names can be pulled in from the parents' records.
	foreach v in age yrimmig relate fbpl mbpl lit bpl {
		rename `v' `v'_self
	}

	* poploc is used as the father's person number within the same serial;
	* children without a match keep missing father values (keep(1 3))
	gen pernum = poploc

	merge m:1 serial pernum using $datadir//`s'_clean_20, keep(1 3) keepusing(namefrst namelast age yrimmig occscore occ1950 lit fbpl mbpl)
	foreach v in namefrst namelast age yrimmig occscore occ1950 lit fbpl mbpl {
		rename `v' `v'_dad
	}
	cap drop _merge

	drop pernum


	********
	*** link each child to the mother's record (same approach, keyed on momloc)
	gen pernum = momloc
	merge m:1 serial pernum using $datadir//`s'_clean_20, keep(1 3) keepusing(bpl namefrst namelast age yrimmig lit fbpl mbpl pernum)
	foreach v in namefrst namelast age yrimmig lit fbpl mbpl bpl {
		rename `v' `v'_mom
	}
	drop _merge


	* give the child's own values their plain names back
	* (lit_self and yrimmig_self keep the suffix)
	foreach v in age relate fbpl mbpl bpl {
		rename `v'_self `v'
	}

	gen mother_birthyear = 1920 - age_mom
	gen father_birthyear = 1920 - age_dad
	
	
		
	* generate birth order indicators
	* These are computed before the sample is restricted to males further
	* down, so daughters count towards total_children and birth_order_all.
	gen age_children = age
	gen age_son = age 		if sex==1
	gen age_daughter = age 	if sex==2

	gen head_son = sex==1
	gen head_daughter = sex==2

	****5/31/19: New family indicators based on momloc and mbpl
	* From here on serial identifies a (serial, momloc) group instead of the
	* original serial; the original value is kept in serial_orig.
	egen X = group(serial momloc)

	rename serial serial_orig
	rename X serial

	* keep children whose recorded mother's birthplace (mbpl) agrees with the
	* linked mother's own birthplace (bpl_mom) once both codes are divided by
	* 100 and floored
	keep if floor(mbpl/100)==floor(bpl_mom/100)


	* birth order based on overall children
	* egen rank() ranks ascending in age, so the rank is flipped to make the
	* oldest child number 1; ties share the average rank (hence the .5 values
	* handled below). Records with no rank are set to 0.
	bysort serial: egen total_children = count(age)
	bysort serial: egen birth_order_all = rank(age_children)
	replace birth_order_all = total_children + 1 - birth_order_all
	replace birth_order_all = 0 if birth_order_all == .


	* birth order based on sons only
	bysort serial: egen total_son = sum(head_son)
	bysort serial: egen birth_order_son = rank(age_son)
	replace birth_order_son = total_son + 1 - birth_order_son
	replace birth_order_son = 0 if birth_order_son == .


	* birth order based on daughters only
	bysort serial: egen total_daughter = sum(head_daughter)
	bysort serial: egen birth_order_daughter = rank(age_daughter)
	replace birth_order_daughter = total_daughter + 1 - birth_order_daughter
	replace birth_order_daughter = 0 if birth_order_daughter == .


	* collapse birth order into 4 categories (1, 2, 3, 4 or later);
	* a birth order of 0 leaves the category missing
	gen birthorder_all_rank  = 1 if (birth_order_all == 1 | birth_order_all == 1.5)
	replace birthorder_all_rank = 2 if (birth_order_all == 2 | birth_order_all == 2.5)
	replace birthorder_all_rank = 3 if (birth_order_all == 3 | birth_order_all == 3.5)
	replace birthorder_all_rank = 4 if (birth_order_all >= 4)

	gen birthorder_son_rank  = 1 if (birth_order_son == 1 | birth_order_son == 1.5)
	replace birthorder_son_rank = 2 if (birth_order_son == 2 | birth_order_son == 2.5)
	replace birthorder_son_rank = 3 if (birth_order_son == 3 | birth_order_son == 3.5)
	replace birthorder_son_rank = 4 if (birth_order_son >= 4)

	gen birthorder_dau_rank  = 1 if (birth_order_daughter == 1 | birth_order_daughter == 1.5)
	replace birthorder_dau_rank = 2 if (birth_order_daughter == 2 | birth_order_daughter == 2.5)
	replace birthorder_dau_rank = 3 if (birth_order_daughter == 3 | birth_order_daughter == 3.5)
	replace birthorder_dau_rank = 4 if (birth_order_daughter >= 4)

	* frequency tables for the log
	ta birthorder_all_rank
	ta birthorder_son_rank
	ta birthorder_dau_rank

	* generate birth order dummies b_a_r1-4, b_s_r1-4, b_d_r1-4
	* Built explicitly rather than with -tabulate, generate()-: tabulate
	* numbers its dummies by the categories present in the data, so a state
	* with an empty category would shift the numbering and leave fewer than
	* four dummies. A missing category gives 0 in every dummy.
	local stubs a s d
	local kinds all son dau
	forvalues j = 1/3 {
		local stub : word `j' of `stubs'
		local kind : word `j' of `kinds'
		forvalues k = 1/4 {
			gen byte b_`stub'_r`k' = (birthorder_`kind'_rank == `k')
		}
	}


		
	*******
	*** generate years since arriving in the US for dad (and mom)

	* 	convert arrival year into the number of years in the US as of 1920
	* 	yrimmig == 0 is treated as "no immigration year" and left missing;
	* 	values that are negative or exceed the father's age are discarded
	gen hh_years_in_us = 1920 - yrimmig_dad if yrimmig_dad!=0
	replace hh_years_in_us = . if (hh_years_in_us < 0 | hh_years_in_us > age_dad)

	* NOTE(review): no plausibility check is applied to the mother's value,
	* unlike the father's -- confirm this is intended
	gen hh_years_in_us_spouse = 1920 - yrimmig_mom if yrimmig_mom!=0

	* 	father's years in the US when the child was born
	* 	yrimmig_dad == 0 is excluded here too, consistent with hh_years_in_us;
	* 	otherwise the result would simply equal the child's birth year
	gen hh_years_in_us_birth = birthyear - yrimmig_dad if yrimmig_dad!=0


	* 	generate quadratic term
	gen hh_years_in_us_birth2 = hh_years_in_us_birth * hh_years_in_us_birth


	* 	generate year in the US at birth categories
	* 	every dummy is 0 when hh_years_in_us_birth is missing or negative
	gen hh_yiub_03 = (hh_years_in_us_birth >= 0 & hh_years_in_us_birth <= 3)
	gen hh_yiub_46 = (hh_years_in_us_birth >= 4 & hh_years_in_us_birth <= 6)
	gen hh_yiub_79 = (hh_years_in_us_birth >= 7 & hh_years_in_us_birth <= 9)
	gen hh_yiub_1014 = (hh_years_in_us_birth >= 10 & hh_years_in_us_birth <= 14)
	gen hh_yiub_1519 = (hh_years_in_us_birth >= 15 & hh_years_in_us_birth <= 19)
	* Stata treats missing as larger than any number, so the open-ended top
	* category needs an explicit missing check or it would be 1 for every
	* record without a value
	gen hh_yiub_20 = (hh_years_in_us_birth >= 20 & !missing(hh_years_in_us_birth))
	
	
		
	*******
	*** generate JPE birthplace indicators for father
	* Based on fbpl, the father's birthplace recorded on the child's own
	* record. Codes not listed below stay "Other".

	gen birthplace_father_jpe = "Other"
	replace birthplace_father_jpe = "Norway"  		if fbpl==40400
	replace birthplace_father_jpe = "Sweden"  		if fbpl==40500
	replace birthplace_father_jpe = "England"  		if fbpl==41000
	replace birthplace_father_jpe = "Ireland"  		if fbpl==41400
	replace birthplace_father_jpe = "Italy"  		if fbpl==43400
	replace birthplace_father_jpe = "Austria"  		if fbpl==45000
	replace birthplace_father_jpe = "Germany"  		if fbpl==45300
	replace birthplace_father_jpe = "Russia"  		if fbpl==46500
	replace birthplace_father_jpe = "Switzerland"  	if fbpl==42600
	replace birthplace_father_jpe = "Belgium"  		if fbpl==42000
	replace birthplace_father_jpe = "France"  		if fbpl==42100
	replace birthplace_father_jpe = "Denmark"  		if fbpl==40000
	replace birthplace_father_jpe = "Wales"  		if fbpl==41200
	replace birthplace_father_jpe = "Scotland"  	if fbpl==41100
	replace birthplace_father_jpe = "Finland"  		if fbpl==40100
	replace birthplace_father_jpe = "Portugal"  	if fbpl==43600

	* the household-head version is a straight copy of the father's
	gen birthplace_hhh_jpe = birthplace_father_jpe

	*** generate household hh's birthplace, literacy, and homeownership
	* serial was replaced earlier in the loop by the (serial, momloc) group
	* id, so the head-of-household file is matched on the original serial
	* kept in serial_orig.
	* NOTE(review): assumes serial20 in the headinfo file is that original
	* household serial -- confirm against the script that builds it.
	* _merge is left in place; the name-cleaning step drops it.
	gen double serial20 = serial_orig
	merge m:1 serial20 using $outdir//`s'_headinfo_20.dta, keep(1 3) keepusing(head_inc head_lit head_owns head_mortgage head_farm)
	drop serial20
	
	
			
	
	
	*******
	*** generate father and hhh birthplace categories based on patterns
	* Each country label is assigned to one of three groups; labels outside
	* the three lists (including "Other") leave the group as an empty string.
	* The same mapping is applied to the father's and the household head's
	* label.
	foreach who in father hhh {
		gen birthplace_`who'_g = ""
		replace birthplace_`who'_g = "Similar" if inlist(birthplace_`who'_jpe, "England", "Wales", "Scotland")
		replace birthplace_`who'_g = "NoConvergence" if inlist(birthplace_`who'_jpe, "Ireland", "Italy", "Austria", "France", "Portugal")
		replace birthplace_`who'_g = "Convergence" if inlist(birthplace_`who'_jpe, "Norway", "Sweden", "Germany", "Switzerland", "Belgium", "Denmark", "Finland", "Russia")
	}
	
	
				
	********
	*** generate indicator of own nativity
	* native = 1 when the child's bpl code is <= 10000, 0 otherwise
	* foreign is the complement of native
	gen native  = (bpl <= 10000)
	gen foreign = 1 - native


	*** generate household head, father and mother nativity
	* all three come from the birthplaces recorded on the child's own record
	* (fbpl / mbpl) and are missing when that birthplace is missing;
	* foreign_hh and foreign_fa share the same definition
	gen foreign_hh = (fbpl > 10000) if fbpl!=.
	gen foreign_fa = (fbpl > 10000) if fbpl!=.
	gen foreign_mo = (mbpl > 10000) if mbpl!=.

	*** does the linked father / linked mother have a foreign-born parent?
	* 0 when the parent is not linked or both of that parent's parental
	* birthplaces are missing
	gen foreign_hhhpa = (fbpl_dad>10000 & fbpl_dad!=. ) | (mbpl_dad>10000 & mbpl_dad!=.)
	gen foreign_hhhsp = (fbpl_mom>10000 & fbpl_mom!=. ) | (mbpl_mom>10000 & mbpl_mom!=.)


	*******
	*** define types of children based on nativity
	* parent patterns used below
	local fa_only "foreign_fa == 1 & foreign_mo == 0"
	local mo_only "foreign_fa == 0 & foreign_mo == 1"
	local both_fo "foreign_fa == 1 & foreign_mo == 1"
	local both_na "foreign_fa == 0 & foreign_mo == 0"
	* samples on which the indicators are defined (missing elsewhere)
	local hh_known "fbpl!=."
	local pa_known "fbpl!=. & mbpl!=."

	* native-born child
	* NOTE(review): native_nativefa and native_nativemo are defined exactly
	* like native_foreignfa and native_foreignmo below -- confirm whether the
	* "native" versions were meant to use a different parent pattern.
	gen native_nativehh 	= (foreign == 0 & foreign_hh == 0) 	if `hh_known'
	gen native_nativefa 	= (foreign == 0 & `fa_only') 		if `pa_known'
	gen native_nativemo 	= (foreign == 0 & `mo_only') 		if `pa_known'
	gen native_nativebo 	= (foreign == 0 & `both_na') 		if `pa_known'

	gen native_foreignhh 	= (foreign == 0 & foreign_hh == 1) 	if `hh_known'
	gen native_foreignfa 	= (foreign == 0 & `fa_only') 		if `pa_known'
	gen native_foreignmo 	= (foreign == 0 & `mo_only') 		if `pa_known'
	gen native_foreignbo 	= (foreign == 0 & `both_fo') 		if `pa_known'

	* foreign-born child
	gen foreign_foreignhh 	= (foreign == 1 & foreign_hh == 1) 	if `hh_known'
	gen foreign_foreignfa 	= (foreign == 1 & `fa_only') 		if `pa_known'
	gen foreign_foreignmo 	= (foreign == 1 & `mo_only') 		if `pa_known'
	gen foreign_foreignbo 	= (foreign == 1 & `both_fo') 		if `pa_known'

	* native child of a native head, split by whether either linked parent
	* has a foreign-born parent
	local links_known "fbpl!=. & poploc!=. & mbpl!=. & momloc!=."
	gen native_nativehh_na  = (native_nativehh == 1 & foreign_hhhsp == 0 & foreign_hhhpa == 0) 			if `links_known'
	gen native_nativehh_fo  = (native_nativehh == 1 & (foreign_hhhsp == 1 | foreign_hhhpa == 1)) 		if `links_known'

	* generate household size
	* counts the records currently in memory for each value of serial
	gen one_record = 1
	bysort serial: egen fam_size = sum(one_record)
	drop one_record
	
	
	
	********
	* MALE *
	********

	*** keep male only
	keep if gender == "M"


	*******
	*** clean up names (son)
	* Keep the first word of the given name, upper-cased; when that word is
	* "?" fall back to the second word. The split pieces get their own stub
	* and are dropped with a wildcard, so the cleanup works however many
	* words -split- produces.
	gen name_given_orig = name_given
	split name_given, generate(ng_word)
	replace name_given = upper(ng_word1)
	capture confirm variable ng_word2, exact
	if _rc == 0 {
		replace name_given = upper(ng_word2) if name_given == "?"
	}
	drop ng_word*

	cap drop _merge
	* merge with nickname cleaned dataset (male only)
	* NOTE(review): m:m behaves like a look-up only if name is unique in the
	* nickname file -- confirm, and switch to m:1 if it is.
	gen name = name_given
	merge m:m name using "/disk/homedirs/nber/keriksso/MatchingFiles//nicknames_cleaned.dta"
	drop if _merge == 2
	cap drop _merge
	replace name_given = cleaned if cleaned!="" & name!=cleaned
	drop name
	* Move the son's look-up result out of the way. merge keeps the master's
	* copy of a variable that exists in both files, so leaving cleaned in
	* memory would make the father's merge below reuse the SON's cleaned name.
	rename cleaned cleaned_son_tmp
	gen name_given_cleaned = name_given

	* NYSIIS conversion (input: name_given_cleaned)
	nysiis name_given_cleaned, gen(name_given_nysiis)


	rename name_given_orig name_given_orig_self
	rename name_given name_given_self
	rename name_given_cleaned name_given_cleaned_self

	*******
	*** clean up names (father) -- same steps applied to namefrst_dad
	* from here on name_given_orig holds the father's uncleaned name; the
	* son's is in name_given_orig_self
	gen name_given_orig = namefrst_dad
	split namefrst_dad, generate(dad_word)
	replace namefrst_dad = upper(dad_word1)
	capture confirm variable dad_word2, exact
	if _rc == 0 {
		replace namefrst_dad = upper(dad_word2) if dad_word1 == "?"
	}
	drop dad_word*

	cap drop _merge
	* merge with nickname cleaned dataset (male only)
	gen name = namefrst_dad
	merge m:m name using "/disk/homedirs/nber/keriksso/MatchingFiles//nicknames_cleaned.dta"
	drop if _merge == 2
	replace name = cleaned if cleaned!="" & name!=cleaned
	cap drop _merge
	gen name_given_cleaned = name
	drop name
	* discard the father's look-up result and put the son's back under its
	* original name, so the saved file has the same columns as before
	drop cleaned
	rename cleaned_son_tmp cleaned

	rename name_given_cleaned name_given_cleaned_dad


	rename name_given_cleaned_self  name_given_cleaned
	rename name_given_self name_given


	*******
	*** indicator whether father and child name same
	* compares the cleaned given names; 0 when they differ, which includes
	* sons with no linked father unless the son's cleaned name is also empty
	gen samename_fa = name_given_cleaned_dad ==name_given_cleaned


	****
	* save this state's male sample to a temp file; the files are appended
	* after the loop
	save "$Kbulk//NBER_1920census_temp_male_nf_`s'.dta", replace

	}
	


	*/	
	
******
*** append all datasets
* Stack the per-state temp files saved by the state loop into a single file.
* The list is defined again here so this section can run on its own.
* Every code appears only once: a repeated code would append that state's
* observations a second time (8 used to be listed twice).
* NOTE(review): 11 is not in the list -- if these are state FIPS codes that is
* the District of Columbia; confirm the omission is intended.

clear
local states "56 1 5 6 8 12 13 21 22 24 28 37 40 45 47 48 51 4 9 10 16 19 17 18 20 25 23 26 27 29 30 38 31 33 34 35 32 36 39 41 42 44 46 49 50 53 55 54"
foreach s in `states' {
	append using "$Kbulk//NBER_1920census_temp_male_nf_`s'.dta"
	}

save "$Kbulk/NBER_1920census_temp_male_nf_full.dta", replace


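/*
Optional cleanup (disabled): delete the per-state temp files once the full
file has been saved. Uses the same state list and directory as the append
loop above.

foreach s in `states' {
	erase "$Kbulk//NBER_1920census_temp_male_nf_`s'.dta"
	}

*/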
