
* Batch invocation from the shell:
* stata -b do "/homes/nber/davidcal/cens1940.work/master_dofile_1920census_full_NBER_20150201.do" &

* -clear all- is left disabled; the -use ..., clear- further down replaces the data in memory
*clear all

* do not pause the run for more-prompts
set more off

* working directory against which relative paths are resolved
cd "/homes/nber/davidcal/cens1940.work"

* root of the bulk-storage area: the input dataset is read from here and the
* final dataset is saved back here
global Kbulk "/disk/bulkw/keriksso"


***** load the appended dataset; the FB index is merged onto it further down *****
use "${Kbulk}/NBER_1920census_temp_male_nf_full_FEM.dta", clear


*******
*** generate birth cohort bins
* The variable is called birthdecade, but each bin spans five years and is
* labelled with its first year: [start, start + 5).
* A birthyear that is missing or falls outside 1850-1940 leaves birthdecade missing.
gen birthdecade = .
forvalues start = 1850(5)1930 {
	replace birthdecade = `start' if (birthyear >= `start' & birthyear < `start' + 5)
}
* the last bin is closed on the right so that birthyear 1940 is included
replace birthdecade = 1935 if (birthyear >= 1935 & birthyear <= 1940)



*******
*** merge FB_index based on first names and birth year
*   FB_index calculated using full census data (by Katherine)
/* DISABLED -- specifications A1 and A2 are not used; specification A4 (the 4th one) is used instead ***

*****
**  A. for each foreign nation, FB_index is calculated using the entire population sample (excluding blacks)

**  specification A1:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	NYSIIS name conversion	
* 	merge FB_index based on name_given_nysiis and birthyear, using only 1940 census calculated indices
* 	FB_index excluding blacks
* 	match rate with nysiis names: %

merge m:m name_given_nysiis birthyear using "FB Index/US1940_FBIndex_N_female.dta"
drop if _merge == 2
rename _merge f_index_nysiis_female_1940_match

rename FBindex f_index_beforebirth_n_f_1940
label variable f_index_beforebirth_n_f_1940 "F_index: Female, excl black, NYSIIS names, 1940 census only"

* generate indicator whether f_index is above 0.5
gen f_index_nf_abovehalf = (f_index_beforebirth_n_f_1940 > 0.5) if f_index_beforebirth_n_f_1940 != .



**  specification A2:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	raw name
* 	merge FB_index based on name_given and birthyear, using only 1940 census calculated indices
* 	FB_index excluding blacks
* 	match rate with raw names: %

merge m:m name_given birthyear using "FB Index/US1940_FBIndex_female.dta"
drop if _merge == 2
rename _merge f_index_raw_female_1940_match

rename FBindex f_index_beforebirth_r_f_1940
label variable f_index_beforebirth_r_f_1940 "F_index: Female, excl black, raw names, 1940 census only"

* generate indicator whether f_index is above 0.5
gen f_index_rf_abovehalf = (f_index_beforebirth_r_f_1940 > 0.5) if f_index_beforebirth_r_f_1940 != .
*/
/* DISABLED -- this does not merge as written: rename Nysiis_first to name_given_nysiis in the FB index file first ****

**  specification A3:
* 	1940 full count census
* 	use only 20 years of cohorts prior to birthyear
* 	NYSIIS name conversion
* 	merge FB_index based on name_given_nysiis and birthyear, using only 1940 census calculated indices
* 	FB_index excluding blacks
* 	match rate with nysiis names: %

merge m:m name_given_nysiis birthyear using "FB Index/Nearest 20 Years Only/US1940_FBIndex_N_F.dta"
drop if _merge == 2
rename _merge f_index_nf_1940_20y_match

rename FBindex f_index_20y_n_f_1940
label variable f_index_20y_n_f_1940 "F_index: Female, excl black, NYSIIS names, 1940 census only, 20 years before birth"

* generate indicator whether f_index is above 0.5
gen f_index_nf20_abovehalf = (f_index_20y_n_f_1940 > 0.5) if f_index_20y_n_f_1940 != .
*/

* (end of the disabled specifications A1-A3)

**  specification A4 (the only specification that is active in this file):
* 	1940 full count census
* 	use only 20 years of cohorts prior to birthyear
* 	raw names (name_given)
* 	merge FB_index based on name_given and birthyear, using only 1940 census calculated indices
* 	FB_index excluding blacks
* 	match rate with raw names: %

* NOTE(review): a bare -cd- changes to the user's home directory in Stata for
* Unix (in Stata for Windows it only displays the current directory), so the
* index file named below is resolved relative to the home directory.
* TODO confirm that US1940_FBIndex_F.dta is really meant to be read from there.
cd

* The index file is expected to hold one row per (name_given, birthyear).
* m:1 enforces that and stops with an error if it is violated, whereas m:m
* would silently pair duplicate rows by their sort position.
* keep(master match) discards index rows that match no record in memory
* (this replaces the former -drop if _merge == 2-).
merge m:1 name_given birthyear using "US1940_FBIndex_F.dta", keep(master match)
rename _merge f_index_rf_1940_20y_match

rename FBindex f_index_20y_r_f_1940
label variable f_index_20y_r_f_1940 "F_index: Female, excl black, raw names, 1940 census only, 20 years before birth"

* generate indicator whether f_index is above 0.5
* (left missing wherever the index is missing, including extended missing values)
gen f_index_rf20_abovehalf = (f_index_20y_r_f_1940 > 0.5) if !missing(f_index_20y_r_f_1940)



* write the merged dataset to the bulk-storage area, overwriting any earlier copy
save "$Kbulk/Full1920_data_FBIndex_women_NEW.dta", replace




