
do "D:\Data\workdata\704665\daycare\dofiles\first.do"

************************************************************************
**************** preschool data
* we have digitized these data from Skjernbæk publications (various years)
* see paper for details
************************************************************************

* Load the raw preschool register
use "D:\Data\workdata\704665\External data_raw\daycare\Data-fra-PD-20150201", clear

* Remove columns that are not used anywhere below (identifiers, notes and the *_2015 columns)
drop munic sogn area Notes nr AdrID_2015 VEJNAVN_2015 ///
	HUSNR_2015 POSTNR_2015 POSTNAVN_2015 SOGNNR_2015 SOGNNAVN_2015 ///
	X_2015 Y_2015 Evt__Miriam_Tjek

* Parish and municipality identifiers follow the DK 1955 structure. Changes over 1930-60 are minor:
* mainly parishes being merged, or new parishes being created where population grew.
rename Kode_55SO    Sognekode_IM
rename ID_55KO      Kommune_ID
rename established  year_established
rename aproval_year year_approval

* Institutions with no approval year are omitted (35 observations according to the original note)
drop if year_approval == .

* Operating indicator per calendar year, BASED ON APPROVAL YEAR:
* an institution counts as operating in year t as soon as year_approval <= t
forvalues yr = 1921/1960 {
	gen operating`yr' = (year_approval <= `yr')
}

* Number of operating institutions per parish and year, BASED ON APPROVAL YEAR.
* The sort below also fixes the row order that later by-parish statements rely on.
sort Sognekode_IM year_approval
forvalues yr = 1921/1960 {
	by Sognekode_IM: egen numinst`yr' = total(operating`yr')
}

* Slots per parish. The A<year> columns exist only for the book years listed here;
* with the missing option the parish total stays missing when every institution is missing.
local bookyears 1921 1924 1927 1936 1942 1946 1950 1956 1960
foreach yr of local bookyears {
	by Sognekode_IM: egen numslots`yr' = total(A`yr'), missing
}

* Empty placeholders for every non-book year between 1922 and 1959, filled by interpolation later
forvalues yr = 1922/1959 {
	local isbook : list yr in bookyears
	if !`isbook' {
		gen numslots`yr' = .
	}
}

* The raw per-institution slot columns are no longer needed
drop A19*

* Institution type
* NOTE(review): the raw variable name mentions a third category (3_vugg), but only codes 1 and 2
* are labelled and counted below; institutions coded 3 enter numinst only - confirm this is intended.
rename type__1_asyl_2_bornehave_3_vugg_ type
label variable type "Type of daycare"
label define type 1 "Asylum" 2 "Preschool/Kindergarden"
label values type type

gen asylum  = (type == 1)
gen daycare = (type == 2)

* Number of operating institutions per parish and year, split by type
forvalues yr = 1921/1960 {
	gen asylum`yr' = asylum * operating`yr'
	by Sognekode_IM: egen numasylum`yr' = total(asylum`yr')

	gen daycare`yr' = daycare * operating`yr'
	by Sognekode_IM: egen numdaycare`yr' = total(daycare`yr')
}

****** Keep one obs per parish
* The kept row is the first-approved institution of the parish; its operating* dummies therefore
* indicate whether ANY institution in the parish is approved in a given year, and its
* year_established / year_approval are used as the parish values below.
* Ties on approval year are broken by the earliest establishment year so that the kept row is
* reproducible (a plain sort leaves the order of tied rows arbitrary).
bysort Sognekode_IM (year_approval year_established): keep if _n==1

****Reshape data to the parish x year level
reshape long operating numinst numslots numasylum numdaycare, i(Sognekode_IM) j(year)

* An institution cannot be approved before it exists: cap the establishment year at the approval
* year (this also fills a missing establishment year, because missing compares as larger).
* The variable name is written out in full so the line does not depend on variable abbreviation.
replace year_established = year_approval if year_approval < year_established

*** interpolate number slots
* Linear interpolation between book years; epolate also extrapolates outside the observed range
bysort Sognekode_IM (year): ipolate numslots year, gen(inumslots) epolate
replace inumslots = . if year < year_established
replace inumslots = 0 if inumslots < 0

* assume that the institution started with the number of slots that we first observe:
* firstbook = first year with slot information in the parish,
* numhelp   = parish slots observed in that first year.
* (Previously numhelp was the MINIMUM over all book years, which differs from the first observed
* value whenever the parish total falls over time.)
* numslots is non-missing only in book years at this point, so no explicit book-year list is needed.
gen first = year if numslots < .
bysort Sognekode_IM (year): egen firstbook = min(first)
gen firstslots = numslots if year == firstbook
bysort Sognekode_IM (year): egen numhelp = max(firstslots)
drop first firstslots

* Between establishment and the first book year, carry the first observed value backwards
replace inumslots = numhelp if year <= firstbook & year >= year_established

*round to nearest integer
replace inumslots = round(inumslots, 1)

*Fill in for missing years (observed book-year values are never overwritten)
replace numslots = inumslots if numslots == .

*keep only relevant variables
keep Sognekode_IM Kommune_ID year operating numinst numslots numasylum numdaycare

* First year in which any parish of the municipality has an approved institution.
* Sorting on Sognekode_IM as the last key makes the row order of the saved file deterministic.
gen year1 = year if operating == 1
bysort Kommune_ID (year Sognekode_IM): egen yearfirst = min(year1)
drop year1

keep if year>=1933 & year<=1960
save "$work\daycareslots_byyear_yrapproval_parish.dta", replace

/**** descriptive graphs
preserve
collapse (sum) numslots numinst (mean) operating, by(year)

line numslots year, lwidth(medthick) xtitle(Year) ytitle(Total Number Slots) xlabel(1933(5)1960)
graph export "$graphs\numslots_byyear_yrapproval.png", replace


line numinst year, lwidth(medthick) xtitle(Year) ytitle(Total Number Preschools) xlabel(1933(5)1960)
graph export "$graphs\numinst_byyear_yrapproval.png", replace

line operating year, lwidth(medthick) xtitle(Year) ytitle(Proportion Parishes with Preschool) xlabel(1933(5)1960)
graph export "$graphs\anyinst_byyear_yrapproval.png", replace

restore
*/


*****************************************************************
* controls and population data
*****************************************************************

*****************************************************************
* prepare the population data
*****************************************************************

* Load raw parish population counts; population columns (_1921 ... _1960) and sognekode are strings
use "D:\Data\workdata\704665\External data_raw\sogne1901_2011_nicolaifeb2013.dta", clear // external raw data

drop if sognekode=="9999"

* missing pop information for all years (blank or "." in every census year used below)
drop if _1921=="" & _1930=="" & _1940=="" & _1950=="" & _1960==""
drop if _1921=="." & _1930=="." & _1940=="." & _1950=="." & _1960=="."

* new municipal codes (after 1970) and no parish info that allows link to old munics
* NOTE(review): sognekode is a string, so "<" compares lexicographically ("42" and "200" are NOT
* below "1000"). The two drops that follow appear to catch the remaining cases - confirm against the data.
drop if sognekode<"1000" & sognekode!=""
drop if sognekode==komkode_ny

* Fyns county
drop if sognekode=="42"
* Bornholm county
drop if sognekode=="" & komkode_ny=="400"

* some hardcoding: Sønderborg has no code in the raw file; Skanderborg is recoded
replace sognekode="8988" if sognekode=="" & sognenavn=="Sønderborg"
replace sognekode="9166" if sognekode=="8008"

* Entities that still have no parish code (48 in the original data; mostly rural areas around
* towns, small entities). Dropped by condition rather than by row position: after the drops above
* the empty string is the only value that sorts below "1000", so this equals the former
* "sort + drop if _n<=48" when the count is 48 and stays correct if the raw file changes.
drop if sognekode==""

* later by-sognekode statements expect the data sorted on sognekode
sort sognekode

* Some parish codes occur more than once (50 duplicates according to the original note; mostly a
* parish plus some rural areas around it). Population is summed within code and one row is kept.
sort sognekode

foreach yr in 1921 1930 1940 1950 1960 {
	* strip up to three blanks from the raw string
	gen str7 y`yr' = subinstr(_`yr', " ", "", 3)
	* convert to numeric, ignoring "." characters
	destring y`yr', generate(y`yr'num) ignore(.)
	* population summed over all rows sharing the parish code (missing counts as zero)
	by sognekode: egen y`yr'sum = total(y`yr'num)
	drop y`yr' y`yr'num
}

* raw string columns are no longer needed
drop _* col*

* one row per parish code
by sognekode: keep if _n == 1


compress
rename sognenavn parishname

* Numeric parish key used to link parishes to municipalities
destring sognekode, generate(fodreg)

* Attach municipality codes; only parishes found in both files are kept
merge 1:m fodreg using "$work\parish_munic_county.dta"
keep if _merge == 3

* Municipality population = sum of parish populations within Kommune_ID
sort Kommune_ID
foreach yr in 1930 1940 1950 1960 {
	by Kommune_ID: egen popmunic`yr' = total(y`yr'sum)
}

* Aggregate check: total DK population implied by the parish data
foreach yr in 1921 1930 1940 1950 1960 {
	egen totalpop`yr' = total(y`yr'sum)
}

* total pop in 1921 is too low, so 1921 is discarded. All other years are close to the
* aggregate figures; only 1960 is about 200.000 too low.
drop y1921* totalpop1921

foreach yr in 1930 1940 1950 1960 {
	rename y`yr'sum y`yr'pop
	label variable y`yr'pop "Population in parish `yr'"
	label variable totalpop`yr' "DK pop in year `yr'"
}

save "$work\popsize_parishesmunics.dta", replace

* Reduce to one row per municipality (popmunic* and totalpop* are constant within Kommune_ID)
sort Kommune_ID
by Kommune_ID: keep if _n == 1

* Drop parish-level information
* NOTE(review): sognenavn was renamed to parishname earlier, so the sognenavn dropped here
* presumably comes from the merged parish_munic_county file - confirm.
drop komkode_ny komkode_old sognekode parishname ///
	remarks__byopgorelsens_aarstal_ sognenavn ///
	y1930pop y1940pop y1950pop y1960pop fodreg

foreach yr in 1930 1940 1950 1960 {
	label variable popmunic`yr' "Population in munic `yr'"
	label variable totalpop`yr' "DK pop in year `yr'"
}

save "$work\popsize_munics.dta", replace

***** Build an annual municipality population panel 1930-1960 from the four census years
use "$work\popsize_munics.dta", clear

*keep Kommune_ID rural popmunic*

* Long format: four rows per municipality (1930, 1940, 1950, 1960)
reshape long popmunic, i(Kommune_ID) j(year)

* Nine blank copies of every census row: 4 original + 36 new rows per municipality
expand 10, gen(newyear)
replace popmunic = . if newyear == 1
replace year     = . if newyear == 1

* Missing years sort last, so within municipality rows 1-4 are the census years and rows 5-40
* are the blanks. Blank rows are numbered decade by decade: 1931-1939, 1941-1949, 1951-1959
* and 1961-1969 (the last set is removed again below).
sort Kommune_ID year
forvalues decade = 0/3 {
	forvalues step = 1/9 {
		local pos = 4 + 9*`decade' + `step'
		by Kommune_ID: replace year = 1930 + 10*`decade' + `step' if _n == `pos'
	}
}

tab year

keep if year >= 1930 & year <= 1960

* Linear interpolation between census years; observed census values are kept as they are
sort Kommune_ID year
by Kommune_ID: ipolate popmunic year, gen(ipop)
replace popmunic = ipop if popmunic == .

keep Kommune_ID year popmunic

save "$work\popsize_munics_reshaped19301960.dta", replace

