* Take cleaned data and create variables needed for demand/supply/entry/counterfactuals


* Load the cleaned panel. This version keeps imputed prices: less sampling error, keeps more hospital-months.
* Alternative input (has trustworthy price data):
*use demand-first-logits-checks-data-allage
use data/demand-first-logits-checks-data-allage-keepimputep, clear
rename M MarketSize

* position of month t within a 12-month cycle, coded 1..12 (multiples of 12 map to 12 rather than 0)
generate mm = cond(mod(t,12)==0, 12, mod(t,12))

*---------------------------------------------------------------------------
* Designate products that enter the US during the sample, and record the first month
* each product is seen in each region (needed to measure time in trials pre-US entry)
*---------------------------------------------------------------------------
* first month observed for each product within a region (US==1 is US, US==0 is EU)
by product US, sort: egen enter = min(t)
generate enterUS = cond(US==1, enter, .)
generate enterEU = cond(US==0, enter, .)
summarize enter enterUS enterEU US
* copy each region's entry month onto every row of the product;
* an ascending sort places missings last, so row 1 carries the non-missing value when one exists
bysort product (enterUS): replace enterUS = enterUS[1] if enterUS==.
bysort product (enterEU): replace enterEU = enterEU[1] if enterEU==.
*---------------------------------------------------------------------------
* From here on only the EU rows are needed
keep if US!=1
*---------------------------------------------------------------------------
* Helistent: age is hand-set from the month counter
replace age = t+18 if product=="Helistent"
* age of -999 is the code being filtered out here (no entry date found?!)
keep if age!=-999
* USeventual: 1 for products that enter the US, 0 otherwise
generate USeventual = (enterUS~=.)
* a few are hand-coded because they enter the EU at the end of the sample period and
* will enter the US eventually (or failed trials and never entered the US)
replace USeventual = 1 if inlist(product, "Cypher Select", "Cypher Select Plus", "Costar", "Resolute", ///
	"Absorb", "Synergy", "Promus PREMIER", "Omega")
* USlater: the pre-US-entry period -- THE KEY PERIOD WHERE UNDERGOING TRIALS!
* Note: when enterUS is missing (possible only for the hand-coded products), t<enterUS
* is true on every row because missing compares above any number, so USlater is 1 throughout.
generate USlater = (USeventual==1 & t<enterUS)
*
* Optionally discard the youngest ages (e.g. age==1 if delta is thought to be understated):
* rows at or below the global agedrop are removed and age is re-based to start after it
keep if age > $agedrop
replace age = age - $agedrop
*
* Time spent in trials at each product-month, so learning speeds in and out of trials can be separated
generate clinical = age * USlater
by product, sort: egen cmax = max(clinical)
* once out of the pre-US period, hold trial time at the product's maximum
replace clinical = cmax if !USlater
* cap at 36 months: more selection and measurement error than learning beyond that
replace clinical = 36 if clinical>36
* time spent in observational (non-trial) learning at each product-month
generate amc = age - clinical
local checkvars lnd enter enterUS enterEU age clinical amc USlater USeventual
summarize `checkvars'
summarize `checkvars' if age<36
*---------------------------------------------------------------------------


*---------------------------------------------------------------------------
* Distribution of lifetime profits; imputed further below where truncated
*---------------------------------------------------------------------------
* youngest and oldest age at which each product appears in the sample
by product, sort: egen minage = min(age)
by product, sort: egen maxage = max(age)
* pay flags products that enter during the sample period: enterEU>1, except those with minage>enterEU.
* (minage==1 alone would miss a few that are not observed every month and are first seen in t=2 or 3)
generate pay = (enterEU>1) & !(minage>enterEU)
* observed profits in data (MRG~10% sample), scaled by 10 and expressed in millions
generate profit = q_r*p*10/1000000
* running and total profits within product, accumulated in month order
bysort product (t): generate profit_cumsum = sum(profit)
by product, sort: egen profit_lifetime = max(profit_cumsum)
* adjust for missing months: scale up by the share of months between minage and maxage actually observed
generate ones = 1
by product, sort: egen Tobs = total(ones)
generate Tpct = Tobs/(maxage-minage+1)
foreach v of varlist profit_cumsum profit_lifetime {
	replace `v' = `v' / Tpct
}
* End truncated: product is still observed in month t==114; age114 is its age in that month
generate age114 = cond(t==114, age, .)
bysort product (age114): replace age114 = age114[1] if age114==.
* Beginning truncated: product is already observed in month t==1; age1 is its age in that month
generate age1 = cond(t==1, age, .)
bysort product (age1): replace age1 = age1[1] if age1==.
* full==1 marks products truncated at neither end (missing otherwise);
* the profit shares below are computed for those products only
generate full = 1 if age1==. & age114==.
generate pct_lifetime = profit / profit_lifetime if full==1
generate cum_pct_lifetime = profit_cumsum / profit_lifetime if full==1
* Diagnostic only: product-level summary statistics (data in memory are restored afterwards)
preserve
collapse (mean) full enterEU age1 age114 profit profit_lifetime ///
	(min) minage=age (max) maxage=age (count) months=profit, by(product)
summarize full enterEU profit profit_lifetime minage maxage age1 age114
by full, sort: summarize months profit, detail
by full, sort: summarize months profit if profit_lifetime<50, detail
restore
* Relation between cumulative share of lifetime profits and age, non-truncated products only
* (plot only; data in memory are restored afterwards)
preserve
drop if full!=1 /*& maxage>20 & maxage<76*/
collapse (mean) avg_pct=cum_pct_lifetime (sd) sd_pct=cum_pct_lifetime (count) N=cum_pct_lifetime, by(age)
* upper and lower bands: mean plus/minus 1.96 standard errors
generate u = avg_pct+1.96*sd_pct/N^.5
generate l = avg_pct-1.96*sd_pct/N^.5
twoway ///
	(line avg_pct age if age<=48, lwidth(thick) lcolor(navy)) ///
	(line u age if age<=48, lpattern("-") lcolor(midblue)) ///
	(line l age if age<=48, lpattern("-") lcolor(midblue)) ///
	, graphregion(color(white)) ///
	legend( order( 1 "Mean Over Products" 2 "Standard Errors" ) rows(1) ) ///
	xtitle("Age Since EU Introduction (Months)") xlabel(0(12)48) ///
	ytitle("Cumulative Percent Lifetime Profits") ylabel(0(.1)1) yscale(range(0 1))
graph export "output/LifetimeProfits_Ages.pdf", replace
restore
* avg_pct: mean cumulative share of lifetime profits at each age, computed from full==1 rows
by age, sort: egen avg_pct = mean(cum_pct_lifetime) if full==1
* copy the age-level mean onto the remaining rows of the same age (missings sort last)
bysort age (avg_pct): replace avg_pct = avg_pct[1] if avg_pct==.
* keep the share strictly below 1 so that (1-avg_pct) can be used as a divisor
replace avg_pct = .99 if avg_pct==1
* avg_pct48: the age-48 value of avg_pct, copied to every row
generate avg_pct48 = cond(age==48, avg_pct, .)
sort avg_pct48
replace avg_pct48 = avg_pct48[1] if avg_pct48==.
* Impute lifetime profits for truncated products.
*
* The adjustment is applied on the single row where the truncation happens (t==114 for
* end-truncated, t==1 for beginning-truncated products), because on that row avg_pct is the
* average cumulative share of lifetime profits at exactly the product's truncation age
* (age114 or age1). The adjusted value is then spread to all rows of the product via max().
*
* FIX: the row selector used to be age==114 / age==1. age114 and age1 are ages measured at
* t==114 and t==1, i.e. the product's largest / smallest age in the sample, so
* "age114<48 & age==114" and "age1>=48 & age==1" could never hold and the other two
* conditions only fired by coincidence. Selecting on t matches how age114 / age1 are defined.
*
* If avg_pct (or avg_pct48) is missing on the selected row the division yields missing,
* which max() ignores, so the product keeps its unadjusted value.

* impute end truncated: observed profits are the share avg_pct of lifetime profits
* (ages of 48+ use the age-48 share)
replace profit_lifetime = profit_lifetime / avg_pct if age114<48 & age114!=. & t==114
replace profit_lifetime = profit_lifetime / avg_pct48 if age114>=48 & age114!=. & t==114
bysort product: egen temp=max(profit_lifetime)
replace profit_lifetime=temp
drop temp
* impute beginning truncated: observed profits are the remaining share (1-avg_pct)
* (ages of 48+ use the age-48 share)
replace profit_lifetime = profit_lifetime / (1-avg_pct) if age1<48 & age1!=. & t==1
replace profit_lifetime = profit_lifetime / (1-avg_pct48) if age1>=48 & age1!=. & t==1
bysort product: egen temp=max(profit_lifetime)
replace profit_lifetime=temp
drop temp
/*
* alternate approach using regression
su age, detail
gen last=1 if age>=maxage-12 & t<114-12
replace last=0 if last==.
gen yrs=0 if age<=12
replace yrs=6 if age>72
replace yrs=5 if age>60 & age<=72
replace yrs=4 if age>48 & age<=60
replace yrs=3 if age>36 & age<=48
replace yrs=2 if age>24 & age<=36
replace yrs=1 if age>12 & age<=24
su yrs last
sort product -age
by product: gen profit_next = profit[_n-1]
gen age2 =  yrs*yrs
gen age_profit = yrs*profit
gen age2_profit = age2*profit
reg last yrs age2 profit if age>6 & age<102
reg profit_next profit age_profit age2_profit if age>6 & age<102
reg last yrs age2 profit if profit_lifetime<25 & age>6 & age<102
reg profit_next profit age_profit age2_profit if profit_lifetime<25 & age>6 & age<102
drop yrs last profit_next age2 age_profit age2_profit
*/
* Distribution of (imputed) lifetime profits across products
* (plots only; data in memory are restored afterwards)
preserve
collapse (mean) profit_lifetime, by(product)
* all products
summarize profit_lifetime, detail
histogram profit_lifetime, fraction start(0) width(25) ///
	xtitle("Lifetime Profits ($ Millions)",size(large)) xlabel(,labsize(large)) ///
	ytitle("Fraction of Products",size(large)) ylabel(,labsize(large)) ///
	graphregion(color(white))
graph export "output/LifetimeProfits_Products.pdf", replace
* products with lifetime profits above 25
summarize profit_lifetime if profit_lifetime>25
histogram profit_lifetime if profit_lifetime>25, fraction start(0) width(25) ///
	xtitle("Lifetime Profits ($ Millions)",size(large)) xlabel(,labsize(large)) ///
	ytitle("Fraction of Products (conditional on >$25M)",size(large)) ylabel(,labsize(large)) ///
	graphregion(color(white))
graph export "output/LifetimeProfits_Products_gt25M.pdf", replace
restore
* Share of lifetime profits vs age over the first 36 months, non-truncated products:
* scatter of the age-level means plus a linear fit with confidence band (errors clustered on t)
twoway ///
	(scatter avg_pct age if age<=36 & full==1, msymbol("o") mcolor(erose)) ///
	(lfitci cum_pct_lifetime age if age<=36 & full==1, lwidth(thick) lpattern("-") lcolor(maroon) ciplot(rline) estopts(cluster(t)) blcolor(cranberry) blpattern("-") blwidth(thin)) ///
	, graphregion(color(white)) legend(off) ///
	xtitle("Age Since Introduction to Region (Months)",size(large)) xlabel(0(12)36,labsize(large)) ///
	ytitle("Fraction of Lifetime Profits", size(large)) ylabel(0(.1)1,labsize(large))
graph export output/fig_AgeVsPctLifeProfit.pdf, replace
* pct_life_profit: linear prediction of the share from age (fit on age<=36, full==1), set to 1 beyond 36
regress avg_pct age if age<=36 & full==1
predict pct_life_profit, xb
replace pct_life_profit = 1 if age>36
*---------------------------------------------------------------------------

* full EU data set, saved before any sample restrictions
save data/EUdata_full, replace

*---------------------------------------------------------------------------
* Sample restrictions: some product-months do not have the variation needed for identification
*---------------------------------------------------------------------------
* may not want to use products where only old ages are observed b/c ageFE and prodFE colinear
*drop if minage>=36
* can only use products with Tj>2 for analysis (bootstrap over months)
*gen ones=1
* Tj: number of rows per product, counted separately for young (age<=36) and old (age>36) rows
generate old = age>36
by product old, sort: egen Tj = total(1)
* keep cells with at least the global Tj_min rows, to make sure the fixed effect can be estimated
drop if Tj < $Tj_min
*keep if Tj36>2 // make sure can estimate fixed effect in restricted GMM sample
*replace age=36 if age>=36

/*
preserve

keep if DES==1
keep if USeventual==1
egen prod=group(product)
sort prod
quietly tab prod, gen(jFE)
sort t prod
outsheet prod lnd age t profit_lifetime pay MarketSize p DES finishfactor jFE* using EUdata_DES_US.csv, replace

restore, preserve
*/

*---------------------------------------------------------------------------
* fillin missing months?

*---------------------------------------------------------------------------

*---------------------------------------------------------------------------
* Build the numeric product id and write the analysis files
*---------------------------------------------------------------------------
* prod: numeric id for each distinct value of product
egen prod = group(product)
sort prod
*quietly tab prod, gen(jFE)
* order rows by month, then product, and write the analysis file
sort t prod
* outsheet without the comma option writes tab-separated text, despite the .csv name
local outvars prod lnd age clinical USlater t profit_lifetime pay MarketSize p DES USeventual
outsheet `outvars' using EUdata_agg_raw.csv, replace
save data/EUdata_agg_raw, replace
*outsheet prod lnd age clinical USlater t profit_lifetime pay MarketSize p DES USeventual jFE* using EUdata_p.csv, replace
*save EUdata_p, replace



