*****Aleksandra Anić
**** 5th December 2024

* Working directory for the session.
cd D:\Microeconometrics_Master\Results
* Data: panel from the RAND Health Insurance Experiment.
use D:\Microeconometrics_Master\Database\mus18data.dta, clear
* Only one cross-section (year 2) is analysed; the outcome of interest is the
* ordered health-status variable.
keep if year == 2
*** The data contain three health-status dummies: hlthg (good), hlthf (fair)
*** and hlthp (poor). There is no dummy for excellent health.
sum hlth* // 0.365 have good health, 0.078 fair and 0.016 poor; the others should be excellent

**** First step: create one categorical variable with three categories:
**** fair or poor (1), good (2) and excellent (3).
**** The same variable can be defined with several different commands
**** (for example gen/replace, as here, or recode).
**** Excellent is assigned only when all three dummies are observed and equal
**** to zero. An observation with a missing dummy therefore stays missing
**** instead of being silently counted as excellent.
gen hlthstat = 1 if hlthf==1 | hlthp==1
replace hlthstat = 2 if hlthg==1
replace hlthstat = 3 if hlthg==0 & hlthf==0 & hlthp==0
lab define hlthstat 1 "poor or fair" 2 "good" 3 "excellent"
lab value hlthstat hlthstat
* The missing option makes any unclassified observations visible.
tab hlthstat, missing

* Descriptive statistics for the outcome and the three regressors.
summarize hlthstat age linc ndisease
* Ordered logit, followed by the fitted probability of each of the three
* outcome categories (pr is the default statistic, written out here).
ologit hlthstat age linc ndisease
predict p1ologit p2ologit p3ologit, pr
* Mean fitted probabilities next to the observed category frequencies.
summarize p*ologit
tabulate hlthstat

*** Ordered logit versus ordered probit.
*** A positive beta means that an increase in x necessarily decreases the
*** probability of being in the lowest category and increases the
*** probability of being in the highest category.
foreach model in ologit oprobit {
    `model' hlthstat age linc ndisease
}

**** Check what happens if the values 1, 2, 3 are replaced by other values
**** with the same ordering: every code is shifted up by one.
gen hlthstat1 = hlthstat + 1
sum hlthstat1 hlthstat

**** Changing the values without changing the order has no influence.
foreach model in ologit oprobit {
    foreach depvar in hlthstat hlthstat1 {
        `model' `depvar' age linc ndisease
    }
}


/* HYPOTHESIS TESTS */
* Unrestricted model: baseline regressors plus female and black.
ologit hlthstat age linc ndisease female black
est store unrestricted
test female black // Wald test of the two added coefficients

* Restricted model: baseline regressors only.
ologit hlthstat age linc ndisease
est store restricted

* Likelihood-ratio test comparing the two stored fits.
lrtest unrestricted restricted

**** Check the parallel-lines assumption.
**** The ordered outcome is split into one binary outcome per threshold.
**** Stata treats a missing value as larger than any number, so a bare
**** comparison such as hlthstat>2 evaluates to 1 when hlthstat is missing.
**** The if qualifier leaves d1 and d2 missing in that case.
gen d2 = hlthstat>2 if !missing(hlthstat) // excellent
gen d1 = hlthstat>1 if !missing(hlthstat) // hlthstat good or excellent

* Cross-tabulations to verify the coding of the two binary outcomes.
tab hlthstat d2, missing
tab hlthstat d1, missing

* One binary logit per threshold, each stored for later comparison.
logit d1 age linc ndisease
est store hlth1
logit d2 age linc ndisease
est store hlth2

* Coefficients side by side, then a joint fit of both equations and a test
* that the coefficients are equal across the two equations.
estimates table hlth*, equations(1)
suest hlth1 hlth2
test [hlth1_d1=hlth2_d2]

* Likelihood-ratio version of the same check, using gologit2.
* First fit: no parallel-lines constraint imposed.
quietly gologit2 hlthstat age linc ndisease, store(unconstrained)
* Second fit: pl specified without parameters constrains all independent
* variables to meet the parallel-lines assumption. It will produce results
* that are equivalent to ologit. Variables can be specified in pl() in order
* to check which variable causes the violation of the assumption.
quietly gologit2 hlthstat age linc ndisease, pl store(constrained)
lrtest constrained unconstrained

*** Marginal effects after ordered probit: the effect of every regressor on
*** the probability of each of the three outcome categories in turn.
oprobit hlthstat age linc ndisease
forvalues k = 1/3 {
    margins, dydx(*) predict(outcome(`k'))
}


**** Ordered probit: reproduce predicted probabilities and one marginal
**** effect by hand and compare them with predict and margins.
use https://www.stata-press.com/data/r18/fullauto, clear
oprobit rep77 foreign length mpg
* Linear prediction, i.e. x1*beta1 + x2*beta2 + ..., on the estimation sample.
predict xb if e(sample), xb

* The cut points and the mpg coefficient are read from the stored estimates
* instead of being retyped from the output (the original script used the
* rounded values 10.1589, 11.21 and 0.1305), so nothing is lost to rounding
* and the code keeps working if the specification changes.
* Pr(lowest category)  = Phi(cut1 - xb)
* Pr(second category)  = Phi(cut2 - xb) - Phi(cut1 - xb)
gen p1 = normal(_b[/cut1] - xb)
gen p2 = normal(_b[/cut2] - xb) - normal(_b[/cut1] - xb)
* Built-in predicted probabilities for all five categories. The variables are
* called pologit1-pologit5 although the model here is an ordered probit.
predict pologit1-pologit5 if e(sample)
sum p1 p2 pologit1 pologit2

* Marginal effect of mpg on the probability of the second category:
* (phi(cut1 - xb) - phi(cut2 - xb)) * beta_mpg, observation by observation.
* Its mean should match the average marginal effect reported by margins.
gen mpg_me2 = (normalden(_b[/cut1] - xb) - normalden(_b[/cut2] - xb)) * _b[mpg]
margins, dydx(mpg) predict(outcome(2))
sum mpg_me2

**** Ordered logit: reproduce predicted probabilities by hand and compare
**** them with the results obtained by predict.

use https://www.stata-press.com/data/r18/fullauto, clear
ologit rep77 foreign length mpg
* Linear prediction, i.e. x1*beta1 + x2*beta2 + ..., on the estimation sample.
predict xb if e(sample), xb
* Built-in predicted probabilities for all five categories.
predict prob1-prob5 if e(sample)

* Cut points come from the stored estimates (the original script typed in
* 17.92748 and 19.86506 from the output). invlogit(z) is exp(z)/(1+exp(z)).
* Pr(lowest category)  = invlogit(cut1 - xb)
* Pr(second category)  = invlogit(cut2 - xb) - invlogit(cut1 - xb)
gen p1 = invlogit(_b[/cut1] - xb)
gen p2 = invlogit(_b[/cut2] - xb) - invlogit(_b[/cut1] - xb)
* The hand-computed and built-in probabilities should coincide.
sum p1 p2 prob1 prob2

*** oprobit again, reading the estimates from the coefficient matrix e(b) (more advanced)
/*
oprobit rep77 foreign length mpg
matrix coef=e(b)
predict xb if rep77!=., xb
gen p1=normal(coef[1,colnumb(coef,"/:cut1")]-xb)
gen p2=normal(coef[1,colnumb(coef,"/:cut2")]-xb)-normal(coef[1,colnumb(coef,"/:cut1")]-xb)
predict pologit1-pologit5 if rep77!=.
sum p1 p2 pologit1 pologit2

gen mpg_me2=(normalden(coef[1,colnumb(coef,"/:cut1")]-xb)-normalden(coef[1,colnumb(coef,"/:cut2")]-xb))*(coef[1,colnumb(coef,"rep77:mpg")]) 
margins, dydx(mpg) predict(outcome(2)) 


*****https://www.stata.com/support/faqs/statistics/cut-points/