*
*This file is for education purpose
*Only for students in Khon Kaen University
*
*The original do-file is mus03p1reg.do November 25 2008 for Stata version 10.1
/*
* STATA Program
* copyright C 2008 by A. Colin Cameron and Pravin K. Trivedi
* used for "Microeconometrics using Stata"
* by A. Colin Cameron and Pravin K. Trivedi (2008)
* Stata Press
*/
****************************************************************************
*
*To run you need files
*mus03data.dta in your directory
*
*To change your current directory go to
*File > Change working directory >
*
*Then, choose a folder you want to work on
*Add you mus03data.dta to your chosen folder
*
****************************************************************************
capture log close
log using refresh.txt, text replace
************ DATA SUMMARY STATISTICS ***************************************
* Variable description for medical expenditure dataset
use mus03data.dta
describe totexp ltotexp posexp suppins phylim actlim totchr age female income
* Summary statistics for medical expenditure dataset
summarize totexp ltotexp posexp suppins phylim actlim totchr age female income
* Tabulate variable
tabulate income if income <= 0
* Detailed summary statistics of a single variable
summarize totexp, detail
* Two-way table of frequencies
table female totchr
* Two-way table with row and column percentages and Pearson chi-squared
tabulate female suppins, row col chi2
* Three-way table of frequencies
table female totchr suppins
* One-way table of summary statistics
table female, contents(N totchr mean totchr sd totchr p50 totchr)
* Two-way table of summary statistics
table female suppins, contents(N totchr mean totchr)
* Summary statistics obtained using command tabstat
tabstat totexp ltotexp, stat (count mean p50 sd skew kurt) col(stat)
* Kernel density plots with adjustment for highly skewed data
kdensity totexp if posexp==1, generate (kx1 kd1) n(500)
graph twoway (line kd1 kx1) if kx1 < 40000, name(levels)
kdensity ltotexp if posexp==1, generate (kx2 kd2) n(500)
graph twoway (line kd2 kx2) if kx2 < ln(40000), name(logs)
graph combine levels logs, iscale(1.0)
graph export mus03fig1.eps, replace
*********** BASIC REGRESSION ANALYSIS ***********************************
*Question: What determines health care expenditures of elderly?
*****************************************************************
* Pairwise correlations for dependent variable and regressor variables
correlate ltotexp suppins phylim actlim totchr age female income
* OLS regression with heteroskedasticity-robust standard errors
regress ltotexp suppins phylim actlim totchr age female income, vce(robust)
/*
Interpretation:
totchr: The number of chronic problem increases health care expenditure by 37.6%.
income: The standard deviation of income is 22. A 1-standard deviation in income
increases medical expenditure by 0.055 (22x0.0025), or 5.5%.
suppins: With supplement insurance, medical expenditure increases by 29.2% (e0.256-e0).
What you need to pay attention includes: F – test, R-squared and t-test of each coefficients.
*/
* Display stored results and list available postestimation commands
ereturn list
help regress postestimation
/*
Hypothesis test: impact of functional limitation and activity limitation on
medical expenditure is statistically equal (H0: bphylim = bactlim). Use Wald test
*/
* Wald test of equality of coefficients
quietly regress ltotexp suppins phylim actlim totchr age female ///
income, vce(robust)
test phylim = actlim
/*
Result
p=0.6054 > 0.05, we do not reject H0.
There is no statistically difference between the impacts of hylim and actlim.
*/
* Joint test of statistical significance of several variables
test phylim actlim totchr
* Store and then tabulate results from multiple regressions
quietly regress ltotexp suppins phylim actlim totchr age female income, vce(robust)
estimates store REG1
quietly regress ltotexp suppins phylim actlim totchr age female educyr, vce(robust)
estimates store REG2
estimates table REG1 REG2, b(%9.4f) se stats(N r2 F ll) keep(suppins income educyr)
* Tabulate results using user-written command esttab to produce cleaner output
esttab REG1 REG2, b(%10.4f) se scalars(N r2 F ll) mtitles ///
keep(suppins income educyr) title("Model comparison of REG1-REG2")
* Write tabulated results to a file in latex table format
quietly esttab REG1 REG2 using mus03table.tex, replace b(%10.4f) se scalars(N r2 F ll) ///
mtitles keep(suppins age income educyr _cons) title("Model comparison of REG1-REG2")
* Add a user-calculated statistic to the table
estimates drop REG1 REG2
quietly regress ltotexp suppins phylim actlim totchr age female ///
income, vce(robust)
estadd scalar pvalue = Ftail(e(df_r),e(df_m),e(F))
estimates store REG1
quietly regress ltotexp suppins phylim actlim totchr age female ///
educyr, vce(robust)
estadd scalar pvalue = Ftail(e(df_r),e(df_m),e(F))
estimates store REG2
esttab REG1 REG2, b(%10.4f) se scalars(F pvalue) mtitles keep(suppins)
************** SPECIFICATION ANALYSIS **************************************
* Plot of residuals against fitted values
quietly regress ltotexp suppins phylim actlim totchr age female income, ///
vce(robust)
rvfplot
graph export mus03fig2.eps, replace
* Details on the outlier residuals
predict uhat, residual
predict yhat, xb
list totexp ltotexp yhat uhat if uhat < -5, clean
* Compute dfits that combines outliers and leverage
quietly regress ltotexp suppins phylim actlim totchr age female income
predict dfits, dfits
scalar threshold = 2*sqrt((e(df_m)+1)/e(N))
display "dfits threshold = " %6.3f threshold
tabstat dfits, stat (min p1 p5 p95 p99 max) format(%9.3f) col(stat)
list dfits totexp ltotexp yhat uhat if abs(dfits) > 2*threshold & e(sample), clean
* Boxcox model with lhs variable transformed
boxcox totexp suppins phylim actlim totchr age female income if totexp>0, nolog
* Variable augmentation test of conditional mean using estat ovtest
quietly regress ltotexp suppins phylim actlim totchr age female ///
income, vce(robust)
estat ovtest
* Link test of functional form of conditional mean
quietly regress ltotexp suppins phylim actlim totchr age female ///
income, vce(robust)
linktest
/*
Heteroskedasticity test: Breusch-Pagan Lagrange multiplier test
estat hottest postestimation command:
1. Use y-hat as regressor.
2. Use all Xs as regressor.
Reject homoscedasticity if chi-square < 0.05.
*/
* Heteroskedasticity tests using estat hettest and option iid
quietly regress ltotexp suppins phylim actlim totchr age female income
estat hettest, iid
estat hettest suppins phylim actlim totchr age female income, iid
* Information matrix test
quietly regress ltotexp suppins phylim actlim totchr age female income
estat imtest
* Simulation to show tests have power in more than one direction
clear all
set obs 50
set seed 10101
generate x = runiform() // x ~ uniform(0,1)
generate u = rnormal() // u ~ N(0,1)
generate y = exp(1 + 0.25*x + 4*x^2) + u
generate xsq = x^2
regress y x xsq
* Test for heteroskedasticity
estat hettest
* Test for misspecified conditional mean
estat ovtest
**************************** PREDICTION *********************************
* Change dependent variable to level of positive medical expenditures
use mus03data.dta, clear
keep if totexp > 0
regress totexp suppins phylim actlim totchr age female income, vce(robust)
* Prediction in model linear in levels
predict yhatlevels
summarize totexp yhatlevels
* Compare median prediction and median actual value
tabstat totexp yhatlevels, stat (count p50) col(stat)
* Compute standard errors of prediction and forecast with default VCE
quietly regress totexp suppins phylim actlim totchr age female income
predict yhatstdp, stdp
predict yhatstdf, stdf
summarize yhatstdp yhatstdf
* Compute elasticity for a specified regressor
quietly regress totexp suppins phylim actlim totchr age female income, vce(robust)
mfx, varlist(totchr) eyex
* Prediction in levels from a logarithmic model
quietly regress ltotexp suppins phylim actlim totchr age female income
quietly predict lyhat
generate yhatwrong = exp(lyhat)
generate yhatnormal = exp(lyhat)*exp(0.5*e(rmse)^2)
quietly predict uhat, residual
generate expuhat = exp(uhat)
quietly summarize expuhat
generate yhatduan = r(mean)*exp(lyhat)
summarize totexp yhatwrong yhatnormal yhatduan yhatlevels
* Predicted effect of supplementary insurance: methods 1 and 2
bysort suppins: summarize totexp yhatlevels yhatduan
* Predicted effect of supplementary insurance: method 3 for log-linear model
quietly regress ltotexp suppins phylim actlim totchr age female income
preserve
quietly replace suppins = 1
quietly predict lyhat1
generate yhatnormal1 = exp(lyhat1)*exp(0.5*e(rmse)^2)
quietly replace suppins = 0
quietly predict lyhat0
generate yhatnormal0 = exp(lyhat0)*exp(0.5*e(rmse)^2)
generate treateffect = yhatnormal1 - yhatnormal0
summarize yhatnormal1 yhatnormal0 treateffect
restore
*************************** SAMPLING WEIGHTS ******************************
* Create artificial sampling weights
use mus03data.dta, clear
generate swght = totchr^2 + 0.5
summarize swght
* Calculate the weighted mean
mean totexp [pweight=swght]
* Perform weighted regression
regress totexp suppins phylim actlim totchr age female income [pweight=swght]
* Weighted prediction
quietly predict yhatwols
mean yhatwols [pweight=swght], noheader
mean yhatwols, noheader // unweighted prediction
*
* log close
* clear
* exit