/*---------------------------------------------------------------------------
  Transformation for Normality and Equal Variance

  Workflow:
    1. Describe the permanent baseball data set.
    2. Regress salary on no_hits and inspect the studentized residuals.
    3. Run a Box-Cox analysis (PROC TRANSREG) to choose a transformation of Y.
    4. Refit using log_salary and re-inspect the residuals.
    5. Add yr_major to the model and write that regression to an RTF file.
---------------------------------------------------------------------------*/

options pageno=1;
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";
title;   /* clear any title left over from earlier in the session */

/*Use the permanent SAS baseball data set*/
libname sasdata2 "d:\sasdata2";

/*------------------------- 1. Descriptive statistics -----------------------*/
title "Baseball Data Set";

proc means data=sasdata2.baseball;
run;

proc freq data=sasdata2.baseball;
   tables team league division;
run;

/* Distribution of the raw response: histogram plus normal Q-Q plot */
proc univariate data=sasdata2.baseball;
   var salary;
   histogram;
   qqplot / normal(mu=est sigma=est);
run;

/*------------------------- 2. Salary on the raw scale ----------------------*/
/* goptions reset=all is issued BEFORE the title statement below; keep that
   order so the new title is the one in effect for the plot. */
goptions reset=all;
goptions device=win target=winprtm;
symbol1 color=black value=dot height=.5 interpol=rl;   /* interpol=rl: regression line overlay */

title "Salary vs. Number of Hits in Previous Year";

proc gplot data=sasdata2.baseball;
   plot salary * no_hits;
run;
quit;

/* Simple linear regression; predicted values and residuals saved to regdata1 */
proc reg data=sasdata2.baseball;
   model salary = no_hits;
   plot rstudent.*predicted.;
   output out=regdata1 p=predict r=resid rstudent=rstudent;
run;
quit;

/* Normality check of the studentized residuals from the raw-scale fit */
proc univariate data=regdata1;
   var rstudent;
   histogram;
   qqplot / normal(mu=est sigma=est);
run;

/*------------------------- 3. Box-Cox analysis ------------------------------*/
/* Original author's note on the result of this step:
   Natural Log Transformation is indicated (Lambda = 0) */
title "Use Proc Transreg to decide on a transformation of Y";

proc transreg data=sasdata2.baseball;
   model boxcox(salary/geo) = linear (no_hits);
run;

/* Create the transformed response.
   NOTE(review): this is log(salary+1), not log(salary); the +1 offset is
   presumably a guard against zero salaries -- confirm it is intended, since
   Lambda = 0 corresponds to the plain natural log. */
data baseball2;
   set sasdata2.baseball;
   log_salary = log(salary+1);
run;

/*------------------------- 4. Salary on the log scale -----------------------*/
goptions reset=all;
goptions device=win target=winprtm;
symbol1 color=black value=dot height=.5 interpol=rl;

/* The title literal is kept on a single source line; a quoted string that
   spans a line break is fragile. */
title "Log of Salary vs. Number of Hits";

proc gplot data=baseball2;
   plot log_salary * no_hits;
run;
quit;

proc reg data=baseball2;
   model log_salary = no_hits;
   plot rstudent.*predicted.;
   output out=regdata2 p=predict r=resid rstudent=rstudent;
run;
quit;

proc univariate data=regdata2;
   var rstudent;
   histogram;
   qqplot / normal(mu=est sigma=est);
run;

/*------------------------- 5. Add years in the majors -----------------------*/
/*Try to understand the bimodal residuals*/
ods graphics on;                              /* explicit ON instead of relying on the default */
ods rtf file="d:\salary_regression.rtf";

title "Log Salary vs Number of Hits and Years in Majors";

/* The PARTIAL option requests partial regression plots for each regressor */
proc reg data=baseball2;
   model log_salary = no_hits yr_major / partial;
   plot rstudent.*predicted.;
   output out=regdata3 p=predict r=resid rstudent=rstudent;
run;
quit;

ods rtf close;

/* NOTE(review): the RTF destination is closed before this step, so these
   residual diagnostics are not written to salary_regression.rtf -- confirm
   that is intended. */
proc univariate data=regdata3;
   var rstudent;
   histogram;
   qqplot / normal(mu=est sigma=est);
run;