/**************Statistical Tests***********************/
/*
   Worked examples of common statistical tests, run on two data sources:
     - pulse              : WORK data set read from pulse.csv in the data step below
     - sasdata2.employee  : permanent data set in the SASDATA2 library

   Output goes to the active SAS output destination. The regression steps
   also create WORK data sets regdat, temp and regdat2; the McNemar section
   creates WORK.newpulse.
*/

/* Use plain keyboard characters for table borders in listing output. */
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";

/* Clear any title left over from earlier submissions. */
title;

libname sasdata2 "C:\Users\kwelch\Desktop\sasdata2";

/*Read in the pulse data*/
/*
   firstobs=2 skips the first line of the file (presumably a header row).
   DSD makes two consecutive commas read as a missing value; without it SAS
   treats adjacent delimiters as one and shifts the remaining fields left.
   MISSOVER sets unread trailing variables to missing on short lines instead
   of continuing onto the next record.
*/
data pulse;
   infile "pulse.csv" firstobs=2 dsd delimiter="," missover;
   input pulse1 pulse2 ran smokes sex height weight activity;
   label pulse1 = "Resting pulse, rate per minute"
         pulse2 = "Second pulse, rate per minute";
run;

/*Create formats for categorical variables*/
proc format;
   value sexfmt   1="Male"
                  2="Female";
   value yesnofmt 1="Yes"
                  2="No";
   value actfmt   1="Low"
                  2="Medium"
                  3="High";
run;

/* List the first 25 observations with labels and formatted categories. */
proc print data=pulse (obs=25) label;
   format sex sexfmt.
          ran smokes yesnofmt.
          activity actfmt.;
run;

/* Default descriptive statistics for every numeric variable. */
proc means data=pulse;
run;

/**************Statistical Analyses***********************/

/*Binomial Confidence Intervals and Tests for Binary Variables*/
/* Asymptotic test of H0: proportion = .25 (PROC FREQ uses the first level of smokes by default). */
proc freq data = pulse;
   tables smokes / binomial(p=.25);
run;

/* Same test, adding the exact binomial p-value and confidence limits. */
proc freq data = pulse;
   tables smokes / binomial(p=.25);
   exact binomial;
run;

/*Chi-square Goodness of Fit Tests for Categorical Variables*/
/* With no TESTP= the null hypothesis is equal proportions across levels. */
proc freq data = pulse;
   tables activity / chisq;
run;

/* Null proportions given as fractions, in the order of the activity levels. */
proc freq data = pulse;
   tables activity / chisq testp = ( .20 , .50, .30 );
run;

/* Same null hypothesis with TESTP= given as percentages. */
proc freq data = pulse;
   tables activity / chisq testp = ( 20 , 50, 30 );
run;

/*One-Sample test for a continuous variable*/
/* MU0=72 sets the null value for the location tests; plots check normality. */
proc univariate data=pulse mu0=72;
   var pulse1;
   histogram / normal (mu=est sigma=est);
   qqplot / normal (mu=est sigma=est);
run;

proc ttest data=pulse H0=72;
   var pulse1;
run;

/*Paired Samples t-test*/
/* Tests the mean of the within-subject difference pulse2 - pulse1. */
proc ttest data=pulse;
   paired pulse2*pulse1;
run;

/*Paired Samples t-test stratified by ran*/
/* BY-group processing requires the data to be sorted by the BY variable. */
proc sort data=pulse;
   by ran;
run;

proc ttest data=pulse;
   paired pulse2*pulse1;
   by ran;
run;

/*Independent Samples t-test*/
proc ttest data=sasdata2.employee;
   class gender;
   var salary;
run;

/*Check the distribution of salary for each gender*/
proc univariate data=sasdata2.employee;
   var salary;
   class gender;
   histogram;
run;

/*Assume salary is lognormally distributed*/
proc ttest data=sasdata2.employee dist=lognormal;
   class gender;
   var salary;
run;

/* NOTE: this sorts the permanent data set sasdata2.employee in place. */
proc sort data=sasdata2.employee;
   by jobcat;
run;

/* Lognormal two-sample t-test of salary by gender within each job category. */
proc ttest data=sasdata2.employee dist=lognormal;
   by jobcat;
   class gender;
   var salary;
run;

/*NON-PARAMETRIC TEST: WILCOXON/MANN-WHITNEY TEST*/
proc npar1way data=sasdata2.employee wilcoxon;
   class gender;
   var salary;
run;

/* MC requests a Monte Carlo estimate of the exact p-value. */
proc npar1way data=sasdata2.employee wilcoxon;
   class gender;
   var salary;
   exact wilcoxon / mc;
run;

/*Correlation*/
proc corr data=sasdata2.employee;
   var salary salbegin educ;
run;

/*Linear Regression*/
/* Predicted values, raw residuals and studentized residuals are saved to regdat. */
proc reg data=sasdata2.employee;
   model salary = salbegin educ jobdum2 jobdum3 prevexp female;
   output out=regdat p=predict r=resid rstudent=rstudent;
run;
quit;

/* Normality check on the studentized residuals. */
proc univariate data=regdat;
   var rstudent;
   histogram / normal;
   qqplot / normal(mu=est sigma=est);
run;

/* Residuals versus predicted values. */
proc sgplot data=regdat;
   scatter x=predict y=resid;
run;

/*Carry out a linear regression on logsalary*/
data temp;
   set sasdata2.employee;
   logsalary = log(salary);   /* natural log */
run;

proc reg data=temp;
   model logsalary = salbegin educ jobdum2 jobdum3 prevexp female;
   output out=regdat2 p=predict r=resid rstudent=rstudent;
run;
quit;

proc univariate data=regdat2;
   var rstudent;
   histogram / normal;
   qqplot / normal(mu=est sigma=est);
run;

proc sgplot data=regdat2;
   scatter x=predict y=resid;
run;

/*Chi-square test of independence*/
proc freq data=sasdata2.employee;
   tables gender*jobcat / chisq;
run;

title;

/* Monte Carlo estimate of Fisher's exact test. Per the PROC FREQ documentation,
   SEED=0 takes the seed from the clock, so the estimate varies between runs. */
proc freq data=sasdata2.employee;
   tables gender*jobcat / chisq;
   exact fisher / mc seed=0;
run;

/*McNemar's Test of Symmetry for Paired Categorical Data*/
/*
   Dichotomize each pulse at a single cutoff of 80:
     pulse >  80      -> 1 (high)
     0 < pulse <= 80  -> 0 (not high)
   Missing or non-positive pulses match neither condition (a SAS missing value
   compares lower than any number), so the indicator stays missing.
   The earlier version used "<= 89" for the zero branch, which overwrote the 1
   assigned to pulses of 81-89 and silently moved the cutoff to 89.
*/
data newpulse;
   set pulse;

   if pulse1 > 80 then hipulse1 = 1;
   else if pulse1 > 0 then hipulse1 = 0;

   if pulse2 > 80 then hipulse2 = 1;
   else if pulse2 > 0 then hipulse2 = 0;
run;

/* One-way frequencies of the two indicators. */
proc freq data=newpulse;
   tables hipulse1 hipulse2;
run;

/* AGREE requests McNemar's test (and kappa) for the 2x2 table. */
proc freq data=newpulse;
   tables hipulse1*hipulse2 / agree;
run;