#Reading in data (p. 3-7) splus<-read.table(file="h:/splus/splus.txt",na.strings='.',col.names=c('id',"age","tol","male","noff","ttof")) #Basic operations (p. 8-10) c(1,2,5) c(0:10) rep(1,10) matrix(c(1,1,2,2),2,2) array(rep(c(1,1,2,2,),2),dim=c(2,2,2)) splus[1:2,] splus[1:2,3:4] tf<-(c(1,2,5)<=c(2,4,1)) #Common statistical procedures (p. 11-16) summary(splus$age) hist(splus$age) stem(splus$age) summary(splus$tol) t.test(splus$tol,mu=6) t.test(splus[c(1:1000)*splus$male,3],splus[c(1:1000)*(1-splus$male),3]) t.test(splus[c(1:1000)*splus$male,3],splus[c(1:1000)*(1-splus$male),3],mu=.5,alternative="greater") t.test(splus[splus$male==TRUE,3],splus[splus$male==FALSE,3]) x1<-as.category(splus$male) hist(splus$noff,breaks=c(0:20)) y1<-as.category(cut(splus$noff,breaks=c(-1,0,1,100))) table(x1,y1) chisq.test(x1,y1) myreg<-lm(age~noff+tol,data=splus) myreg$coefficients[1:2] myreg$residuals summary(myreg) plot(splus$tol,myreg$residuals) plot(splus$noff,myreg$residuals) plot(splus$tol,splus$age,ylim=c(14,23),xlab="",ylab="") par(new=T) plot(splus$tol,myreg$fitted.values,col=2,ylim=c(14,23),xlab="TOL",ylab="Age at TOL") summary(splus$ttof) ttof<-c(splus[is.na(splus[,6])==FALSE,6],splus[is.na(splus[,6])==TRUE,3]) summary(ttof) cens<-c(rep(1,sum(is.na(splus[,6])==FALSE)),rep(0,sum(is.na(splus[,6])==TRUE))) ###0 = alive and 1 = dead### kM<-survfit(Surv(ttof,cens)~rep(1,1000)) plot.survfit(kM) male<-c(splus[is.na(splus[,6])==FALSE,4],splus[is.na(splus[,6])==TRUE,4]) kM<-survfit(Surv(ttof,cens)~male) plot.survfit(kM,lty=c(1,2)) survdiff(Surv(ttof,cens)~male,rho=0) survdiff(Surv(ttof,cens)~male,rho=1) coxmale<-coxph(Surv(ttof,cens)~male) summary(coxmale) #Graphics (p. 17-18) tol<-splus[,2] ttof<-splus[,6] plot(tol,ttof,xlab="Age at Time of License",ylab="Time to First Offense") plot(tol,ttof,xlab="Age at Time of License",ylab="Time to First Offense",pch='.') plot(tol[splus$male==TRUE],ttof[splus$male==TRUE],xlab="Age at Time of License",ylab="Time to First Offense",pch=25) plot(tol[splus$male==FALSE],ttof[splus$male==FALSE],xlab="Age at Time of License",ylab="Time to First Offense",pch=26) plot(tol[splus$male==TRUE],ttof[splus$male==TRUE],xlab="",ylab="",pch=25,xlim=c(15,21),ylim=c(0,8),cex=1.5,col=3) par(new=T) plot(tol[splus$male==FALSE],ttof[splus$male==FALSE],xlab="",ylab="",pch=26, xlim=c(15,21),ylim=c(0,8),cex=1.5,col=2) legend(18.5,6,legend=c("Female","Male"),pch="\032\031",cex=1.5) mtext("Time to First Offense",side=2,line=3,cex=1.25) mtext("Age at Time of License",side=1,line=3,cex=1.25) title("Age at TOL vs. Time to First Offense") #Matrix operations (p. 19-24) c(1,2,3)*c(4,5,6) A<-matrix(c(1,.5,.5,3),2,2) B<-matrix(c(2,4,3,5),2,2) A%*%B t(A%*%B) solve(B) solve(B,c(1.5,2.5)) B%*%solve(B) B%*%solve(B,c(1.5,2.5)) chol(A) t(chol(A))%*%chol(A) qr.Q(qr(A)) qr.R(qr(A)) qr.Q(qr(A))%*%qr.R(qr(A)) eigen(A) eigen(A)$values[1]*eigen(A)$vector[,1]%*%t(eigen(A)$vector[,1])+ eigen(A)$values[2]*eigen(A)$vector[,2]%*%t(eigen(A)$vector[,2]) #User-defined functions (p. 25) mydet<-function(A,B){ detA<-Re(prod(eigen(A)$values)) detB<-Re(prod(eigen(B)$values)) detAB<-detA*detB return(detA,detB,detAB) } A<-matrix(c(1,2,3,4),2,2) B<-matrix(c(2,7,4,5),2,2) testdet<-mydet(A,B) testdet$detA testdet$detB testdet$detAB #Robust statistics (p. 26-30) myreg<-lm(age~noff+tol,data=splus) myreg$coefficients myreg2<-l1fit(cbind(splus$noff,splus$tol),splus$age) myreg2$coefficients hist(splus[,2],probability=T,xlim=c(14,23),ylim=c(0,1),xlab="",ylab="") par(new=T) plot(ksmooth(splus[,2],kernel="normal",bandwidth=1)$x, ksmooth(splus[,2],kernel="normal",bandwidth=1)$y,type='l',xlim=c(14,23),ylim=c(0,1),ylab="",xlab="Age at TOL") age<-splus[is.na(ttof)==0,2] nttof<-ttof[is.na(ttof)==0] plot(age,nttof,ylim=c(0,10),xlab="",ylab="") par(new=T) plot.loess(loess(nttof~age),ylim=c(0,10),xlab="",ylab="") par(new=T) plot.loess(loess(nttof~age,degree=1),ylim=c(0,10),lty=2,xlab="",ylab="") par(new=T) plot(smooth.spline(age,nttof)$x,smooth.spline(age,nttof)$y,type='l',ylim=c(0,10),lty=3,xlab="",ylab="") par(new=T) plot(smooth.spline(age,nttof,df=4)$x,smooth.spline(age,nttof,df=4)$y,type='l',ylim=c(0,10),lty=4,xlab="Time to First Offense",ylab="Age at TOL") #Simulations (p. 31-33) betalb<-rep(0,200) betaub<-rep(0,200) count<-0 for(i in 1:200){ x<-runif(30,0,10) y<-1+2*x+rnorm(30) myreg<-lm(y~x) betalb[i]<-myreg$coefficients[2]-qt(.975,28)*sqrt(deviance(myreg)/28)/sqrt(29*var(x)) betaub[i]<-myreg$coefficients[2]+qt(.975,28)*sqrt(deviance(myreg)/28)/sqrt(29*var(x)) if(betalb[i]<2&&betaub[i]>2) count<-count+1 print(i) } print(count) 100-(sum(betalb>2)+sum(betaub<2))/2 y<-c(3,5,4,2,8,6,5,6,7,2) cumsum(y)[c(4,7,10)]-c(0,cumsum(y)[c(4,7)])