#########################
#                       #
# Homework 4 Solutions  #
#                       #
#########################

# All questions must include code, and a write-up describing the procedure
# undertaken for each question, and conclusions drawn and interpretation of
# results from every statistical test performed.
# Grading is no longer just code and programming. Now we are moving into the
# "more statistical" part of the course. Null and alternative hypothesis, test
# statistics, and interpretations are required!

# Question 1 ----
library(MASS)
# NOTE(review): attach() is kept because every statement below refers to the
# Boston columns by bare name; prefer Boston$col or `data = Boston` in new code.
attach(Boston)

# Assess normality of nox graphically (QQ plot) and formally (Shapiro-Wilk).
qqnorm(nox, main = "QQ Plot of nitrogen  oxide concentration")
qqline(nox)
shapiro.test(nox)
# One-sample Wilcoxon signed-rank test of nox against mu = 0.5.
wilcox.test(nox, mu = 0.5)

# Question 2 ----
boxplot(crim ~ chas)
# hard to tell from just the plots if the variance is the same between groups
# Draw the two histograms side by side, then restore the previous layout so
# that later plots are not squeezed into a 1x2 grid.
old_par <- par(mfrow = c(1, 2))
hist(crim[which(chas == 1)], main = "Tract bounds river")
hist(crim[which(chas == 0)], main = "Tract does not bound river")
par(old_par)
# again, not so easy to say
# do a formal test of equal variances:
var.test(crim ~ chas)

# normal?
# overall looking at normality
qqnorm(crim, main = "QQ Plot of crime rate (overall)")
qqline(crim)
shapiro.test(crim)

# then by group: tracts that do not bound the river ...
qqnorm(crim[which(chas == 0)], main = "QQ Plot of crime rate (does not bound CR)")
qqline(crim[which(chas == 0)])
shapiro.test(crim[which(chas == 0)])

# ... and tracts that do
qqnorm(crim[which(chas == 1)], main = "QQ Plot of crime rate (bounds CR)")
qqline(crim[which(chas == 1)])
shapiro.test(crim[which(chas == 1)])

# all tests and graphs point to non-normality in the crime variable
# so perform a nonparametric test
wilcox.test(crim ~ chas)

# Question 3(a) ----
# Bin dis into three categories: < 2.5, [2.5, 5), and >= 5.
dis.cat <- ifelse(
  dis < 2.5,
  "very close",
  ifelse(dis < 5, "somewhat nearby", "far")
)

# Question 3(b) ----
# first look at the normality assumption for medv, overall
qqnorm(medv, main = "QQ Plot of Medv (overall)")
qqline(medv)
shapiro.test(medv)

# then look by group:
qqnorm(medv[which(dis.cat == "very close")], main = "QQ Plot of Medv (very close)")
qqline(medv[which(dis.cat == "very close")])
shapiro.test(medv[which(dis.cat == "very close")])

qqnorm(
  medv[which(dis.cat == "somewhat nearby")],
  main = "QQ Plot of Medv (somewhat nearby)"
)
qqline(medv[which(dis.cat == "somewhat nearby")])
shapiro.test(medv[which(dis.cat == "somewhat nearby")])

qqnorm(medv[which(dis.cat == "far")], main = "QQ Plot of Medv (far)")
qqline(medv[which(dis.cat == "far")])
shapiro.test(medv[which(dis.cat == "far")])

# We could perform Bartlett's test to look at the equal variances assumption,
# but since that test itself assumes normality of the variables, and we've
# established that that assumption is untenable, it's not necessary (or
# appropriate) to test for equal variances: a nonparametric test is the way
# to go.
dis.cat <- factor(dis.cat)
kruskal.test(medv ~ dis.cat)

# Question 4 ----
before <- c(12.9, 13.5, 12.8, 15.6, 17.2, 19.2, 12.6, 15.3, 14.4, 11.3)
after <- c(12.7, 13.6, 12.0, 15.2, 16.8, 20.0, 12.0, 15.9, 16.0, 11.1)
length(before)
# The data are paired, so both vectors must have one entry per athlete.
stopifnot(length(before) == length(after))

# Per-athlete change in dash time.
# NOTE(review): this variable shares its name with base::diff(); the name is
# kept so that anything referring to `diff` afterwards still works.
diff <- after - before
hist(diff)
qqnorm(diff, main = "QQ Plot of Difference in dash times for 10 athletes")
qqline(diff)
shapiro.test(diff)
# it is VERY hard to tell if something is normal with a sample size of 10.
# it is not inappropriate to use a t-test here, however depending on the
# situation, caution may be warranted. Either method is acceptable here.
# That said, since there are ties in the data (e.g. two differences of -0.4),
# performing a nonparametric test based on rankings becomes problematic: see
# the warning that may be generated when the Wilcoxon signed-rank test is run.
t.test(diff)
wilcox.test(diff)
# or, equivalently, run the paired versions on the raw measurements
t.test(after, before, paired = TRUE)
wilcox.test(after, before, paired = TRUE)