#In-class lecture 5 code. BS720, Summer 2014 setwd("/Users/Avery/Desktop/classes/720 Intro to R/summer 2014/wk5") fat <- read.table("fat.txt",header=TRUE, sep="\t") str(fat) attach(fat) cor(fat\$age, fat\$pctfat.brozek, method="pearson") cor.test(fat\$age, fat\$pctfat.brozek, method="pearson") cor(fat\$age,fat\$pctfat.brozek, method="spearman") cor.test(fat\$age,fat\$pctfat.brozek, method="spearman") x<-seq(-10,10, 1) y<-x*x plot(x,y) cor(x,y) fatdata<-fat[,c(1,2,5:11)] summary(fatdata[,-1]) lm1 <- lm(pctfat.brozek ~ neck, data = fatdata) plot(pctfat.brozek ~ neck, data = fatdata) abline(lm1) names(lm1) lm1 summary(lm1) #a random plot of normal random variables #a selection of x values, with y = x + a normal random variable centered at x, with high variance x<-c(seq(0,40), sample(seq(0,40),30, replace=TRUE) ) y<-c() for (i in 1:length(x)){ y[i] <- x[i]-20+rnorm(1,x[i],20) } lmrandom<-lm(y ~ x) plot(y~x,col="red", main=expression(paste("Random linear (normal) data with ",beta[0], " = -20")) ) abline(lmrandom) par(mfrow=c(2,2)) plot(lmrandom,which=1:4) plot(fitted(lm1),resid(lm1)) qqnorm(resid(lm1)) lm2<-lm(pctfat.brozek~age+fatfreeweight+neck,data=fatdata) summary(lm2) lm3<-lm(pctfat.brozek~age+fatfreeweight+neck+factor(bmi),data=fatdata) summary(lm3) par(mfrow=c(2,2)) plot(lm3,which=1:4) #let's see what a an influence point/outlier looks like with the randomized plot x2<-c(x,0,20,80) y2<-c(y,999,999,999) lmrandom2<-lm(y2 ~ x2) par(mfrow=c(1,2)) plot(y~x,col="red", main=expression(paste("Random linear (normal) data with ",beta[0], " = -20")),ylim=c(-50,1000) ) abline(lmrandom) plot(y2~x2,col="blue", main=expression(paste("Random linear (normal) data with ",beta[0], " = -20, plus O/IP")) ) abline(lmrandom2) par(mfrow=c(2,2)) plot(lmrandom2,which=1:4) dffits(lm3) lm3.hat<-hatvalues(lm3) id.lm3.hat<-which(lm3.hat>(2*(4+1)/nrow(fatdata))) lm3.hat[id.lm3.hat] summary(influence.measures(lm3)) # now let's try finding the leverage values in the randomly generated dataset... lmrandom2.hat <- hatvalues(lmrandom2) id.lmrandom2.hat <- which(lmrandom2.hat > (2*(4+1)/length(x2))) ## hatvalue #2*(k+1)/n lmrandom2.hat[id.lmrandom2.hat] length(x2)