## ----leukdata,echo=FALSE------------------------------------------------- ## download data from here: ## https://statweb.stanford.edu/~ckirby/brad/LSI/datasets-and-programs/data/leukdata.RData load("leukdata.RData") leukdata_norm <- leukdata N <- nrow(leukdata) for(jj in 1:ncol(leukdata)){ leukdata_norm[,jj] <- qnorm((rank(leukdata[,jj])-0.5)/N) } ## make data frame for plotting cl <- colnames(leukdata_norm) recode <- c("0"="ALL","1"="AML") leuk <- as.data.frame(t(leukdata_norm)) leuk$class <- recode[cl] ## separate into healthy and cancer ALL <- leukdata_norm[,colnames(leukdata_norm)=="0"] AML <- leukdata_norm[,colnames(leukdata_norm)=="1"] ## compute all p-values pvals <- rep(NA_real_,nrow(ALL)) tstats <- rep(NA_real_,nrow(ALL)) for(ii in 1:nrow(ALL)){ a <- t.test(AML[ii,],ALL[ii,],var.equal=TRUE) pvals[ii] <- a$p.value tstats[ii] <- a$statistic } ## transform tstats to N(0,1) zstats <- qnorm(pt(tstats,df=70)) zstatsdf <- as.data.frame(zstats) ## ----zstats-leuk,echo=FALSE,fig=TRUE,width=8,height=6-------------------- library(ggplot2) p <- ggplot(zstatsdf,aes(x=zstats)) + geom_histogram(bins=100) + geom_vline(xintercept=0,col='red') + theme( text = element_text(size=20) ) plot(p) ## ----pi0estimate-leuk,echo=FALSE----------------------------------------- thres <- qnorm(0.75) pi0hat <- mean(abs(zstats) < thres) / 0.5 ## ----leuk-fhat,echo=FALSE,fig=TRUE,width=8,height=6---------------------- library(locfdr) ## why are there infinite zstats? zstats <- zstats[!is.infinite(zstats)] a <- locfdr(zstats,type=1,nulltype=0,plot=0) wid <- abs(a$mat[1,1] - a$mat[2,1]) par(mar=c(5,5,1,1)) f <- a$mat[,5] / sum(wid*a$mat[,5]) ylim <- c(0,max(dnorm(a$mat[,1]))) plot(a$mat[,1],f,type='l',lwd=2,ylim=ylim, xlab="z",ylab="f",cex.axis=1.2,cex.lab=1.2,yaxs='i',xaxs='i') lines(a$mat[,1],dnorm(a$mat[,1]),col='red',lwd=2) legend("topright",c("f hat","N(0,1)"),lwd=2,col=c(1,2)) ## ----leuk-localfdr,echo=FALSE,fig=TRUE,width=8,height=6------------------ plot(a$mat[,1],pmin(pi0hat *(dnorm(a$mat[,1]) / f),1),type='l', xlab="z statistic",ylab="local fdr", cex.axis=1.2,cex.lab=1.2,lwd=2) ## ----leuk-localfdr-counts,echo=FALSE------------------------------------- numsig <- sum(a$fdr<0.2) ## ----leuk-mle,echo=FALSE,fig=TRUE,width=8,height=6----------------------- a <- locfdr(zstats,nulltype=1,pct0=c(0.25,0.75),plot=0) num_sig_f0est <- sum(a$fdr<0.2) plot(zstats,a$fdr,ylab="local fdr",cex.lab=1.3,cex.axis=1.3) a <- locfdr(zstats,nulltype=0,plot=0) num_sig_f0theo <- sum(a$fdr<0.2) points(zstats,a$fdr,col='red') abline(h=0.2) legend("topleft",c("Theo. Null","Emp. Null"),col=c("red","black"),pch=19)