################################################################################ ################# FIGURES for NOISeqBIO paper ######################### ################################################################################ # By Sonia Tarazona # Created 20-January-2013 options(stringsAsFactors = FALSE) library(NOISeq) ###################################################### # Data -------------------------------------------------------------------- ### Fusarium fusarium = fusarium[,c(2:3,12,1)] # fusarium for Blood-MM 30' in WT factor1 = data.frame("cond" = c("Blood", "Blood", "MM", "MM")) # Biotypes fusarium biomart = read.delim("biomart_FO.txt", header = TRUE, as.is = TRUE) rownames(biomart) = biomart[,1] biomart = biomart[,-1] biot <- biomart[,1] names(biot) <- rownames(biomart) # FOlength is computed from GTF # DATA myfo = readData(data = fusarium, factors = factor1, length = FOlength, biotype = biot, chromosome = biomart[,2:4]) ### Prostate Cancer cond = strsplit(colnames(chinacounts), "_") cond = sapply(cond, function (x) x[1]) cond[cond == "N"] = "Healthy" cond[cond == "T"] = "Tumoral" factor2 = data.frame("cond" = cond) # Biotypes human biomart = read.delim("biomart_HS_37.txt", header = TRUE, as.is = TRUE) rownames(biomart) = biomart[,1] biomart = biomart[,-1] biot <- biomart[,4] names(biot) <- rownames(biomart) # HSlength is computed from GTF # GC HS HS.GC = read.delim("HS_biomart72_GCcontent.txt", as.is= TRUE) # Data HS myhs = readData(data = chinacounts, factors = factor2, length = HSlength, biotype = biot, gc = HS.GC, chromosome = biomart[,1:3]) ###################################################### # Functionalities R package: BIOTYPE DETECTION ---------------------------- # FO mybiotdetFO = dat(input = myfo, type = "biodetection", k = 0, factor = "cond") pdf(file.path(myfigdir, "fig_biodetection1_FO.pdf"), width = 7) explo.plot(mybiotdetFO, samples = 1) dev.off() mycountsbio1 = dat(input = myfo, type = "countsbio", factor = "cond") pdf(file.path(myfigdir, "fig_biodetection2_FO.pdf"), width = 6, height = 6) explo.plot(mycountsbio1, samples = 1, plottype = "boxplot") dev.off() # HS mybiotdetHS = dat(myhs, type = "biodetection", factor = "cond", k = 0) pdf(file.path(myfigdir, "fig_biodetection1_HS.pdf"), width = 7) explo.plot(mybiotdetHS, samples = 1) dev.off() mycountsbio1HS = dat(input = myhs, type = "countsbio", factor = "cond") pdf(file.path(myfigdir, "fig_biodetection2_HS.pdf"), width = 6, height = 6) explo.plot(mycountsbio1HS, samples = 1, plottype = "boxplot") dev.off() ###################################################### # Functionalities R package: SEQUENCING DEPTH ----------------------------- ## Fusarium mysatFO = dat(input = myfo, type = "saturation", k = 0) pdf(file.path(myfigdir, "fig_seqdepth1_FO.pdf"), width = 6, height = 6) explo.plot(mysatFO, toplot = 1, samples = c(1,2), yleftlim = c(0,15000))#, yrightlim = c(0,7500)) dev.off() mycountsbio2 = dat(input = myfo, type = "countsbio", factor = NULL) pdf(file.path(myfigdir, "fig_seqdepth2_FO.pdf"), width = 6, height = 6) explo.plot(mycountsbio2, toplot = "global", samples = NULL, plottype = "boxplot") dev.off() pdf(file.path(myfigdir, "fig_seqdepth3_FO.pdf"), width = 6, height = 6) explo.plot(mycountsbio2, toplot = "global", samples = NULL, plottype = "barplot") dev.off() # all plots FO estosbios = names(mysatFO@dat$saturation)[-5] pdf(file.path(myfigdir, "fig_saturation_ALL_FO.pdf"), width = 6, height = 6) lapply(estosbios, function (x) explo.plot(mysatFO, toplot = x, samples = c(1,2), yleftlim = c(0,15000)))#, yrightlim = c(0,7500)) dev.off() pdf(file.path(myfigdir, "fig_sensitivity_ALL_FO.pdf"), width = 6, height = 6) lapply(estosbios, function (x) explo.plot(mycountsbio2, toplot = x, samples = NULL, plottype = "barplot")) dev.off() ## Prostate Cancer mysatHS = dat(input = myhs, type = "saturation", k = 0, ndepth = 6) pdf(file.path(myfigdir, "fig_seqdepth1_HS.pdf"), width = 6, height = 6) explo.plot(mysatHS, samples = grep("Tumoral", myhs@phenoData[[1]])[c(1,4)], toplot = 1, yleftlim = c(0,50000)) dev.off() pdf(file.path(myfigdir, "fig_seqdepth1_HS_allbios.pdf"), width = 6, height = 6) for (i in 1:32) { explo.plot(mysatHS, samples = grep("Tumoral", myhs@phenoData[[1]])[c(1,4)], toplot = i) } dev.off() mycountsbio2HS = dat(input = myhs, type = "countsbio", factor = NULL) pdf(file.path(myfigdir, "fig_seqdepth2_HS.pdf"), width = 6, height = 6) explo.plot(mycountsbio2HS, toplot = "global", samples = NULL, plottype = "boxplot") dev.off() pdf(file.path(myfigdir, "fig_seqdepth3_HS.pdf"), width = 6, height = 6) explo.plot(mycountsbio2HS, toplot = "global", samples = NULL, plottype = "barplot") dev.off() # all plots HS estosbios = names(mysatHS@dat$saturation)[-c(6,29)] pdf(file.path(myfigdir, "fig_saturation_ALL_HS.pdf"), width = 6, height = 6) lapply(estosbios, function (x) explo.plot(mysatHS, toplot = x, samples = c(1,2), yleftlim = c(0,50000)))#, yrightlim = c(0,7500)) dev.off() pdf(file.path(myfigdir, "fig_sensitivity_ALL_HS.pdf"), width = 6, height = 6) lapply(estosbios, function (x) { print(x) explo.plot(mycountsbio2HS, toplot = x, samples = NULL, plottype = "barplot")}) dev.off() ###################################################### # Functionalities R package: BIAS DETECTION ------------------------------- ###### Fusarium mylengthbiasFO = dat(myfo, type = "length", factor = "cond") pdf(file.path(myfigdir, "fig_bias1_FO.pdf"), width = 6, height = 6) explo.plot(mylengthbiasFO, samples = 1) dev.off() myGCbiasFO = dat(myfo, type = "GCbias", factor = "cond") pdf(file.path(myfigdir, "fig_bias2_FO.pdf"), width = 6, height = 6) explo.plot(myGCbiasFO, samples = 1) dev.off() myCDfo = dat(myfo, norm = FALSE, refColumn = 1, type = "cd") #type = "cd" pdf(file.path(myfigdir, "fig_bias3_FO.pdf"), width = 6, height = 6) explo.plot(myCDfo, xlim = c(0,50)) dev.off() ###### Prostate Cancer mylengthbiasHS = dat(myhs, type = "length", factor = "cond") pdf(file.path(myfigdir, "fig_bias1_HS.pdf"), width = 6, height = 6) explo.plot(mylengthbiasHS, samples = 1) dev.off() myGCbiasHS = dat(myhs, type = "GCbias", factor = "cond") pdf(file.path(myfigdir, "fig_bias2_HS.pdf"), width = 6, height = 6) explo.plot(myGCbiasHS, samples = 1) dev.off() myCDhs = dat(myhs, type = "cd") pdf(file.path(myfigdir, "fig_bias3_HS.pdf"), width = 6, height = 6) explo.plot(myCDhs, xlim = c(0,50), samples = 1:13) dev.off() ###################################################### # Functionalities R package: NORMALIZATION -------------------------------- ###### Fusarium datossinnorm = myfo@assayData$exprs longi = myfo@featureData@data$Length names(longi) = rownames(datossinnorm) datosnorm = list("rpkm" = rpkm(datossinnorm, long = longi), "uqua" = uqua(datossinnorm), "tmm" = tmm(datossinnorm), "prop" = rpkm(datossinnorm, long = 1000)) mibio = as.character(myfo@featureData@data$Biotype) names(mibio) = rownames(datossinnorm) myfo.norm = vector("list", length = 4) names(myfo.norm) = names(datosnorm) for (i in 1:4) { myfo.norm[[i]] = readData(data = datosnorm[[i]], factors = data.frame("cond" = myfo@phenoData@data$cond), length = longi, biotype = mibio) } mylengthbiasFO = dat(myfo.norm[["rpkm"]], type = "length", factor = "cond") pdf(file.path(myfigdir, "fig_bias1norm_FO.pdf"), width = 6, height = 6) explo.plot(mylengthbiasFO, samples = 1) dev.off() myCDfo2 = list("uqua" = dat(myfo.norm[["uqua"]], type = "countsbio"), "tmm" = dat(myfo.norm[["tmm"]], type = "countsbio")) lapply(myCDfo2, explo.plot, plottype = "boxplot") pdf(file.path(myfigdir, "fig_bias3uqua_FO.pdf"), width = 6, height = 6) myCDfo = dat(myfo.norm[["uqua"]], type = "cd", norm = TRUE) explo.plot(myCDfo) dev.off() pdf(file.path(myfigdir, "fig_bias3tmm_FO.pdf"), width = 6, height = 6) myCDfo = dat(myfo.norm[["tmm"]], type = "cd", norm = TRUE) explo.plot(myCDfo) dev.off() ###### Prostate Cancer datossinnorm = myhs@assayData$exprs longi = myhs@featureData@data$Length names(longi) = rownames(datossinnorm) datosnorm = list("rpkm" = rpkm(datossinnorm, long = longi), "uqua" = uqua(datossinnorm), "tmm" = tmm(datossinnorm)) mibio = as.character(myhs@featureData@data$Biotype) names(mibio) = rownames(datossinnorm) myhs.norm = vector("list", length = 3) names(myhs.norm) = names(datosnorm) for (i in 1:3) { myhs.norm[[i]] = readData(data = datosnorm[[i]], factors = data.frame("cond" = myhs@phenoData@data$cond), length = longi, biotype = mibio) } mylengthbiasHS = dat(myhs.norm[["rpkm"]], type = "length", factor = "cond") pdf(file.path(myfigdir, "fig_bias1norm_HS.pdf"), width = 6, height = 6) explo.plot(mylengthbiasHS, samples = 1) dev.off() myCDhs2 = list("uqua" = dat(myhs.norm[["uqua"]], type = "countsbio"), "tmm" = dat(myhs.norm[["tmm"]], type = "countsbio")) lapply(myCDhs2, explo.plot, plottype = "boxplot") pdf(file.path(myfigdir, "fig_bias3uqua_HS.pdf"), width = 6, height = 6) myCDhs = dat(myhs.norm[["uqua"]], type = "cd", norm = TRUE) explo.plot(myCDhs, samples = 1:13) dev.off() pdf(file.path(myfigdir, "fig_bias3tmm_HS.pdf"), width = 6, height = 6) myCDhs = dat(myhs.norm[["tmm"]], type = "cd", norm = TRUE) explo.plot(myCDhs, samples = 1:13) dev.off() #################################################### ### Normalization of GC content with EDASeq package #################################################### library(EDASeq) gc.norm = vector("list", length = 2) names(gc.norm) = c("FO", "HS") # FO conteos = myfo@assayData$exprs mygc = myfo@featureData@data$GC; names(mygc) = rownames(conteos) mygc = data.frame(gc = mygc) mygcFO = na.omit(mygc) data <- newSeqExpressionSet(exprs=as.matrix(conteos[rownames(mygcFO),]), featureData=mygcFO, phenoData=data.frame(conditions=as.character(myfo@phenoData$cond), row.names=colnames(conteos))) dataWithin <- withinLaneNormalization(data,"gc",which="full") dataNorm <- betweenLaneNormalization(dataWithin,which="full") dataNorm = exprs(dataNorm) gc.norm[[1]] = dataNorm # HS conteos = myhs@assayData$exprs mygc = myhs@featureData@data$GC; names(mygc) = rownames(conteos) mygc = data.frame(gc = mygc) mygcHS = na.omit(mygc) data <- newSeqExpressionSet(exprs=as.matrix(conteos[rownames(mygcHS),]), featureData=mygcHS, phenoData=data.frame(conditions=as.character(myhs@phenoData$cond), row.names=colnames(conteos))) dataWithin <- withinLaneNormalization(data,"gc",which="full") dataNorm <- betweenLaneNormalization(dataWithin,which="full") dataNorm = exprs(dataNorm) gc.norm[[2]] = dataNorm ### Figures with GC normalization # FO myfactor = myfo@phenoData$cond gcnormFO = readData(data = gc.norm[[1]], factors = data.frame("cond" = myfactor), gc = mygcFO) pdf(file.path(myfigdir, "fig_bias2edaseq_FO.pdf"), width = 6, height = 6) myGCplot = dat(gcnormFO, type = "GCbias", factor = "cond") explo.plot(myGCplot, samples = 1) dev.off() ###################################################### # Differential Expression: RESULTS ---------------------------------------- library(edgeR) library(DESeq2) library(samr) myq = 0.95 mycolor = miscolores[c(2,4,6:7)] ## FO data resultsFOfilt1 = vector("list", length = 4) names(resultsFOfilt1) = c("NOISeqBIO", "edgeR", "DESeq2", "SAMseq") png(file.path(myfigdir, "fig_DEmethodsExperimentalData_FO_filt1.png"), width = 300*2, height = 300*2) par(mfcol = c(2,2)) misdatos = myfo@assayData$exprs grupo = data.frame("grup" = c("Blood","Blood","MM","MM")) misdatos = filtered.data(dataset = misdatos, factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500, cpm = 1) condi1 = rowMeans(tmm(misdatos[,grep("wt_M_30", colnames(misdatos))])) + 1 condi2 = rowMeans(tmm(misdatos[,grep("wt_B_30", colnames(misdatos))])) + 1 biotipos = as.character(myfo@featureData@data[,"Biotype"]) names(biotipos) = rownames(myfo@featureData@data) datosN = readData(misdatos, factors = grupo, biotype = biotipos, chromosome = myfo@featureData@data[,3:5]) # NOISeqBIO (probability) mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", random.seed = 123, nclust = 15, lc = 0, r = 50, adj = 1.5, a0per = 0.9, filter = 0) resultsFOfilt1[[1]] = mynoiseq mydeg = rownames(degenes(resultsFOfilt1[[1]], q = myq)) length(mydeg) plot(condi1, condi2, main = "NOISeqBIO", log = "xy", xlab = "MM", ylab = "Blood", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[1], pch = 20, cex = 0.5) text(3,5000, paste("DEG =", length(mydeg))) ## edgeR (adjusted p-value) d2 = DGEList(counts = misdatos, group = grupo[,"grup"]) d2 = calcNormFactors(d2) d2 = estimateCommonDisp(d2) d2 = estimateTagwiseDisp(d2, trend = "movingave") d2 = exactTest(d2) myedgeR = topTags(d2, n = nrow(misdatos))[[1]] resultsFOfilt1[[2]] = myedgeR mypval = resultsFOfilt1[[2]][,"FDR"] mydeg = rownames(resultsFOfilt1[[2]][which(mypval < (1-myq)), ]) length(mydeg) plot(condi1, condi2, main = "edgeR", log = "xy", xlab = "MM", ylab = "Blood", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[2], pch = 20, cex = 0.5) text(3,5000, paste("DEG =", length(mydeg))) ## DESeq (adjusted p-value) # DESeq.cds = newCountDataSet(countData = misdatos, conditions = grupo[,"grup"]) # DESeq.cds = estimateSizeFactors(DESeq.cds) # DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local") # DESeq.test = nbinomTest(DESeq.cds, "MM", "Blood") # resultsFOfilt1[[3]] = DESeq.test # mypval = p.adjust(DESeq.test$pval, method = "BH") # mydeg = DESeq.test[which(mypval < (1-myq)),"id"] # length(mydeg) # # plot(condi1, condi2, main = "DESeq - FO", log = "xy", # xlab = "cond1", ylab = "cond2", cex = 0.7) # points(condi1[mydeg], condi2[mydeg], col = 2, pch = 20, cex = 0.5) # text(3,5000, paste("DEG =", length(mydeg))) ## DESeq2 (adjusted p-value) DESeq.cds = DESeqDataSetFromMatrix(countData = misdatos, colData = grupo, design = ~ grup) DESeq.cds = DESeq(DESeq.cds) DESeq.test = results(DESeq.cds) resultsFOfilt1[[3]] = DESeq.test mydeg = rownames(as.data.frame(resultsFOfilt1[[3]])[which(as.data.frame(resultsFOfilt1[[3]])[,"padj"] < (1-myq)),]) length(mydeg) plot(condi1, condi2, main = "DESeq2", log = "xy", xlab = "MM", ylab = "Blood", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[3], pch = 20, cex = 0.5) text(3,5000, paste("DEG =", length(mydeg))) ## SAMseq (FDR) SAMseq.test = SAMseq(misdatos, grupo[,"grup"], resp.type = "Two class unpaired", geneid = rownames(misdatos), genenames = rownames(misdatos), nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208) samseq.up = SAMseq.test$siggenes.table$genes.up samseq.lo = SAMseq.test$siggenes.table$genes.lo rownames(samseq.up) = samseq.up[,1] rownames(samseq.lo) = samseq.lo[,1] mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5])) genesSAM = rownames(mySAMseq) mySAMseq = apply(mySAMseq, 2, as.numeric) rownames(mySAMseq) = genesSAM mySAMseq = data.frame(mySAMseq)[rownames(misdatos),] mySAMseq[,3] = mySAMseq[,3]/100 resultsFOfilt1[[4]] = mySAMseq mydeg = rownames(resultsFOfilt1[[4]][which(resultsFOfilt1[[4]][,3] < (1-myq)),]) length(mydeg) plot(condi1, condi2, main = "SAMseq", log = "xy", xlab = "MM", ylab = "Blood", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[4], pch = 20, cex = 0.5) text(3,5000, paste("DEG =", length(mydeg))) dev.off() #----------# ## Prostate Cancer data resultsHSfilt1 = vector("list", length = 4) names(resultsHSfilt1) = c("NOISeqBIO", "edgeR", "DESeq2", "SAMseq") misdatos = myhs@assayData$exprs grupo = data.frame("grup" = myhs@phenoData$cond) misdatos = filtered.data(dataset = misdatos, factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500, cpm = 1) # 17207 condi1 = rowMeans(tmm(misdatos[,grep("N", colnames(misdatos))])) + 1 condi2 = rowMeans(tmm(misdatos[,grep("T", colnames(misdatos))])) + 1 biotipos = as.character(myhs@featureData@data[,"Biotype"]) names(biotipos) = rownames(myhs@featureData@data) datosN = readData(misdatos, factors = grupo, biotype = biotipos, chromosome = myhs@featureData@data[,4:6]) png(file.path(myfigdir, "fig_DEmethodsExperimentalData_HS_filt2.png"), width = 450*2, height = 450*2) par(mfcol = c(2,2)) ## NOISeqBIO (probability) mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", random.seed = 123, nclust = 15, lc = 0, r = 50, adj = 1.5, a0per = 0.9, filter = 0) resultsHSfilt1[[1]] = mynoiseq mydeg = rownames(degenes(resultsHSfilt1[[1]], q = myq)) # 1757 length(mydeg) plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "NOISeqBIO", log = "xy", xlab = "Healthy", ylab = "Tumoral", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[1], pch = 1, cex = 0.7) text(3,10000, paste("DEG =", length(mydeg))) ## edgeR (adjusted p-value) d2 = DGEList(counts = misdatos, group = grupo$grup) d2 = calcNormFactors(d2) d2 = estimateCommonDisp(d2) d2 = estimateTagwiseDisp(d2, trend = "movingave") d2 = exactTest(d2) myedgeR = topTags(d2, n = nrow(misdatos))[[1]] resultsHSfilt1[[2]] = myedgeR mypval = resultsHSfilt1[[2]][,"FDR"] mydeg = rownames(resultsHSfilt1[[2]][which(mypval < (1-myq)), ]) length(mydeg) plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "edgeR", log = "xy", xlab = "Healthy", ylab = "Tumoral", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[2], pch = 1, cex = 0.7) text(3,10000, paste("DEG =", length(mydeg))) ## DESeq (adjusted p-value) # DESeq.cds = newCountDataSet(countData = misdatos, conditions = grupo$grup) # DESeq.cds = estimateSizeFactors(DESeq.cds) # DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local") # DESeq.test = nbinomTest(DESeq.cds, "Healthy", "Tumoral") # resultsHSfilt1[[3]] = DESeq.test # mypval = p.adjust(DESeq.test$pval, method = "BH") # mydeg = DESeq.test[which(mypval < (1-myq)),"id"] # length(mydeg) # # plot(condi1, condi2, main = "DESeq", log = "xy", # xlab = "Healthy", ylab = "Tumoral", cex = 0.7) # points(condi1[mydeg], condi2[mydeg], col = 2, pch = 20, cex = 0.5) # text(3,10000, paste("DEG =", length(mydeg))) ## DESeq (adjusted p-value) DESeq.cds = DESeqDataSetFromMatrix(countData = misdatos, colData = grupo, design = ~ grup) DESeq.cds = DESeq(DESeq.cds) DESeq.test = results(DESeq.cds) resultsHSfilt1[[3]] = DESeq.test mydeg = rownames((resultsHSfilt1[[3]])[which((resultsHSfilt1[[3]])[,"padj"] < (1-myq)),]) length(mydeg) plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "DESeq2", log = "xy", xlab = "Healthy", ylab = "Tumoral", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[3], pch = 1, cex = 0.7) text(3,10000, paste("DEG =", length(mydeg))) ## SAMseq (FDR) SAMseq.test = SAMseq(misdatos, grupo[,"grup"], resp.type = "Two class unpaired", geneid = rownames(misdatos), genenames = rownames(misdatos), nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208) samseq.up = SAMseq.test$siggenes.table$genes.up samseq.lo = SAMseq.test$siggenes.table$genes.lo rownames(samseq.up) = samseq.up[,1] rownames(samseq.lo) = samseq.lo[,1] mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5])) genesSAM = rownames(mySAMseq) mySAMseq = apply(mySAMseq, 2, as.numeric) rownames(mySAMseq) = genesSAM mySAMseq = data.frame(mySAMseq)[rownames(misdatos),] mySAMseq[,3] = mySAMseq[,3]/100 resultsHSfilt1[[4]] = mySAMseq mydeg = rownames(resultsHSfilt1[[4]][which(resultsHSfilt1[[4]][,3] < (1-myq)),]) length(mydeg) plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "SAMseq", log = "xy", xlab = "Healthy", ylab = "Tumoral", cex = 0.7) points(condi1[mydeg], condi2[mydeg], col = mycolor[4], pch = 1, cex = 0.7) text(3,10000, paste("DEG =", length(mydeg))) dev.off() ######################################################## # Differential Expression: Common DEG and Correlation --------------------- myq = 0.05 mydegFO = mydegHS = vector("list", length = 4) names(mydegFO) = names(mydegHS) = names(resultsFOfilt1) # NOISeqBIO genesFO = rownames(resultsFOfilt1[[1]]@results[[1]]) genesHS = rownames(resultsHSfilt1[[1]]@results[[1]]) data2corFO = 1-resultsFOfilt1[[1]]@results[[1]][,"prob"] data2corHS = 1-resultsHSfilt1[[1]]@results[[1]][,"prob"] mydegFO[[1]] = genesFO[which(resultsFOfilt1[[1]]@results[[1]][,"prob"] > 1-myq)] mydegHS[[1]] = genesHS[which(resultsHSfilt1[[1]]@results[[1]][,"prob"] > 1-myq)] # edgeR data2corFO = cbind(data2corFO, resultsFOfilt1[[2]][genesFO,"FDR"]) data2corHS = cbind(data2corHS, resultsHSfilt1[[2]][genesHS,"FDR"]) mydegFO[[2]] = rownames(resultsFOfilt1[[2]])[which(resultsFOfilt1[[2]][,"FDR"] < myq)] mydegHS[[2]] = rownames(resultsHSfilt1[[2]])[which(resultsHSfilt1[[2]][,"FDR"] < myq)] head(resultsFOfilt1[[2]]) # DESeq2 data2corFO = cbind(data2corFO, as.data.frame(resultsFOfilt1[[3]])[genesFO,"padj"]) data2corHS = cbind(data2corHS, as.data.frame(resultsHSfilt1[[3]])[genesHS,"padj"]) mydegFO[[3]] = rownames(as.data.frame(resultsFOfilt1[[3]]))[which(as.data.frame(resultsFOfilt1[[3]])[,"padj"] < myq)] mydegHS[[3]] = rownames(as.data.frame(resultsHSfilt1[[3]]))[which(as.data.frame(resultsHSfilt1[[3]])[,"padj"] < myq)] # SAMseq data2corFO = cbind(data2corFO, resultsFOfilt1[[4]][genesFO,3]) data2corHS = cbind(data2corHS, resultsHSfilt1[[4]][genesHS,3]) mydegFO[[4]] = rownames(resultsFOfilt1[[4]])[which(resultsFOfilt1[[4]][,3] < myq)] mydegHS[[4]] = rownames(resultsHSfilt1[[4]])[which(resultsHSfilt1[[4]][,3] < myq)] colnames(data2corFO) = colnames(data2corHS) = names(resultsFOfilt1) cor(na.omit(data2corFO), method = "spearman") cor(na.omit(data2corHS), method = "spearman") for (i in 1:3) { for (j in (i+1):4){ print(names(resultsFOfilt1)[i]); print(names(resultsFOfilt1)[j]) print("FO") print(length(intersect(mydegFO[[i]], mydegFO[[j]]))) print("HS") print(length(intersect(mydegHS[[i]], mydegHS[[j]]))) } } ############################################################################ # Functionalities R package: DE RESULTS EXPLORATION ----------------------- ##### FO mynoiseq = resultsFOfilt1[["NOISeqBIO"]] medida = 400 ## Expression DE plot png(file.path(myfigdir, "fig_pack_expre.png"), width = medida, height = medida) DE.plot(mynoiseq, q = 0.95, graphic = "expr", log.scale = TRUE) dev.off() ## (M,D) DE plot png(file.path(myfigdir, "fig_pack_MD.png"), width = medida, height = medida) DE.plot(mynoiseq, q = 0.95, graphic = "MD") dev.off() ## Manhattan DE plot medida = 6 pdf(file.path(myfigdir, "fig_pack_DEmanhattan_FO.pdf"), width = medida*2, height = medida*2) DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:2, log.scale = TRUE, join = FALSE) dev.off() # with all chromosomes medida = 5 pdf(file.path(myfigdir, "fig_pack_DEmanhattan_FO_AllChrom.pdf"), width = medida*2, height = medida*5) DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:15, log.scale = TRUE, join = FALSE) dev.off() ## Distribution DE plot medida = 6 pdf(file.path(myfigdir, "fig_pack_DEbarplot_FO.pdf"), width = medida*2, height = medida) DE.plot(mynoiseq, chromosomes = 1:15, q = 0.95, graphic = "distr") dev.off() ##### HS mynoiseq = resultsHSfilt1[["NOISeqBIO"]] medida = 400 ## Expression DE plot png(file.path(myfigdir, "fig_pack_expre_HS.png"), width = medida, height = medida) DE.plot(mynoiseq, q = 0.95, graphic = "expr", log.scale = TRUE) dev.off() ## (M,D) DE plot png(file.path(myfigdir, "fig_pack_MD_HS.png"), width = medida, height = medida) DE.plot(mynoiseq, q = 0.95, graphic = "MD") dev.off() ## Manhattan DE plot medida = 3.6 pdf(file.path(myfigdir, "fig_pack_DEmanhattan_HS.pdf"), width = medida*2, height = medida*1.95) DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:2, log.scale = TRUE, join = FALSE) dev.off() ## Distribution DE plot medida = 6 pdf(file.path(myfigdir, "fig_pack_DEbarplot_HS.pdf"), width = medida*2, height = medida) DE.plot(mynoiseq, chromosomes = c(as.character(1:22), "X", "Y"), q = 0.95, graphic = "distr") dev.off() ########################################################################################### ########################################################################################### #### Functional enrichment analysis (Prostate Cancer) # to classify up in tumoral & down in tumoral datanorm = tmm(myhs@assayData$exprs, lc = 0) TminusH = rowMeans(datanorm[,grep("T_", colnames(datanorm))]) - rowMeans(datanorm[,grep("N_", colnames(datanorm))]) mygenes = rownames(myhs@assayData$exprs) mylength = myhs@featureData@data$Length names(mylength) = mygenes # preparing data for GOseq paragoseq <- vector("list", length = 3) names(paragoseq) <- c("all", "upT", "downT") paragoseq = lapply(paragoseq, function (x) { x = vector("list", length = 4) names(x) = names(resultsHSfilt1) x}) for (j in 1:4) { paragoseq[[1]][[j]] = as.integer(mygenes %in% mydegHS[[j]]) paragoseq[[2]][[j]] = (mygenes %in% mydegHS[[j]])*(TminusH > 0) paragoseq[[3]][[j]] = (mygenes %in% mydegHS[[j]])*(TminusH < 0) names(paragoseq[[1]][[j]]) = names(paragoseq[[2]][[j]]) = names(paragoseq[[3]][[j]]) = mygenes } # applying GOseq enrichetNOTpropagated = enriched enriched <- vector("list", length = 3) names(enriched) <- names(paragoseq) enriched = lapply(enriched, function (x) { x = vector("list", length = 4) names(x) = names(resultsHSfilt1) x}) for (i in 1:3) { for (j in 1:4) { #mipwf <- nullp(paragoseq[[i]][[j]], bias.data = mylength) mipwf <- nullp(paragoseq[[i]][[j]], bias.data = mylength, genome = "hg19", id = "ensGene") #walle <- goseq(pwf = mipwf, gene2cat = myGO2, method = "Wallenius", repcnt = 2000) walle <- goseq(pwf = mipwf, gene2cat = NULL, method = "Wallenius", repcnt = 2000, genome = "hg19", id = "ensGene") enriched[[i]][[j]] <- walle$category[p.adjust(walle$over_represented_pvalue, method = "BH") < 0.05] if (length(enriched[[i]][[j]]) > 0) { enriched[[i]][[j]] = cbind(enriched[[i]][[j]], myGO3[enriched[[i]][[j]],2]) } } } t(sapply(enriched, sapply, function(x) length(unique(x[,1])))) # NOISeqBIO edgeR DESeq2 SAMseq # all 237 405 471 573 # upT 32 61 95 151 # downT 348 632 612 650 # propagated # NOISeqBIO edgeR DESeq2 SAMseq # all 1518 2338 2364 2557 # upT 210 323 495 742 # downT 1869 2737 2760 2830