########################################################################################## ########## Function to simulate data and apply DE methods ########## ########################################################################################## ## By Sonia Tarazona ## 30-October-2013 ## To generate nsim simulations generosimuls = function(datacod, noise, nrepl, propdeg, nsim = 10, laruta) { laseed = sample(1:1000, 1) for (i in 1:nsim) { myresult = escenario(datacod=datacod, noise=noise, nrepl=nrepl, propdeg=propdeg, myseed=laseed) if (datacod == 1) { elnom = paste("foxy_", noise, "_", nrepl, "_", propdeg, "_", formatC(i, width = 2, format = "d", flag = "0"), ".RData", sep = "") } if (datacod == 2) { elnom = paste("human_", noise, "_", nrepl, "_", propdeg, "_", formatC(i, width = 2, format = "d", flag = "0"), ".RData", sep = "") } save(myresult, file = file.path(laruta, elnom)) laseed = laseed + 10 } } ## To simulate data and apply all methods escenario = function(datacod, noise, nrepl, propdeg, myseed) { if (datacod == 1) { fusarium = clc.counts[-grep("TCONS", rownames(clc.counts)),] fusarium = fusarium[which(rowSums(fusarium) > 0),] dataset = fusarium } if (datacod == 2) { chinacounts = chinacounts[which(rowSums(chinacounts) > 0),] dataset = chinacounts } set.seed(myseed) mycol = sample(ncol(dataset), 1) prova = simula.counts(counts0 = dataset[,mycol], nrepl = nrepl, depth = 30, propdeg = propdeg, noise = noise, beta = 6) grupo = data.frame("grup" = as.factor(rep(1:2, each = nrepl))) datosim = filtered.data(dataset = prova[[1]], factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500) datosN = readData(datosim, factors = grupo) misgenes = rownames(datosim) if (!is.null(prova[[2]])) { rownames(prova[[2]]) = prova[[2]][,1] resultados = prova[[2]][misgenes,-1] resultados[which(is.na(resultados[,1]) == TRUE),] = 0 } else { resultados = data.frame("regu" = rep(0,length(misgenes)), "foldchange" = rep(0,length(misgenes))) } rownames(resultados) = misgenes # NOISeq mynoiseq = noiseq(datosN, norm = "tmm", replicates = "biological", factor = "grup") resultados = data.frame(resultados, mynoiseq@results[[1]][misgenes,1:5]) # NOISeqBIO mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", r = 20, nclust = 15, lc = 0, adj = 1.5, a0per = 0.9, filter = 0) resultados = data.frame(resultados, mynoiseq@results[[1]][misgenes,3:4]) # edgeR d2 = DGEList(counts = datosim, group = grupo[,"grup"]) d2 = calcNormFactors(d2) d2 = estimateCommonDisp(d2) d2 = estimateTagwiseDisp(d2, trend = "movingave") d2 = exactTest(d2) myedgeR = topTags(d2, n = nrow(datosim))[[1]] resultados = data.frame(resultados, myedgeR[misgenes,3:4]) # DESeq #DESeq.cds = newCountDataSet(countData = datosim, conditions = grupo[,"grup"]) #DESeq.cds = estimateSizeFactors(DESeq.cds) #DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local") #DESeq.test = nbinomTest(DESeq.cds, 1, 2) #rownames(DESeq.test) = DESeq.test[,"id"] #resultados = data.frame(resultados, DESeq.test[misgenes,7:8]) # DESeq2 DESeq.cds = DESeqDataSetFromMatrix(countData = datosim, colData = grupo, design = ~ grup) DESeq.cds = DESeq(DESeq.cds) DESeq.test = as.data.frame(results(DESeq.cds)) rownames(DESeq.test) = misgenes resultados = data.frame(resultados, DESeq.test[misgenes,c("pvalue","padj")]) # SAMseq SAMseq.test = SAMseq(datosim, grupo[,"grup"], resp.type = "Two class unpaired", geneid = misgenes, genenames = misgenes, nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208) samseq.up = SAMseq.test$siggenes.table$genes.up samseq.lo = SAMseq.test$siggenes.table$genes.lo rownames(samseq.up) = samseq.up[,1] rownames(samseq.lo) = samseq.lo[,1] mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5])) genesSAM = rownames(mySAMseq) mySAMseq = apply(mySAMseq, 2, as.numeric) rownames(mySAMseq) = genesSAM mySAMseq = data.frame(mySAMseq)[misgenes,] mySAMseq[,3] = mySAMseq[,3]/100 resultados = data.frame(resultados, mySAMseq[,c(1,3)]) colnames(resultados)[c(7,9,11,13,15)] = c("NOISeq", "NOISeqBIO", "edgeR", "DESeq2", "SAMseq") misparametros = data.frame("param" = c("data", "seed", "col", "noise", "nrepl", "propdeg", "ngenes"), "value" = c(datacod, myseed, mycol, noise, nrepl, propdeg, length(misgenes))) list("results" = resultados, "parameters" = misparametros) }