##########################################################################################
##########            Function to simulate data and apply DE methods            ##########
##########################################################################################


## By Sonia Tarazona
## 30-October-2013


## To generate nsim simulations

generosimuls = function(datacod, noise, nrepl, propdeg, nsim = 10, laruta) {  
 
  
  laseed = sample(1:1000, 1)
  
  for (i in 1:nsim) {
    
    myresult = escenario(datacod=datacod, noise=noise, nrepl=nrepl, propdeg=propdeg, myseed=laseed)
    
    if (datacod == 1) {
      elnom = paste("foxy_", noise, "_", nrepl, "_", propdeg, "_", formatC(i, width = 2, format = "d", flag = "0"),
                    ".RData", sep = "")
    }
    
    if (datacod == 2) {
      elnom = paste("human_", noise, "_", nrepl, "_", propdeg, "_", formatC(i, width = 2, format = "d", flag = "0"),
                    ".RData", sep = "")
    }
        
    save(myresult, file = file.path(laruta, elnom))    
    
    laseed = laseed + 10
    
  }
}


## To simulate data and apply all methods 

escenario = function(datacod, noise, nrepl, propdeg, myseed) {
  
  if (datacod == 1) {       
    
    fusarium = clc.counts[-grep("TCONS", rownames(clc.counts)),]
    fusarium = fusarium[which(rowSums(fusarium) > 0),]    
    dataset = fusarium 
  }
  
  if (datacod == 2) {     
    
    chinacounts = chinacounts[which(rowSums(chinacounts) > 0),]    
    dataset = chinacounts 
  }   

  set.seed(myseed)
  
  mycol = sample(ncol(dataset), 1)
  
  prova = simula.counts(counts0 = dataset[,mycol], nrepl = nrepl, depth = 30, propdeg = propdeg, 
                        noise = noise, beta = 6)
      
  grupo = data.frame("grup" = as.factor(rep(1:2, each = nrepl)))
  datosim = filtered.data(dataset =  prova[[1]], factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500)
  datosN = readData(datosim, factors = grupo)
  
  misgenes = rownames(datosim)
  
  if (!is.null(prova[[2]])) {
    rownames(prova[[2]]) = prova[[2]][,1]
    resultados = prova[[2]][misgenes,-1]
    resultados[which(is.na(resultados[,1]) == TRUE),] = 0
  } else {
    resultados = data.frame("regu" = rep(0,length(misgenes)), "foldchange" = rep(0,length(misgenes)))
  }
  
  rownames(resultados) = misgenes  
 
  
  # NOISeq
  mynoiseq = noiseq(datosN, norm = "tmm", replicates = "biological", factor = "grup")
  resultados = data.frame(resultados, mynoiseq@results[[1]][misgenes,1:5])
    
  # NOISeqBIO
  mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", r = 20, nclust = 15, 
                       lc = 0, adj = 1.5, a0per = 0.9, filter = 0)
  resultados = data.frame(resultados, mynoiseq@results[[1]][misgenes,3:4])

  # edgeR
  d2 = DGEList(counts = datosim, group = grupo[,"grup"])
  d2 = calcNormFactors(d2)  
  d2 = estimateCommonDisp(d2)
  d2 = estimateTagwiseDisp(d2, trend = "movingave")
  d2 = exactTest(d2)
  myedgeR = topTags(d2, n = nrow(datosim))[[1]]
  resultados = data.frame(resultados, myedgeR[misgenes,3:4])  
  
  # DESeq
  #DESeq.cds = newCountDataSet(countData = datosim, conditions = grupo[,"grup"])
  #DESeq.cds = estimateSizeFactors(DESeq.cds)
  #DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local")
  #DESeq.test = nbinomTest(DESeq.cds, 1, 2)
  #rownames(DESeq.test) = DESeq.test[,"id"]
  #resultados = data.frame(resultados, DESeq.test[misgenes,7:8])  

  # DESeq2
  DESeq.cds = DESeqDataSetFromMatrix(countData = datosim, colData = grupo, design = ~ grup)
  DESeq.cds = DESeq(DESeq.cds)
  DESeq.test = as.data.frame(results(DESeq.cds))
  rownames(DESeq.test) = misgenes
  resultados = data.frame(resultados, DESeq.test[misgenes,c("pvalue","padj")])
  
  # SAMseq
  SAMseq.test = SAMseq(datosim, grupo[,"grup"], resp.type = "Two class unpaired",
                       geneid = misgenes, genenames = misgenes,
                       nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208)
  samseq.up = SAMseq.test$siggenes.table$genes.up
  samseq.lo = SAMseq.test$siggenes.table$genes.lo
  rownames(samseq.up) = samseq.up[,1]
  rownames(samseq.lo) = samseq.lo[,1]
  mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5]))
  genesSAM = rownames(mySAMseq)
  mySAMseq = apply(mySAMseq, 2, as.numeric)
  rownames(mySAMseq) = genesSAM
  mySAMseq = data.frame(mySAMseq)[misgenes,]
  mySAMseq[,3] = mySAMseq[,3]/100
  resultados = data.frame(resultados, mySAMseq[,c(1,3)])    
  
  colnames(resultados)[c(7,9,11,13,15)] = c("NOISeq", "NOISeqBIO", "edgeR", "DESeq2", "SAMseq")
  
  misparametros = data.frame("param" = c("data", "seed", "col", "noise", "nrepl", "propdeg", "ngenes"),
                             "value" = c(datacod, myseed, mycol, noise, nrepl, propdeg, length(misgenes)))
  
  
  list("results" = resultados, "parameters" = misparametros)
    
}