################################################################################
#################       FIGURES for NOISeqBIO paper    #########################
################################################################################

# By Sonia Tarazona
# Created 20-January-2013

options(stringsAsFactors = FALSE)

library(NOISeq)


######################################################


# Data --------------------------------------------------------------------


### Fusarium

fusarium = fusarium[,c(2:3,12,1)]

# fusarium for Blood-MM 30' in WT
factor1 = data.frame("cond" = c("Blood", "Blood", "MM", "MM"))

# Biotypes fusarium
biomart = read.delim("biomart_FO.txt", header = TRUE, as.is = TRUE)
rownames(biomart) = biomart[,1]
biomart = biomart[,-1]

biot <- biomart[,1]
names(biot) <- rownames(biomart)

# FOlength is computed from GTF


# DATA
myfo = readData(data = fusarium, factors = factor1, length = FOlength, biotype = biot, 
                chromosome = biomart[,2:4])


### Prostate Cancer

cond = strsplit(colnames(chinacounts), "_")
cond = sapply(cond, function (x) x[1])
cond[cond == "N"] = "Healthy"
cond[cond == "T"] = "Tumoral"

factor2 = data.frame("cond" = cond)

# Biotypes human
biomart = read.delim("biomart_HS_37.txt", header = TRUE, as.is = TRUE)
rownames(biomart) = biomart[,1]
biomart = biomart[,-1]

biot <- biomart[,4]
names(biot) <- rownames(biomart)

# HSlength is computed from GTF

# GC HS
HS.GC = read.delim("HS_biomart72_GCcontent.txt", as.is= TRUE)


# Data HS
myhs = readData(data = chinacounts, factors = factor2, length = HSlength, biotype = biot, gc = HS.GC, 
                chromosome = biomart[,1:3])


######################################################


# Functionalities R package: BIOTYPE DETECTION ----------------------------


# FO

mybiotdetFO = dat(input = myfo, type = "biodetection", k = 0, factor = "cond")  
pdf(file.path(myfigdir, "fig_biodetection1_FO.pdf"), width = 7)
explo.plot(mybiotdetFO, samples = 1)
dev.off()

mycountsbio1 = dat(input = myfo, type = "countsbio", factor = "cond")
pdf(file.path(myfigdir, "fig_biodetection2_FO.pdf"), width = 6, height = 6)
explo.plot(mycountsbio1, samples = 1, plottype = "boxplot")
dev.off()


# HS

mybiotdetHS = dat(myhs, type = "biodetection", factor = "cond", k = 0)
pdf(file.path(myfigdir, "fig_biodetection1_HS.pdf"), width = 7)
explo.plot(mybiotdetHS, samples = 1)
dev.off()

mycountsbio1HS = dat(input = myhs, type = "countsbio", factor = "cond")
pdf(file.path(myfigdir, "fig_biodetection2_HS.pdf"), width = 6, height = 6)
explo.plot(mycountsbio1HS, samples = 1, plottype = "boxplot")
dev.off()


######################################################


# Functionalities R package: SEQUENCING DEPTH -----------------------------


## Fusarium

mysatFO = dat(input = myfo, type = "saturation", k = 0)
pdf(file.path(myfigdir, "fig_seqdepth1_FO.pdf"), width = 6, height = 6)
explo.plot(mysatFO, toplot = 1, samples = c(1,2), yleftlim = c(0,15000))#, yrightlim = c(0,7500))
dev.off()

mycountsbio2 = dat(input = myfo, type = "countsbio", factor = NULL)

pdf(file.path(myfigdir, "fig_seqdepth2_FO.pdf"), width = 6, height = 6)
explo.plot(mycountsbio2, toplot = "global", samples = NULL, plottype = "boxplot")
dev.off()

pdf(file.path(myfigdir, "fig_seqdepth3_FO.pdf"), width = 6, height = 6)
explo.plot(mycountsbio2, toplot = "global", samples = NULL, plottype = "barplot")
dev.off()

# all plots FO
estosbios = names(mysatFO@dat$saturation)[-5]

pdf(file.path(myfigdir, "fig_saturation_ALL_FO.pdf"), width = 6, height = 6)
lapply(estosbios, function (x) explo.plot(mysatFO, toplot = x, samples = c(1,2), 
                                          yleftlim = c(0,15000)))#, yrightlim = c(0,7500))
dev.off()


pdf(file.path(myfigdir, "fig_sensitivity_ALL_FO.pdf"), width = 6, height = 6)
lapply(estosbios, function (x) explo.plot(mycountsbio2, toplot = x, samples = NULL, plottype = "barplot"))
dev.off()


## Prostate Cancer

mysatHS = dat(input = myhs, type = "saturation", k = 0, ndepth = 6)
pdf(file.path(myfigdir, "fig_seqdepth1_HS.pdf"), width = 6, height = 6)
explo.plot(mysatHS, samples = grep("Tumoral", myhs@phenoData[[1]])[c(1,4)], toplot = 1, 
           yleftlim = c(0,50000))
dev.off()

pdf(file.path(myfigdir, "fig_seqdepth1_HS_allbios.pdf"), width = 6, height = 6)
for (i in 1:32) {
  explo.plot(mysatHS, samples = grep("Tumoral", myhs@phenoData[[1]])[c(1,4)], toplot = i)               
}
dev.off()


mycountsbio2HS = dat(input = myhs, type = "countsbio", factor = NULL)

pdf(file.path(myfigdir, "fig_seqdepth2_HS.pdf"), width = 6, height = 6)
explo.plot(mycountsbio2HS, toplot = "global", samples = NULL, plottype = "boxplot")
dev.off()

pdf(file.path(myfigdir, "fig_seqdepth3_HS.pdf"), width = 6, height = 6)
explo.plot(mycountsbio2HS, toplot = "global", samples = NULL, plottype = "barplot")
dev.off()


# all plots HS
estosbios = names(mysatHS@dat$saturation)[-c(6,29)]

pdf(file.path(myfigdir, "fig_saturation_ALL_HS.pdf"), width = 6, height = 6)
lapply(estosbios, function (x) explo.plot(mysatHS, toplot = x, samples = c(1,2), 
                                          yleftlim = c(0,50000)))#, yrightlim = c(0,7500))
dev.off()


pdf(file.path(myfigdir, "fig_sensitivity_ALL_HS.pdf"), width = 6, height = 6)
lapply(estosbios, function (x) { print(x)
                                 explo.plot(mycountsbio2HS, toplot = x, samples = NULL, plottype = "barplot")})
dev.off()


######################################################


# Functionalities R package: BIAS DETECTION -------------------------------


###### Fusarium
mylengthbiasFO = dat(myfo, type = "length", factor = "cond") 
pdf(file.path(myfigdir, "fig_bias1_FO.pdf"), width = 6, height = 6)
explo.plot(mylengthbiasFO, samples = 1)
dev.off()

myGCbiasFO = dat(myfo, type = "GCbias", factor = "cond")
pdf(file.path(myfigdir, "fig_bias2_FO.pdf"), width = 6, height = 6)
explo.plot(myGCbiasFO, samples = 1)
dev.off()

myCDfo = dat(myfo, norm = FALSE, refColumn = 1, type = "cd") #type = "cd"
pdf(file.path(myfigdir, "fig_bias3_FO.pdf"), width = 6, height = 6)
explo.plot(myCDfo, xlim = c(0,50))
dev.off()


###### Prostate Cancer

mylengthbiasHS = dat(myhs, type = "length", factor = "cond") 
pdf(file.path(myfigdir, "fig_bias1_HS.pdf"), width = 6, height = 6)
explo.plot(mylengthbiasHS, samples = 1)
dev.off()

myGCbiasHS = dat(myhs, type = "GCbias", factor = "cond")
pdf(file.path(myfigdir, "fig_bias2_HS.pdf"), width = 6, height = 6)
explo.plot(myGCbiasHS, samples = 1)
dev.off()

myCDhs = dat(myhs, type = "cd")
pdf(file.path(myfigdir, "fig_bias3_HS.pdf"), width = 6, height = 6)
explo.plot(myCDhs, xlim = c(0,50), samples = 1:13)
dev.off()


######################################################


# Functionalities R package: NORMALIZATION --------------------------------


###### Fusarium
datossinnorm = myfo@assayData$exprs
longi = myfo@featureData@data$Length
names(longi) = rownames(datossinnorm)
datosnorm = list("rpkm" = rpkm(datossinnorm, long = longi),
                 "uqua" = uqua(datossinnorm),
                 "tmm" = tmm(datossinnorm),
                 "prop" = rpkm(datossinnorm, long = 1000))

mibio = as.character(myfo@featureData@data$Biotype)
names(mibio) = rownames(datossinnorm)

myfo.norm = vector("list", length = 4)
names(myfo.norm) = names(datosnorm)
for (i in 1:4) {
  myfo.norm[[i]] = readData(data = datosnorm[[i]], 
                            factors = data.frame("cond" = myfo@phenoData@data$cond),
                            length = longi,
                            biotype = mibio)
}

mylengthbiasFO = dat(myfo.norm[["rpkm"]], type = "length", factor = "cond") 
pdf(file.path(myfigdir, "fig_bias1norm_FO.pdf"), width = 6, height = 6)
explo.plot(mylengthbiasFO, samples = 1)
dev.off()


myCDfo2 = list("uqua" = dat(myfo.norm[["uqua"]], type = "countsbio"),
               "tmm" = dat(myfo.norm[["tmm"]], type = "countsbio"))
lapply(myCDfo2, explo.plot, plottype = "boxplot")

pdf(file.path(myfigdir, "fig_bias3uqua_FO.pdf"), width = 6, height = 6)
myCDfo = dat(myfo.norm[["uqua"]], type = "cd", norm = TRUE)
explo.plot(myCDfo)
dev.off()

pdf(file.path(myfigdir, "fig_bias3tmm_FO.pdf"), width = 6, height = 6)
myCDfo = dat(myfo.norm[["tmm"]], type = "cd", norm = TRUE)
explo.plot(myCDfo)
dev.off()


###### Prostate Cancer
datossinnorm = myhs@assayData$exprs
longi = myhs@featureData@data$Length
names(longi) = rownames(datossinnorm)
datosnorm = list("rpkm" = rpkm(datossinnorm, long = longi),
                 "uqua" = uqua(datossinnorm),
                 "tmm" = tmm(datossinnorm))

mibio = as.character(myhs@featureData@data$Biotype)
names(mibio) = rownames(datossinnorm)

myhs.norm = vector("list", length = 3)
names(myhs.norm) = names(datosnorm)
for (i in 1:3) {
  myhs.norm[[i]] = readData(data = datosnorm[[i]], 
                            factors = data.frame("cond" = myhs@phenoData@data$cond),
                            length = longi,
                            biotype = mibio)
}

mylengthbiasHS = dat(myhs.norm[["rpkm"]], type = "length", factor = "cond") 
pdf(file.path(myfigdir, "fig_bias1norm_HS.pdf"), width = 6, height = 6)
explo.plot(mylengthbiasHS, samples = 1)
dev.off()

myCDhs2 = list("uqua" = dat(myhs.norm[["uqua"]], type = "countsbio"),
               "tmm" = dat(myhs.norm[["tmm"]], type = "countsbio"))
lapply(myCDhs2, explo.plot, plottype = "boxplot")

pdf(file.path(myfigdir, "fig_bias3uqua_HS.pdf"), width = 6, height = 6)
myCDhs = dat(myhs.norm[["uqua"]], type = "cd", norm = TRUE)
explo.plot(myCDhs, samples = 1:13)
dev.off()

pdf(file.path(myfigdir, "fig_bias3tmm_HS.pdf"), width = 6, height = 6)
myCDhs = dat(myhs.norm[["tmm"]], type = "cd", norm = TRUE)
explo.plot(myCDhs, samples = 1:13)
dev.off()


####################################################
### Normalization of GC content with EDASeq package
####################################################

library(EDASeq)
gc.norm = vector("list", length = 2)
names(gc.norm) = c("FO", "HS")


# FO
conteos = myfo@assayData$exprs
mygc = myfo@featureData@data$GC; names(mygc) = rownames(conteos)
mygc = data.frame(gc = mygc)
mygcFO = na.omit(mygc)
data <- newSeqExpressionSet(exprs=as.matrix(conteos[rownames(mygcFO),]), featureData=mygcFO,
                            phenoData=data.frame(conditions=as.character(myfo@phenoData$cond),
                                                 row.names=colnames(conteos)))
dataWithin <- withinLaneNormalization(data,"gc",which="full")
dataNorm <- betweenLaneNormalization(dataWithin,which="full")
dataNorm = exprs(dataNorm)

gc.norm[[1]] = dataNorm


# HS
conteos = myhs@assayData$exprs
mygc = myhs@featureData@data$GC; names(mygc) = rownames(conteos)
mygc = data.frame(gc = mygc)
mygcHS = na.omit(mygc)
data <- newSeqExpressionSet(exprs=as.matrix(conteos[rownames(mygcHS),]), featureData=mygcHS,
                            phenoData=data.frame(conditions=as.character(myhs@phenoData$cond),
                                                 row.names=colnames(conteos)))
dataWithin <- withinLaneNormalization(data,"gc",which="full")
dataNorm <- betweenLaneNormalization(dataWithin,which="full")
dataNorm = exprs(dataNorm)

gc.norm[[2]] = dataNorm


### Figures with GC normalization


# FO
myfactor = myfo@phenoData$cond
gcnormFO = readData(data = gc.norm[[1]], factors = data.frame("cond" = myfactor), gc = mygcFO)

pdf(file.path(myfigdir, "fig_bias2edaseq_FO.pdf"), width = 6, height = 6)
myGCplot = dat(gcnormFO, type = "GCbias", factor = "cond")
explo.plot(myGCplot, samples = 1)
dev.off()


######################################################


# Differential Expression: RESULTS ----------------------------------------

library(edgeR)
library(DESeq2)
library(samr)

myq = 0.95

mycolor = miscolores[c(2,4,6:7)]

## FO data

resultsFOfilt1 = vector("list", length = 4)
names(resultsFOfilt1) = c("NOISeqBIO", "edgeR", "DESeq2", "SAMseq")


png(file.path(myfigdir, "fig_DEmethodsExperimentalData_FO_filt1.png"), width = 300*2, height = 300*2)

par(mfcol = c(2,2))

misdatos = myfo@assayData$exprs
grupo = data.frame("grup" = c("Blood","Blood","MM","MM"))

misdatos = filtered.data(dataset =  misdatos, factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500, cpm = 1)

condi1 = rowMeans(tmm(misdatos[,grep("wt_M_30", colnames(misdatos))])) + 1
condi2 = rowMeans(tmm(misdatos[,grep("wt_B_30", colnames(misdatos))])) + 1

biotipos = as.character(myfo@featureData@data[,"Biotype"])
names(biotipos) = rownames(myfo@featureData@data)
datosN = readData(misdatos, factors = grupo, biotype = biotipos, chromosome = myfo@featureData@data[,3:5])


# NOISeqBIO (probability)
mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", random.seed = 123, nclust = 15, lc = 0, r = 50,
                     adj = 1.5, a0per = 0.9, filter = 0)
resultsFOfilt1[[1]] = mynoiseq
mydeg = rownames(degenes(resultsFOfilt1[[1]], q = myq))
length(mydeg)

plot(condi1, condi2, main = "NOISeqBIO", log = "xy",
     xlab = "MM", ylab = "Blood", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[1], pch = 20, cex = 0.5)
text(3,5000, paste("DEG =", length(mydeg)))  


## edgeR (adjusted p-value)
d2 = DGEList(counts = misdatos, group = grupo[,"grup"])
d2 = calcNormFactors(d2)  
d2 = estimateCommonDisp(d2)
d2 = estimateTagwiseDisp(d2, trend = "movingave")
d2 = exactTest(d2)
myedgeR = topTags(d2, n = nrow(misdatos))[[1]]
resultsFOfilt1[[2]] = myedgeR
mypval = resultsFOfilt1[[2]][,"FDR"]
mydeg = rownames(resultsFOfilt1[[2]][which(mypval < (1-myq)), ])
length(mydeg)

plot(condi1, condi2, main = "edgeR", log = "xy", 
     xlab = "MM", ylab = "Blood", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[2], pch = 20, cex = 0.5)
text(3,5000, paste("DEG =", length(mydeg)))


## DESeq (adjusted p-value)
# DESeq.cds = newCountDataSet(countData = misdatos, conditions = grupo[,"grup"])
# DESeq.cds = estimateSizeFactors(DESeq.cds)
# DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local")
# DESeq.test = nbinomTest(DESeq.cds, "MM", "Blood")
# resultsFOfilt1[[3]] = DESeq.test
# mypval = p.adjust(DESeq.test$pval, method = "BH")
# mydeg = DESeq.test[which(mypval < (1-myq)),"id"]
# length(mydeg)
# 
# plot(condi1, condi2, main = "DESeq - FO", log = "xy", 
#      xlab = "cond1", ylab = "cond2", cex = 0.7)
# points(condi1[mydeg], condi2[mydeg], col = 2, pch = 20, cex = 0.5)
# text(3,5000, paste("DEG =", length(mydeg))) 


## DESeq2 (adjusted p-value)
DESeq.cds = DESeqDataSetFromMatrix(countData = misdatos, colData = grupo, design = ~ grup)
DESeq.cds = DESeq(DESeq.cds)
DESeq.test = results(DESeq.cds)
resultsFOfilt1[[3]] = DESeq.test
mydeg = rownames(as.data.frame(resultsFOfilt1[[3]])[which(as.data.frame(resultsFOfilt1[[3]])[,"padj"] < (1-myq)),])
length(mydeg)

plot(condi1, condi2, main = "DESeq2", log = "xy", 
     xlab = "MM", ylab = "Blood", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[3], pch = 20, cex = 0.5)
text(3,5000, paste("DEG =", length(mydeg))) 


## SAMseq (FDR)
SAMseq.test = SAMseq(misdatos, grupo[,"grup"], resp.type = "Two class unpaired",
                     geneid = rownames(misdatos), genenames = rownames(misdatos),
                     nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208)
samseq.up = SAMseq.test$siggenes.table$genes.up
samseq.lo = SAMseq.test$siggenes.table$genes.lo
rownames(samseq.up) = samseq.up[,1]
rownames(samseq.lo) = samseq.lo[,1]
mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5]))
genesSAM = rownames(mySAMseq)
mySAMseq = apply(mySAMseq, 2, as.numeric)
rownames(mySAMseq) = genesSAM
mySAMseq = data.frame(mySAMseq)[rownames(misdatos),]
mySAMseq[,3] = mySAMseq[,3]/100
resultsFOfilt1[[4]] = mySAMseq
mydeg = rownames(resultsFOfilt1[[4]][which(resultsFOfilt1[[4]][,3] < (1-myq)),])
length(mydeg)

plot(condi1, condi2, main = "SAMseq", log = "xy", 
     xlab = "MM", ylab = "Blood", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[4], pch = 20, cex = 0.5)
text(3,5000, paste("DEG =", length(mydeg))) 

dev.off()


#----------#


## Prostate Cancer data
resultsHSfilt1 = vector("list", length = 4)
names(resultsHSfilt1) = c("NOISeqBIO", "edgeR", "DESeq2", "SAMseq")

misdatos = myhs@assayData$exprs
grupo = data.frame("grup" = myhs@phenoData$cond)
misdatos = filtered.data(dataset =  misdatos, factor = grupo$grup, norm = FALSE, method = 1, cv.cutoff = 500, cpm = 1) # 17207

condi1 = rowMeans(tmm(misdatos[,grep("N", colnames(misdatos))])) + 1
condi2 = rowMeans(tmm(misdatos[,grep("T", colnames(misdatos))])) + 1

biotipos = as.character(myhs@featureData@data[,"Biotype"])
names(biotipos) = rownames(myhs@featureData@data)
datosN = readData(misdatos, factors = grupo, biotype = biotipos, chromosome = myhs@featureData@data[,4:6])


png(file.path(myfigdir, "fig_DEmethodsExperimentalData_HS_filt2.png"), width = 450*2, height = 450*2)

par(mfcol = c(2,2))

## NOISeqBIO (probability)
mynoiseq = noiseqbio(datosN, norm = "tmm", factor = "grup", random.seed = 123, nclust = 15, lc = 0, r = 50,
                     adj = 1.5, a0per = 0.9, filter = 0)
resultsHSfilt1[[1]] = mynoiseq
mydeg = rownames(degenes(resultsHSfilt1[[1]], q = myq))  # 1757
length(mydeg)

plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "NOISeqBIO", log = "xy", 
     xlab = "Healthy", ylab = "Tumoral", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[1], pch = 1, cex = 0.7)
text(3,10000, paste("DEG =", length(mydeg)))  


## edgeR (adjusted p-value)
d2 = DGEList(counts = misdatos, group = grupo$grup)
d2 = calcNormFactors(d2)  
d2 = estimateCommonDisp(d2)
d2 = estimateTagwiseDisp(d2, trend = "movingave")
d2 = exactTest(d2)
myedgeR = topTags(d2, n = nrow(misdatos))[[1]]
resultsHSfilt1[[2]] = myedgeR
mypval = resultsHSfilt1[[2]][,"FDR"]
mydeg = rownames(resultsHSfilt1[[2]][which(mypval < (1-myq)), ])
length(mydeg)

plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "edgeR", log = "xy", 
     xlab = "Healthy", ylab = "Tumoral", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[2], pch = 1, cex = 0.7)
text(3,10000, paste("DEG =", length(mydeg)))


## DESeq (adjusted p-value)
# DESeq.cds = newCountDataSet(countData = misdatos, conditions = grupo$grup)
# DESeq.cds = estimateSizeFactors(DESeq.cds)
# DESeq.cds = estimateDispersions(DESeq.cds, sharingMode = "maximum", method = "pooled", fitType = "local")
# DESeq.test = nbinomTest(DESeq.cds, "Healthy", "Tumoral")
# resultsHSfilt1[[3]] = DESeq.test
# mypval = p.adjust(DESeq.test$pval, method = "BH")
# mydeg = DESeq.test[which(mypval < (1-myq)),"id"]
# length(mydeg)
# 
# plot(condi1, condi2, main = "DESeq", log = "xy", 
#      xlab = "Healthy", ylab = "Tumoral", cex = 0.7)
# points(condi1[mydeg], condi2[mydeg], col = 2, pch = 20, cex = 0.5)
# text(3,10000, paste("DEG =", length(mydeg))) 


## DESeq (adjusted p-value)
DESeq.cds = DESeqDataSetFromMatrix(countData = misdatos, colData = grupo, design = ~ grup)
DESeq.cds = DESeq(DESeq.cds)
DESeq.test = results(DESeq.cds)
resultsHSfilt1[[3]] = DESeq.test
mydeg = rownames((resultsHSfilt1[[3]])[which((resultsHSfilt1[[3]])[,"padj"] < (1-myq)),])
length(mydeg)

plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "DESeq2", log = "xy", 
     xlab = "Healthy", ylab = "Tumoral", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[3], pch = 1, cex = 0.7)
text(3,10000, paste("DEG =", length(mydeg)))


## SAMseq (FDR)
SAMseq.test = SAMseq(misdatos, grupo[,"grup"], resp.type = "Two class unpaired",
                     geneid = rownames(misdatos), genenames = rownames(misdatos),
                     nperms = 100, nresamp = 20, fdr.output = 1, random.seed = 208)
samseq.up = SAMseq.test$siggenes.table$genes.up
samseq.lo = SAMseq.test$siggenes.table$genes.lo
rownames(samseq.up) = samseq.up[,1]
rownames(samseq.lo) = samseq.lo[,1]
mySAMseq = rbind(na.omit(samseq.up[,3:5]), na.omit(samseq.lo[,3:5]))
genesSAM = rownames(mySAMseq)
mySAMseq = apply(mySAMseq, 2, as.numeric)
rownames(mySAMseq) = genesSAM
mySAMseq = data.frame(mySAMseq)[rownames(misdatos),]
mySAMseq[,3] = mySAMseq[,3]/100
resultsHSfilt1[[4]] = mySAMseq
mydeg = rownames(resultsHSfilt1[[4]][which(resultsHSfilt1[[4]][,3] < (1-myq)),])
length(mydeg)

plot(condi1[setdiff(names(condi1), mydeg)], condi2[setdiff(names(condi1), mydeg)], main = "SAMseq", log = "xy", 
     xlab = "Healthy", ylab = "Tumoral", cex = 0.7)
points(condi1[mydeg], condi2[mydeg], col = mycolor[4], pch = 1, cex = 0.7)
text(3,10000, paste("DEG =", length(mydeg))) 


dev.off()  


########################################################


# Differential Expression: Common DEG and Correlation ---------------------

myq = 0.05

mydegFO = mydegHS = vector("list", length = 4)
names(mydegFO) = names(mydegHS) = names(resultsFOfilt1)

# NOISeqBIO
genesFO = rownames(resultsFOfilt1[[1]]@results[[1]])
genesHS = rownames(resultsHSfilt1[[1]]@results[[1]])
data2corFO = 1-resultsFOfilt1[[1]]@results[[1]][,"prob"]
data2corHS = 1-resultsHSfilt1[[1]]@results[[1]][,"prob"]
mydegFO[[1]] = genesFO[which(resultsFOfilt1[[1]]@results[[1]][,"prob"] > 1-myq)]
mydegHS[[1]] = genesHS[which(resultsHSfilt1[[1]]@results[[1]][,"prob"] > 1-myq)]

# edgeR
data2corFO = cbind(data2corFO, resultsFOfilt1[[2]][genesFO,"FDR"])
data2corHS = cbind(data2corHS, resultsHSfilt1[[2]][genesHS,"FDR"])
mydegFO[[2]] = rownames(resultsFOfilt1[[2]])[which(resultsFOfilt1[[2]][,"FDR"] < myq)]
mydegHS[[2]] = rownames(resultsHSfilt1[[2]])[which(resultsHSfilt1[[2]][,"FDR"] < myq)]
head(resultsFOfilt1[[2]])

# DESeq2
data2corFO = cbind(data2corFO, as.data.frame(resultsFOfilt1[[3]])[genesFO,"padj"])
data2corHS = cbind(data2corHS, as.data.frame(resultsHSfilt1[[3]])[genesHS,"padj"])
mydegFO[[3]] = rownames(as.data.frame(resultsFOfilt1[[3]]))[which(as.data.frame(resultsFOfilt1[[3]])[,"padj"] < myq)]
mydegHS[[3]] = rownames(as.data.frame(resultsHSfilt1[[3]]))[which(as.data.frame(resultsHSfilt1[[3]])[,"padj"] < myq)]

# SAMseq
data2corFO = cbind(data2corFO, resultsFOfilt1[[4]][genesFO,3])
data2corHS = cbind(data2corHS, resultsHSfilt1[[4]][genesHS,3])
mydegFO[[4]] = rownames(resultsFOfilt1[[4]])[which(resultsFOfilt1[[4]][,3] < myq)]
mydegHS[[4]] = rownames(resultsHSfilt1[[4]])[which(resultsHSfilt1[[4]][,3] < myq)]


colnames(data2corFO) = colnames(data2corHS) = names(resultsFOfilt1)


cor(na.omit(data2corFO), method = "spearman")
cor(na.omit(data2corHS), method = "spearman")

for (i in 1:3) {
  for (j in (i+1):4){
    print(names(resultsFOfilt1)[i]); print(names(resultsFOfilt1)[j])
    print("FO")
    print(length(intersect(mydegFO[[i]], mydegFO[[j]])))
    print("HS")
    print(length(intersect(mydegHS[[i]], mydegHS[[j]])))
  }
}


############################################################################


# Functionalities R package: DE RESULTS EXPLORATION -----------------------


##### FO
mynoiseq = resultsFOfilt1[["NOISeqBIO"]]

medida = 400

## Expression DE plot
png(file.path(myfigdir, "fig_pack_expre.png"), width = medida, height = medida)
DE.plot(mynoiseq, q = 0.95, graphic = "expr", log.scale = TRUE)
dev.off()

## (M,D) DE plot
png(file.path(myfigdir, "fig_pack_MD.png"), width = medida, height = medida)
DE.plot(mynoiseq, q = 0.95, graphic = "MD")
dev.off()

## Manhattan DE plot
medida = 6
pdf(file.path(myfigdir, "fig_pack_DEmanhattan_FO.pdf"), width = medida*2, height = medida*2)
DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:2, log.scale = TRUE, join = FALSE)
dev.off()

# with all chromosomes
medida = 5
pdf(file.path(myfigdir, "fig_pack_DEmanhattan_FO_AllChrom.pdf"), width = medida*2, height = medida*5)
DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:15, log.scale = TRUE, join = FALSE)
dev.off()


## Distribution DE plot
medida = 6
pdf(file.path(myfigdir, "fig_pack_DEbarplot_FO.pdf"), width = medida*2, height = medida)
DE.plot(mynoiseq, chromosomes = 1:15, q = 0.95, graphic = "distr")
dev.off()


##### HS
mynoiseq = resultsHSfilt1[["NOISeqBIO"]]

medida = 400

## Expression DE plot
png(file.path(myfigdir, "fig_pack_expre_HS.png"), width = medida, height = medida)
DE.plot(mynoiseq, q = 0.95, graphic = "expr", log.scale = TRUE)
dev.off()

## (M,D) DE plot
png(file.path(myfigdir, "fig_pack_MD_HS.png"), width = medida, height = medida)
DE.plot(mynoiseq, q = 0.95, graphic = "MD")
dev.off()

## Manhattan DE plot
medida = 3.6
pdf(file.path(myfigdir, "fig_pack_DEmanhattan_HS.pdf"), width = medida*2, height = medida*1.95)
DE.plot(mynoiseq, q = 0.95, graphic = "chrom", chromosomes = 1:2, log.scale = TRUE, join = FALSE)
dev.off()

## Distribution DE plot
medida = 6
pdf(file.path(myfigdir, "fig_pack_DEbarplot_HS.pdf"), width = medida*2, height = medida)
DE.plot(mynoiseq, chromosomes = c(as.character(1:22), "X", "Y"), q = 0.95, graphic = "distr")
dev.off()


###########################################################################################
###########################################################################################


#### Functional enrichment analysis (Prostate Cancer)


# to classify up in tumoral & down in tumoral
datanorm = tmm(myhs@assayData$exprs, lc = 0)
TminusH = rowMeans(datanorm[,grep("T_", colnames(datanorm))]) - rowMeans(datanorm[,grep("N_", colnames(datanorm))])
mygenes = rownames(myhs@assayData$exprs)
mylength = myhs@featureData@data$Length
names(mylength) = mygenes


# preparing data for GOseq
paragoseq <- vector("list", length = 3)
names(paragoseq) <- c("all", "upT", "downT")
paragoseq = lapply(paragoseq, function (x) { x = vector("list", length = 4)
                                             names(x) = names(resultsHSfilt1)
                                             x})
for (j in 1:4) {
  paragoseq[[1]][[j]] = as.integer(mygenes %in% mydegHS[[j]])
  paragoseq[[2]][[j]] = (mygenes %in% mydegHS[[j]])*(TminusH > 0)
  paragoseq[[3]][[j]] = (mygenes %in% mydegHS[[j]])*(TminusH < 0)
  names(paragoseq[[1]][[j]]) = names(paragoseq[[2]][[j]]) = names(paragoseq[[3]][[j]]) = mygenes  
} 


# applying GOseq
enrichetNOTpropagated = enriched
enriched <- vector("list", length = 3)
names(enriched) <- names(paragoseq)
enriched = lapply(enriched, function (x) { x = vector("list", length = 4)
                                             names(x) = names(resultsHSfilt1)
                                             x})

for (i in 1:3) {
  for (j in 1:4) {        
    #mipwf <- nullp(paragoseq[[i]][[j]], bias.data = mylength)
    mipwf <- nullp(paragoseq[[i]][[j]], bias.data = mylength, genome = "hg19", id = "ensGene")
    #walle <- goseq(pwf = mipwf, gene2cat = myGO2, method = "Wallenius", repcnt = 2000)
    walle <- goseq(pwf = mipwf, gene2cat = NULL, method = "Wallenius", repcnt = 2000,
                   genome = "hg19", id = "ensGene")
    enriched[[i]][[j]] <- walle$category[p.adjust(walle$over_represented_pvalue, method = "BH") < 0.05]
    if (length(enriched[[i]][[j]]) > 0) {
      enriched[[i]][[j]] = cbind(enriched[[i]][[j]], myGO3[enriched[[i]][[j]],2])
    }        
  }  
}


t(sapply(enriched, sapply, function(x) length(unique(x[,1]))))
# NOISeqBIO edgeR DESeq2 SAMseq
# all         237   405    471    573
# upT          32    61     95    151
# downT       348   632    612    650

# propagated
#        NOISeqBIO edgeR DESeq2 SAMseq
# all        1518  2338   2364   2557
# upT         210   323    495    742
# downT      1869  2737   2760   2830