## ----fig.show='hold'----------------------------------------------------------
library(TCGAWorkflowData)
data("elmerExample")
data("TCGA_LGG_Transcriptome_20_samples")
data("TCGA_GBM_Transcriptome_20_samples")
data("histoneMarks")
data("biogrid")
data("genes_GR")
data("maf_lgg_gbm")

## ----eval = FALSE, message=FALSE,warning=FALSE, include=TRUE------------------
#  library(GenomicRanges)
#  library(TCGAbiolinks)
#  ##############################
#  ## Recurrent CNV annotation ##
#  ##############################
#  # Get gene information from GENCODE using biomart
#  genes <- TCGAbiolinks:::get.GRCh.bioMart(genome = "hg19")
#  genes <- genes[genes$external_gene_id != "" & genes$chromosome_name %in% c(1:22,"X","Y"),]
#  genes[genes$chromosome_name == "X", "chromosome_name"] <- 23
#  genes[genes$chromosome_name == "Y", "chromosome_name"] <- 24
#  genes$chromosome_name <- sapply(genes$chromosome_name,as.integer)
#  genes <- genes[order(genes$start_position),]
#  genes <- genes[order(genes$chromosome_name),]
#  genes <- genes[,c("external_gene_id", "chromosome_name", "start_position","end_position")]
#  colnames(genes) <- c("GeneSymbol","Chr","Start","End")
#  genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)
#  save(genes_GR,genes,file = "genes_GR.rda", compress = "xz")

## ----eval=FALSE, message=FALSE, warning=FALSE, include=TRUE-------------------
#  library(RTCGAToolbox)
#  # Download GISTIC results
#  lastAnalyseDate <- getFirehoseAnalyzeDates(1)
#  gistic <- getFirehoseData(
#    dataset = "GBM",
#    gistic2_Date = lastAnalyseDate,
#    GISTIC = TRUE
#  )
#  
#  # get GISTIC results
#  gistic_allbygene <- getData(
#    gistic,
#    type = "GISTIC",
#    platform = "AllByGene"
#  )
#  
#  gistic_thresholedbygene <- getData(
#    gistic,
#    type = "GISTIC",
#    platform = "ThresholdedByGene"
#  )
#  
#  save(
#    gistic_allbygene,
#    gistic_thresholedbygene,
#    file = "GBMGistic.rda", compress = "xz"
#  )

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
#  query_exp_lgg <- GDCquery(
#    project = "TCGA-LGG",
#    data.category = "Transcriptome Profiling",
#    data.type = "Gene Expression Quantification",
#    workflow.type = "STAR - Counts"
#  )
#  # Get only first 20 samples to make example faster
#  query_exp_lgg$results[[1]] <- query_exp_lgg$results[[1]][1:20,]
#  GDCdownload(query_exp_lgg)
#  exp_lgg <- GDCprepare(
#    query = query_exp_lgg
#  )
#  
#  query_exp_gbm <- GDCquery(
#    project = "TCGA-GBM",
#    data.category = "Transcriptome Profiling",
#    data.type = "Gene Expression Quantification",
#    workflow.type = "STAR - Counts"
#  )
#  # Get only first 20 samples to make example faster
#  query_exp_gbm$results[[1]] <- query_exp_gbm$results[[1]][1:20,]
#  GDCdownload(query_exp_gbm)
#  exp_gbm <- GDCprepare(
#    query = query_exp_gbm
#  )

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
#  #----------- 8.3 Identification of Regulatory Enhancers   -------
#  library(TCGAbiolinks)
#  # Samples: primary solid tumor w/ DNA methylation and gene expression
#  matched_met_exp <- function(project, n = NULL){
#    # get primary solid tumor samples: DNA methylation
#    message("Download DNA methylation information")
#    met450k <- GDCquery(
#      project = project,
#      data.category = "DNA Methylation",
#      platform = "Illumina Human Methylation 450",
#      data.type = "Methylation Beta Value",
#      sample.type = c("Primary Tumor")
#    )
#    met450k.tp <-  met450k$results[[1]]$cases
#  
#    # get primary solid tumor samples: RNAseq
#    message("Download gene expression information")
#    exp <- GDCquery(
#      project = project,
#      data.category = "Transcriptome Profiling",
#      data.type = "Gene Expression Quantification",
#      workflow.type = "STAR - Counts"
#      sample.type = c("Primary Tumor")
#    )
#  
#    exp.tp <- exp$results[[1]]$cases
#    # Get patients with samples in both platforms
#    patients <- unique(substr(exp.tp,1,15)[substr(exp.tp,1,12) %in% substr(met450k.tp,1,12)] )
#    if(!is.null(n)) patients <- patients[1:n] # get only n samples
#    return(patients)
#  }
#  lgg.samples <- matched_met_exp("TCGA-LGG", n = 10)
#  gbm.samples <- matched_met_exp("TCGA-GBM", n = 10)
#  samples <- c(lgg.samples,gbm.samples)
#  
#  #-----------------------------------
#  # 1 - Methylation
#  # ----------------------------------
#  query.met <- GDCquery(
#    project = c("TCGA-LGG","TCGA-GBM"),
#    data.category = "DNA Methylation",
#    platform = "Illumina Human Methylation 450",
#    data.type = "Methylation Beta Value",
#    barcode = samples
#  )
#  GDCdownload(query.met)
#  met <- GDCprepare(query.met, save = FALSE)
#  met <- subset(met,subset = as.character(GenomicRanges::seqnames(met)) %in% c("chr9"))
#  
#  #-----------------------------------
#  # 2 - Expression
#  # ----------------------------------
#  query.exp <- GDCquery(
#    project = c("TCGA-LGG","TCGA-GBM"),
#    data.category = "Transcriptome Profiling",
#    data.type = "Gene Expression Quantification",
#    workflow.type = "STAR - Counts"
#    sample.type = c("Primary Tumor")
#    barcode =  samples
#  )
#  GDCdownload(query.exp)
#  exp <- GDCprepare(query.exp, save = FALSE)
#  save(exp, met, gbm.samples, lgg.samples, file = "elmerExample.rda", compress = "xz")

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
#  library(TCGAbiolinks)
#  query <- GDCquery(
#    project = c("TCGA-LGG","TCGA-GBM"),
#    data.category = "Simple Nucleotide Variation",
#    access = "open",
#    data.type = "Masked Somatic Mutation",
#    workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
#  )
#  GDCdownload(query)
#  maf <- GDCprepare(query)
#  save(maf,file = "maf_lgg_gbm.rda",compress = "xz")

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
#  ### read biogrid info
#  ### Check last version in https://thebiogrid.org/download.php
#  file <- "https://downloads.thebiogrid.org/Download/BioGRID/Latest-Release/BIOGRID-ALL-LATEST.tab2.zip"
#  if(!file.exists(gsub("zip","txt",basename(file)))){
#    downloader::download(file,basename(file))
#    unzip(basename(file),junkpaths =TRUE)
#  }
#  
#  tmp.biogrid <- vroom::vroom(
#    dir(pattern = "BIOGRID-ALL.*\\.txt")
#  )
#  save(tmp.biogrid, file = "biogrid.rda", compress = "xz")

## ----results='hide', eval=FALSE, echo=FALSE, message=FALSE,warning=FALSE------
#  library(ChIPseeker)
#  library(AnnotationHub)
#  library(pbapply)
#  library(ggplot2)
#  #------------------ Working with ChipSeq data ---------------
#  # Step 1: download histone marks for a brain and non-brain samples.
#  #------------------------------------------------------------
#  # loading annotation hub database
#  ah = AnnotationHub()
#  
#  # Searching for brain consolidated epigenomes in the roadmap database
#  bpChipEpi_brain <- query(ah , c("EpigenomeRoadMap", "narrowPeak", "chip", "consolidated","brain","E068"))
#  
#  # Get chip-seq data
#  histone.marks <- pblapply(names(bpChipEpi_brain), function(x) {ah[[x]]})
#  names(histone.marks) <- names(bpChipEpi_brain)
#  save(histone.marks, file = "histoneMarks.rda", compress = "xz")

## ----sessionInfo, results='asis', echo=FALSE----------------------------------
pander::pander(sessionInfo(), compact = FALSE)