## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----Load library, echo=TRUE, message=FALSE, warning=FALSE-------------------- #INSTALL BiocManager::install("SARC") #load library library(SARC) ## ----Load more library, echo=FALSE, message=FALSE, warning=FALSE-------------- #load additional packages for vignette building library(knitr) library(kableExtra) ## ----test_cnv, echo=TRUE------------------------------------------------------ data("test_cnv") #For speed just use a few detected CNVs test_cnv <- test_cnv[1:3,] head(test_cnv) %>% kable %>% kable_styling("striped", full_width=FALSE) %>% scroll_box(width = "800px", height = "200px") ## ----test_cov, echo=TRUE------------------------------------------------------ data("test_cov") head(test_cov[, 1:10]) %>% kable %>% kable_styling("striped", full_width=FALSE) %>% scroll_box(width = "800px", height = "200px") ## ----pressure, echo=TRUE------------------------------------------------------ #Create a start site and end site for each CNV detected SARC <- regionSet(cnv = test_cnv, test_cov) #Create smaller coverage files for each CNV SARC <- regionSplit(RE = SARC, #the raggedexpression object we made cnv = metadata(SARC)[['CNVlist']][[1]], startlist = metadata(SARC)[[2]], #list of start sites, per CNV endlist = metadata(SARC)[[3]]) #list of end sites, per CNV ## ----means, echo=TRUE--------------------------------------------------------- #Calculate the mean read coverage SARC <- regionMean(RE = SARC, cnv = test_cnv, splitcov = metadata(SARC)[[4]]) #list of cnv specific coverage ## ----qd, echo=TRUE------------------------------------------------------------ #Calculate the distribution of the mean reads SARC <- regionQuantiles(RE = SARC, cnv = metadata(SARC)[['CNVlist']][[2]], #new cnv file meancov = metadata(SARC)[[5]], #list of cnv specific coverage + means q1 = 0.1, #lower threshold q2 = 0.9) #upper threshold ## ----anova, echo=TRUE--------------------------------------------------------- #Calculate rarity of each suspected CNV - in contrast to other samples SARC <- prepAnova(RE = SARC, start = metadata(SARC)[[2]], end = metadata(SARC)[[3]], cnv = metadata(SARC)[['CNVlist']][[3]]) #newest cnv dataframe SARC <- anovaOnCNV(RE = SARC, cnv = metadata(SARC)[['CNVlist']][[3]], anovacov = metadata(SARC)[[8]]) head(metadata(SARC)[['CNVlist']][[4]]) %>% kable %>% kable_styling("striped", full_width=FALSE) %>% scroll_box(width = "800px", height = "200px") ## ----set up plot, echo=TRUE, message=FALSE, warning=FALSE--------------------- #prepare new objects for the CNV plots to work library(TxDb.Hsapiens.UCSC.hg38.knownGene) library(Homo.sapiens) #load genome specific libraries from BioConductor TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg38.knownGene txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene tx <- transcriptsBy(Homo.sapiens, columns = "SYMBOL") txgene <- tx@unlistData SARC <- plotCovPrep(RE = SARC, cnv = metadata(SARC)[['CNVlist']][[4]], #newest cnv dataframe startlist = metadata(SARC)[[2]], endlist = metadata(SARC)[[3]], n1 = 0, #left-padding n2 = 0) #right-padding SARC <- regionGrangeMake(RE = SARC, covprepped = metadata(SARC)[[9]]) SARC <- addExonsGenes(RE = SARC, covgranges = metadata(SARC)[[10]], #list of grange objects - one for each detected CNV txdb = txdb, #Species specific database txgene = txgene) #genes/ exons from the database SARC <- setupCNVplot(RE = SARC, namedgranges = metadata(SARC)[[11]], #grange objects which have genes/ exons added covprepped = metadata(SARC)[[9]]) ## ----plot CNVs, echo=TRUE, warning=FALSE-------------------------------------- #Calculate rarity of each suspected CNV - in contrast to other samples plotCNV(cnv = metadata(SARC)[['CNVlist']][[4]], setup = metadata(SARC)[[12]], FilteredCNV = 1) ## ----flag, echo=TRUE, eval = TRUE--------------------------------------------- #Use statistical analyses to flag CNVs as high or low confidence of being true SARC <- cnvConfidence(RE = SARC, cnv = metadata(SARC)[['CNVlist']][[4]]) head(metadata(SARC)[['CNVlist']][[5]]) %>% kable %>% kable_styling("striped", full_width=FALSE) %>% scroll_box(width = "800px", height = "200px") ## ----check, echo=TRUE, eval = TRUE-------------------------------------------- #This will show all the dataframes (cnv) files made #Generally it is recommended to use the most recently made cnv file to keep all the additional columns print(SARC) #This will show all the list objects. Their names should roughly correlate with the names of the parameters in the functions. names(metadata(SARC)) ## ----check distribution of reads, echo=TRUE, eval=TRUE------------------------ SARC <- setQDplot(RE = SARC, meancov = metadata(SARC)[[5]]) seeDist(meanList = metadata(SARC)[[13]], cnv = metadata(SARC)[['CNVlist']][[5]], sample = 1, plotly=FALSE) ## ----exon numbers and gene names, echo=TRUE, eval=TRUE------------------------ SARC <- pasteExonsGenes(RE = SARC, setup = metadata(SARC)[[12]], #list of dataframes from the setupCNVplot function cnv = metadata(SARC)[['CNVlist']][[5]]) #cnv file to add an extra column to metadata(SARC)[['CNVlist']][[6]] %>% kable %>% kable_styling("striped", full_width=FALSE) %>% scroll_box(width = "800px", height = "200px") ## ----dunnet, echo=TRUE, eval = FALSE------------------------------------------ # DNA <- phDunnetonCNV(RE = DNA, # cnv = metadata(SARC)[['CNVlist']][[4]], # anovacov = metadata(SARC)[[8]]) # SARC <- cnvConfidence(MA = SARC, cnv = experiments(SARC)[[7]], ph = TRUE) ## ----expanded plotCNV, echo=TRUE, eval = FALSE-------------------------------- # data("test_cnv2") # library(ggplot2) # for (i in seq_len(nrow(test_cnv2))) { # n <- paste0(cnv$SAMPLE[i], "_", # cnv$GENE[i], "_", # cnv$CHROM[i], "_", # cnv$START[i], "_", # cnv$END[i]) # savename <- paste0(n, ".jpeg") # print(i) # plotCNV(cnv = cnv, setup = metadata(X)[[9]], FilteredCNV = i, # batch = cnv$BATCH[i], gene = cnv$GENE[i]) # ggplot2::ggsave(filename = savename) # } ## ----mouse, eval=TRUE--------------------------------------------------------- require(TxDb.Mmusculus.UCSC.mm10.knownGene) require(Mus.musculus) # load genome specific libraries from BioConductor #prepare new objects for the CNV plots to work TxDb(Mus.musculus) <- TxDb.Mmusculus.UCSC.mm10.knownGene txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene tx <- transcriptsBy(Mus.musculus, columns = "SYMBOL") txgene <- tx@unlistData ## ----read depths, eval=TRUE, message=FALSE------------------------------------ require(GenomicAlignments) bam <- system.file("extdata", "test.bam", package="SARC") coverage <- coverage(bam) coverage$chr1@values ## ----CPM, eval=TRUE----------------------------------------------------------- load(system.file("extdata", "test_cov_raw.rda", package="SARC")) load(system.file("extdata", "librarySizes.rda", package="SARC")) normalised_counts <- t(t(test_cov[, 5:ncol(test_cov)]) / libsizes$PerMillion) normalised_counts <- cbind(test_cov[,1:4], normalised_counts) ## ----session_info, echo=TRUE, eval = TRUE------------------------------------- sessionInfo()