## @knitr style, eval=TRUE, echo=FALSE, results='asis' BiocStyle::latex() ## @knitr include=FALSE library(knitr) opts_chunk$set(tidy=FALSE) ## @knitr Homo.sapiens library(Homo.sapiens) columns(Homo.sapiens) ## @knitr Homo.sapiens2 keytypes(Homo.sapiens) ## @knitr Homo.sapiens3 k <- head(keys(Homo.sapiens,keytype="ENTREZID")) k ## @knitr Homo.sapiens4 result <- select(Homo.sapiens, keys=k, columns=c("TXNAME","TXSTART","TXSTRAND"), keytype="ENTREZID") head(result) ## @knitr URI Example uri <- 'http://www.uniprot.org/uniprot/?query=' ids <- c('P13368', 'Q6GZX4') idStr <- paste(ids, collapse="+or+") format <- '&format=tab' fullUri <- paste0(uri,idStr,format) read.delim(fullUri) ## @knitr web service code getUniprotGoodies <- function(query, columns) { ## query and columns start as a character vectors qstring <- paste(query, collapse="+or+") cstring <- paste(columns, collapse=",") uri <- 'http://www.uniprot.org/uniprot/?query=' fullUri <- paste0(uri,qstring,'&format=tab&columns=',cstring) dat <- read.delim(fullUri, stringsAsFactors=FALSE) ## now remove things that were not in the specific original query... dat <- dat[dat[,1] %in% query,] dat } ## @knitr xml_tree,eval=FALSE ## library(XML) ## uri <- "http://www.uniprot.org/uniprot/?query=P13368+or+Q6GZX4&format=xml" ## xml <- xmlTreeParse(uri, useInternalNodes=TRUE) ## @knitr xml_namespace,eval=FALSE ## defs <- xmlNamespaceDefinitions(xml, recurisve=TRUE) ## defs ## @knitr xml_namespace_struct,eval=FALSE ## ns <- structure(sapply(defs, function(x) x$uri), names=names(defs)) ## @knitr xml_namespace2,eval=FALSE ## entry <- getNodeSet(xml, "//ns:entry", "ns") ## xmlSize(entry) ## @knitr xml_xmlAttrs,eval=FALSE ## nms <- xpathSApply(xml, "//ns:entry/ns:name", xmlValue, namespaces="ns") ## attrs <- xpathApply(xml, "//ns:entry", xmlAttrs, namespaces="ns") ## names(attrs) <- nms ## attrs ## @knitr xml_xmlChildren,eval=FALSE ## fun1 <- function(elt) unique(names(xmlChildren(elt))) ## xpathApply(xml, "//ns:entry", fun1, namespaces="ns") ## @knitr xml_feature_type,eval=FALSE ## Q6GZX4 <- "//ns:entry[ns:accession='Q6GZX4']/ns:feature" ## xmlSize(getNodeSet(xml, Q6GZX4, namespaces="ns")) ## ## P13368 <- "//ns:entry[ns:accession='P13368']/ns:feature" ## xmlSize(getNodeSet(xml, P13368, namespaces="ns")) ## @knitr xml_feature_type2,eval=FALSE ## path <- "//ns:feature" ## unique(xpathSApply(xml, path, xmlGetAttr, "type", namespaces="ns")) ## @knitr xml_feature_type_P13368,eval=FALSE ## path <- "//ns:entry[ns:accession='P13368']/ns:feature[@type='sequence conflict']" ## data.frame(t(xpathSApply(xml, path, xmlAttrs, namespaces="ns"))) ## @knitr xml_sequence,eval=FALSE ## library(Biostrings) ## path <- "//ns:entry/ns:sequence" ## seqs <- xpathSApply(xml, path, xmlValue, namespaces="ns") ## aa <- AAStringSet(unlist(lapply(seqs, function(elt) gsub("\n", "", elt)), ## use.names=FALSE)) ## names(aa) <- nms ## aa ## @knitr WebServiceObject setClass("uniprot", representation(name="character"), prototype(name="uniprot")) ## @knitr makeInstanceWebServiceObj uniprot <- new("uniprot") ## @knitr onLoad2,eval=FALSE ## .onLoad <- function(libname, pkgname) ## { ## ns <- asNamespace(pkgname) ## uniprot <- new("uniprot") ## assign("uniprot", uniprot, envir=ns) ## namespaceExport(ns, "uniprot") ## } ## @knitr keytypeUniprot setMethod("keytypes", "uniprot",function(x){return("UNIPROT")}) uniprot <- new("uniprot") keytypes(uniprot) ## @knitr keytypeUniprot2 setMethod("columns", "uniprot", function(x){return(c("ID", "SEQUENCE", "ORGANISM"))}) columns(uniprot) ## @knitr webServiceSelect .select <- function(x, keys, columns){ colsTranslate <- c(id='ID', sequence='SEQUENCE', organism='ORGANISM') columns <- names(colsTranslate)[colsTranslate %in% columns] getUniprotGoodies(query=keys, columns=columns) } setMethod("select", "uniprot", function(x, keys, columns, keytype) { .select(keys=keys, columns=columns) }) select(uniprot, keys=c("P13368","P20806"), columns=c("ID","ORGANISM")) ## @knitr classicConn,results='hide' drv <- SQLite() library("org.Hs.eg.db") con_hs <- dbConnect(drv, dbname=system.file("extdata", "org.Hs.eg.sqlite", package = "org.Hs.eg.db")) con_hs dbDisconnect(con_hs) ## @knitr ourConn require(hom.Hs.inp.db) str(hom.Hs.inp.db) ## @knitr ourConn2 hom.Hs.inp.db$conn ## or better we can use a helper function to wrap this: AnnotationDbi:::dbConn(hom.Hs.inp.db) ## or we can just call the provided convenience function ## from when this package loads: hom.Hs.inp_dbconn() ## @knitr dbListTables con <- AnnotationDbi:::dbConn(hom.Hs.inp.db) head(dbListTables(con)) dbListFields(con, "Mus_musculus") ## @knitr dbGetQuery dbGetQuery(con, "SELECT * FROM metadata") ## @knitr dbListTables2 head(dbListTables(con)) ## @knitr dbListFields2 dbListFields(con, "Apis_mellifera") ## @knitr dbGetQuery2 head(dbGetQuery(con, "SELECT * FROM Apis_mellifera")) ## @knitr Anopheles,eval=FALSE ## head(dbGetQuery(con, "SELECT * FROM Anopheles_gambiae")) ## ## Then only retrieve human records ## ## Query: SELECT * FROM Anopheles_gambiae WHERE species='HOMSA' ## head(dbGetQuery(con, "SELECT * FROM Anopheles_gambiae WHERE species='HOMSA'")) ## dbDisconnect(con) ## @knitr getMetadata, echo=FALSE library(hom.Hs.inp.db) hom.Hs.inp_dbInfo() ## @knitr referenceClass,eval=FALSE ## .InparanoidDb <- ## setRefClass("InparanoidDb", contains="AnnotationDb") ## @knitr onLoad,eval=FALSE ## sPkgname <- sub(".db$","",pkgname) ## db <- loadDb(system.file("extdata", paste(sPkgname, ## ".sqlite",sep=""), package=pkgname, lib.loc=libname), ## packageName=pkgname) ## dbNewname <- AnnotationDbi:::dbObjectName(pkgname,"InparanoidDb") ## ns <- asNamespace(pkgname) ## assign(dbNewname, db, envir=ns) ## namespaceExport(ns, dbNewname) ## @knitr columns,eval=FALSE ## .cols <- function(x) ## { ## con <- AnnotationDbi:::dbConn(x) ## list <- dbListTables(con) ## ## drop unwanted tables ## unwanted <- c("map_counts","map_metadata","metadata") ## list <- list[!list %in% unwanted] ## ## Then just to format things in the usual way ## list <- toupper(list) ## dbDisconnect(con) ## list ## } ## ## ## Then make this into a method ## setMethod("columns", "InparanoidDb", .cols(x)) ## ## Then we can call it ## columns(hom.Hs.inp.db) ## @knitr keytypes,eval=FALSE ## setMethod("keytypes", "InparanoidDb", .cols(x)) ## ## Then we can call it ## keytypes(hom.Hs.inp.db) ## ## ## refactor of .cols ## .getLCcolnames <- function(x) ## { ## con <- AnnotationDbi:::dbConn(x) ## list <- dbListTables(con) ## ## drop unwanted tables ## unwanted <- c("map_counts","map_metadata","metadata") ## list <- list[!list %in% unwanted] ## dbDisconnect(con) ## list ## } ## .cols <- function(x) ## { ## list <- .getLCcolnames(x) ## ## Then just to format things in the usual way ## toupper(list) ## } ## ## Test: ## columns(hom.Hs.inp.db) ## ## ## new helper function: ## .getTableNames <- function(x) ## { ## LC <- .getLCcolnames(x) ## UC <- .cols(x) ## names(UC) <- LC ## UC ## } ## .getTableNames(hom.Hs.inp.db) ## @knitr keys,eval=FALSE ## .keys <- function(x, keytype) ## { ## ## translate keytype back to table name ## tabNames <- .getTableNames(x) ## lckeytype <- names(tabNames[tabNames %in% keytype]) ## ## get a connection ## con <- AnnotationDbi:::dbConn(x) ## sql <- paste("SELECT inp_id FROM",lckeytype, "WHERE species!='HOMSA'") ## res <- dbGetQuery(con, sql) ## res <- as.vector(t(res)) ## dbDisconnect(con) ## res ## } ## ## setMethod("keys", "InparanoidDb", .keys(x, keytype)) ## ## Then we can call it ## keys(hom.Hs.inp.db, "TRICHOPLAX_ADHAERENS") ## @knitr dbDisconnect dbDisconnect(con) ## @knitr makeNewDb drv <- dbDriver("SQLite") dbname <- file.path(tempdir(), "myNewDb.sqlite") con <- dbConnect(drv, dbname=dbname) ## @knitr exampleFrame data = data.frame(id=c(1,2,9), string=c("Blue", "Red", "Green"), stringsAsFactors=FALSE) ## @knitr exercise2 dbGetQuery(con, "CREATE Table genePheno (id INTEGER, string TEXT)") ## @knitr LabelledPreparedQueries names(data) <- c("id","string") sql <- "INSERT INTO genePheno VALUES ($id, $string)" dbBeginTransaction(con) dbGetPreparedQuery(con, sql, bind.data = data) dbCommit(con) ## @knitr ATTACH db <- system.file("extdata", "TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite", package="TxDb.Hsapiens.UCSC.hg19.knownGene") dbGetQuery(con, sprintf("ATTACH '%s' AS db",db)) ## @knitr ATTACHJoin sql <- "SELECT * FROM db.gene AS dbg, genePheno AS gp WHERE dbg.gene_id=gp.id" res <- dbGetQuery(con, sql) res ## @knitr SessionInfo, echo=FALSE sessionInfo()