dhimmel
diff --git a/‎collapse.Rmd
Lines changed: 25 additions & 22 deletions b/‎collapse.Rmd
Lines changed: 25 additions & 22 deletions
diff --git a/‎collapse.html
Lines changed: 97 additions & 142 deletions b/‎collapse.html
Lines changed: 97 additions & 142 deletions
diff --git a/‎data/binding.tsv.gz
-396 KB b/‎data/binding.tsv.gz
-396 KB
@@ -11,19 +11,20 @@ library(dplyr)
 library(ggplot2)
 library(DT)
 library(scales)
+library(readr)
 
 options(stringsAsFactors=FALSE)
-
-write.delim <- function(x, file, sep='\t', quote = FALSE, row.names=FALSE, na = '', ...) {
-  write.table(x = x, file = file, sep=sep, quote=quote, row.names=row.names, na=na, ...)
-}
 ```
 
 ```{r}
 # Read bindingdb and remove non-human interactions
 binding.db <- file.path('data', 'binding.tsv.gz') %>%
-  read.delim(stringsAsFactors=FALSE) %>%
-  dplyr::filter(organism == 'Homo sapiens')
+  readr::read_tsv() %>%
+  dplyr::filter(organism == 'Homo sapiens') %>%
+  dplyr::filter(! is.na(affinity_nM)) %>%
+  dplyr::mutate(
+    source=plyr::mapvalues(source, c('Curated from the literature by BindingDB'), c('BindingDB'))
+  )
 
 # View a subset of the data.frame
 binding.db %>% dplyr::sample_n(200) %>% dplyr::select(-c(pubmed, doi)) %>% DT::datatable()
@@ -32,11 +33,9 @@ binding.db %>% dplyr::sample_n(200) %>% dplyr::select(-c(pubmed, doi)) %>% DT::d
 
 ```{r}
 # Read the drugbank to bindingDB fuzzy mappings produced using UniChem
-map.df <- 'http://git.dhimmel.com/drugbank/data/mapping/bindingdb.tsv' %>%
-  read.delim(stringsAsFactors=FALSE)
-
 # Restrict to compounds in drugbank
-joined.df <- map.df %>%
+joined.df <- 'https://raw.githubusercontent.com/dhimmel/drugbank/3e87872db5fca5ac427ce27464ab945c0ceb4ec6/data/mapping/bindingdb.tsv' %>%
+  readr::read_tsv() %>%
   dplyr::inner_join(binding.db)
 ```
 
@@ -51,14 +50,15 @@ geom.mean <- function(x) {
 ResolveAffinity <- function(df) {
   # Preferentially selects the affinity measure. If multiple measurements
   # exist for the same compound-protein pair, the geometric mean is taken.
-  measures <- df$measure
   for (measure in c('Kd', 'Ki', 'IC50')) {
-    if (is.element(measure, measures)) {
-      values <- df$affinity_nM[measures == measure]
+    if (is.element(measure, df$measure)) {
+      measure.df <- df[df$measure == measure, ]
       return.df <- data.frame(
         measure = measure,
-        affinity_nM = round(geom.mean(values), 5),
-        n_measures = length(values))
+        affinity_nM = round(geom.mean(measure.df$affinity_nM), 5),
+        n_measures = nrow(measure.df),
+        sources = paste(unique(na.omit(measure.df$source)), collapse=','),
+        pubmeds = paste(unique(na.omit(measure.df$pubmed)), collapse=','))
       return(return.df)
     }
   }
@@ -71,7 +71,7 @@ collapse.df <- joined.df %>%
   dplyr::ungroup()
 
 collapse.df %>%
-  write.delim('data/bindings-drugbank-collapsed.tsv')
+  readr::write_tsv('data/bindings-drugbank-collapsed.tsv')
 
 # View a subset of the data.frame
 collapse.df %>% dplyr::sample_n(200) %>% DT::datatable()
@@ -80,26 +80,29 @@ collapse.df %>% dplyr::sample_n(200) %>% DT::datatable()
 `r nrow(collapse.df)` compound--protein pairs were assayed.
 
 ```{r}
-drugbank.df <- 'http://git.dhimmel.com/drugbank/data/drugbank.tsv' %>%
-  read.delim() %>%
+drugbank.df <- 'https://raw.githubusercontent.com/dhimmel/drugbank/3e87872db5fca5ac427ce27464ab945c0ceb4ec6/data/drugbank.tsv' %>%
+  readr::read_tsv() %>%
   dplyr::mutate(drugbank_approved = as.integer(grepl('approved', groups))) %>%
   dplyr::transmute(drugbank_id, drugbank_name = name, drugbank_approved)  
 
-entrez.df <- 'http://git.dhimmel.com/entrez-gene/data/symbols-human.tsv' %>%
-  read.delim() %>%
+entrez.df <- 'https://raw.githubusercontent.com/dhimmel/entrez-gene/5352b31e04ec136e99d25a0ba63e8867aa71b69f/data/genes-human.tsv' %>%
+  readr::read_tsv() %>%
   dplyr::transmute(entrez_gene = GeneID, gene_symbol = Symbol)
 
 gene.df <- collapse.df %>%
   dplyr::group_by(drugbank_id, entrez_gene) %>%
   dplyr::summarize(
     affinity_nM = min(affinity_nM),
-    n_pairs = n()) %>%
+    n_pairs = n(),
+    sources = paste(unique(sources), collapse=','),
+    pubmeds = paste(unique(pubmeds), collapse=',')
+    ) %>%
   dplyr::ungroup() %>%
   dplyr::left_join(drugbank.df) %>%
   dplyr::left_join(entrez.df)
 
 gene.df %>%
-  write.delim('data/bindings-drugbank-gene.tsv')
+  readr::write_tsv('data/bindings-drugbank-gene.tsv')
 
 # View bindings for approved drugs
 gene.df %>%