@@ -11,19 +11,20 @@ library(dplyr)
11
11
library(ggplot2)
12
12
library(DT)
13
13
library(scales)
14
+ library(readr)
14
15
15
16
options(stringsAsFactors=FALSE)
16
-
17
- write.delim <- function(x, file, sep='\t', quote = FALSE, row.names=FALSE, na = '', ...) {
18
- write.table(x = x, file = file, sep=sep, quote=quote, row.names=row.names, na=na, ...)
19
- }
20
17
```
21
18
22
19
``` {r}
23
20
# Read bindingdb and remove non-human interactions
24
21
binding.db <- file.path('data', 'binding.tsv.gz') %>%
25
- read.delim(stringsAsFactors=FALSE) %>%
26
- dplyr::filter(organism == 'Homo sapiens')
22
+ readr::read_tsv() %>%
23
+ dplyr::filter(organism == 'Homo sapiens') %>%
24
+ dplyr::filter(! is.na(affinity_nM)) %>%
25
+ dplyr::mutate(
26
+ source=plyr::mapvalues(source, c('Curated from the literature by BindingDB'), c('BindingDB'))
27
+ )
27
28
28
29
# View a subset of the data.frame
29
30
binding.db %>% dplyr::sample_n(200) %>% dplyr::select(-c(pubmed, doi)) %>% DT::datatable()
@@ -32,11 +33,9 @@ binding.db %>% dplyr::sample_n(200) %>% dplyr::select(-c(pubmed, doi)) %>% DT::d
32
33
33
34
``` {r}
34
35
# Read the drugbank to bindingDB fuzzy mappings produced using UniChem
35
- map.df <- 'http://git.dhimmel.com/drugbank/data/mapping/bindingdb.tsv' %>%
36
- read.delim(stringsAsFactors=FALSE)
37
-
38
36
# Restrict to compounds in drugbank
39
- joined.df <- map.df %>%
37
+ joined.df <- 'https://raw.githubusercontent.com/dhimmel/drugbank/3e87872db5fca5ac427ce27464ab945c0ceb4ec6/data/mapping/bindingdb.tsv' %>%
38
+ readr::read_tsv() %>%
40
39
dplyr::inner_join(binding.db)
41
40
```
42
41
@@ -51,14 +50,15 @@ geom.mean <- function(x) {
51
50
ResolveAffinity <- function(df) {
52
51
# Preferentially selects the affinity measure. If multiple measurements
53
52
# exist for the same compound-protein pair, the geometric mean is taken.
54
- measures <- df$measure
55
53
for (measure in c('Kd', 'Ki', 'IC50')) {
56
- if (is.element(measure, measures )) {
57
- values <- df$affinity_nM[measures == measure]
54
+ if (is.element(measure, df$measure )) {
55
+ measure.df <- df[df$measure == measure, ]
58
56
return.df <- data.frame(
59
57
measure = measure,
60
- affinity_nM = round(geom.mean(values), 5),
61
- n_measures = length(values))
58
+ affinity_nM = round(geom.mean(measure.df$affinity_nM), 5),
59
+ n_measures = nrow(measure.df),
60
+ sources = paste(unique(na.omit(measure.df$source)), collapse=','),
61
+ pubmeds = paste(unique(na.omit(measure.df$pubmed)), collapse=','))
62
62
return(return.df)
63
63
}
64
64
}
@@ -71,7 +71,7 @@ collapse.df <- joined.df %>%
71
71
dplyr::ungroup()
72
72
73
73
collapse.df %>%
74
- write.delim ('data/bindings-drugbank-collapsed.tsv')
74
+ readr::write_tsv ('data/bindings-drugbank-collapsed.tsv')
75
75
76
76
# View a subset of the data.frame
77
77
collapse.df %>% dplyr::sample_n(200) %>% DT::datatable()
@@ -80,26 +80,29 @@ collapse.df %>% dplyr::sample_n(200) %>% DT::datatable()
80
80
`r nrow(collapse.df)` compound--protein pairs were assayed.
81
81
82
82
``` {r}
83
- drugbank.df <- 'http://git.dhimmel.com/drugbank/data/drugbank.tsv' %>%
84
- read.delim () %>%
83
+ drugbank.df <- 'https://raw.githubusercontent.com/dhimmel/drugbank/3e87872db5fca5ac427ce27464ab945c0ceb4ec6/data/drugbank.tsv' %>%
84
+ readr::read_tsv () %>%
85
85
dplyr::mutate(drugbank_approved = as.integer(grepl('approved', groups))) %>%
86
86
dplyr::transmute(drugbank_id, drugbank_name = name, drugbank_approved)
87
87
88
- entrez.df <- 'http://git.dhimmel.com/entrez-gene/data/symbols-human.tsv' %>%
89
- read.delim () %>%
88
+ entrez.df <- 'https://raw.githubusercontent.com/dhimmel/entrez-gene/5352b31e04ec136e99d25a0ba63e8867aa71b69f/data/genes-human.tsv' %>%
89
+ readr::read_tsv () %>%
90
90
dplyr::transmute(entrez_gene = GeneID, gene_symbol = Symbol)
91
91
92
92
gene.df <- collapse.df %>%
93
93
dplyr::group_by(drugbank_id, entrez_gene) %>%
94
94
dplyr::summarize(
95
95
affinity_nM = min(affinity_nM),
96
- n_pairs = n()) %>%
96
+ n_pairs = n(),
97
+ sources = paste(unique(sources), collapse=','),
98
+ pubmeds = paste(unique(pubmeds), collapse=',')
99
+ ) %>%
97
100
dplyr::ungroup() %>%
98
101
dplyr::left_join(drugbank.df) %>%
99
102
dplyr::left_join(entrez.df)
100
103
101
104
gene.df %>%
102
- write.delim ('data/bindings-drugbank-gene.tsv')
105
+ readr::write_tsv ('data/bindings-drugbank-gene.tsv')
103
106
104
107
# View bindings for approved drugs
105
108
gene.df %>%
0 commit comments