Skip to content

Commit db9392a

Browse files
agalitsynanvictus
andauthored
bugfix(snipping): features with empty regions return snips filled with NaNs (#216)
* bugfix(snipping): features with empty regions return snips filled with NaNs. Related to 1a63791 which turned out to be not complete fix. * bugfix(snipping): Docstrings added, code simplified. * bugfix+tests(snipping): Snipping adapted for pandas failing sometimes while grouping np.nans. Pandas fails sometimes on groupby with np.nan in the dataframe: pandas-dev/pandas#35202 With this fix, we replace NaN regions (unannotated) with empty string, and then do the grouping. Tests of snipping added, covering NaN in "region" column of the windows dataframe for snipping. * style(snipping test): improved * formatting imporoved * Update cooltools/snipping.py Co-authored-by: Nezar Abdennur <[email protected]>
1 parent 53ce4fc commit db9392a

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

cooltools/snipping.py

+12
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,16 @@ def assign_regions(features, supports):
134134

135135
def _pileup(data_select, data_snip, arg):
136136
support, feature_group = arg
137+
138+
# check if region is unannotated
139+
if len(support) == 0:
140+
lo = feature_group["lo"].values[0]
141+
hi = feature_group["hi"].values[0]
142+
s = hi-lo # Shape of individual snip
143+
stack = np.full((s, s, len(feature_group)), np.nan)
144+
# return empty snippets if region is not annotated
145+
return stack, feature_group["_rank"].values
146+
137147
# check if support region is on- or off-diagonal
138148
if len(support) == 2:
139149
region1, region2 = map(bioframe.region.parse_region_string, support)
@@ -183,13 +193,15 @@ def pileup(features, data_select, data_snip, map=map):
183193
warnings.warn("Some features do not have regions assigned! Some snips will be empty.")
184194

185195
features = features.copy()
196+
features['region'] = features.region.fillna('') # fill in unanotated regions with empty string
186197
features["_rank"] = range(len(features))
187198

188199
# cumul_stack = []
189200
# orig_rank = []
190201
cumul_stack, orig_rank = zip(
191202
*map(
192203
partial(_pileup, data_select, data_snip),
204+
# Note that unannotated regions will form a separate group
193205
features.groupby("region", sort=False),
194206
)
195207
)

tests/test_snipping.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import cooler
2+
import os.path as op
3+
4+
import cooltools.snipping
5+
import numpy as np
6+
7+
8+
def test_snipper(request):
9+
"""
10+
Test the snipping on matrix:
11+
"""
12+
# Read cool file and create regions out of it:
13+
clr = cooler.Cooler(op.join(request.fspath.dirname, "data/CN.mm9.1000kb.cool"))
14+
15+
regions = [(chrom, 0, clr.chromsizes[chrom]) for chrom in clr.chromnames]
16+
17+
# Example region with windows, two regions from annotated genomic regions:
18+
windows = cooltools.snipping.make_bin_aligned_windows(
19+
1_000_000, ["chr10", "chr10"], [10_000_000, 20_000_000], flank_bp=2_000_000
20+
)
21+
22+
windows = cooltools.snipping.assign_regions(windows, regions).reset_index(drop=True)
23+
24+
snipper = cooltools.snipping.CoolerSnipper(clr)
25+
stack = cooltools.snipping.pileup(windows, snipper.select, snipper.snip, map=map)
26+
27+
# Check that the size of snips is OK and there are two of them:
28+
assert stack.shape == (5, 5, 2)
29+
30+
# Example region with windows, second window comes from unannotated genomic region:
31+
windows = cooltools.snipping.make_bin_aligned_windows(
32+
1_000_000, ["chr10", "chr10"], [10_000_000, 150_000_000], flank_bp=2_000_000
33+
)
34+
35+
windows = cooltools.snipping.assign_regions(windows, regions).reset_index(drop=True)
36+
37+
snipper = cooltools.snipping.CoolerSnipper(clr)
38+
stack = cooltools.snipping.pileup(windows, snipper.select, snipper.snip, map=map)
39+
40+
assert stack.shape == (5, 5, 2)
41+
assert np.all(np.isfinite(stack[:, :, 0]))
42+
assert np.all(np.isnan(stack[:, :, 1]))

0 commit comments

Comments
 (0)