Skip to content

Commit 965f102

Browse files
authored
[qob] Fix IBD and enable tests (#14062)
CHANGELOG: Fixed bugs in the identity by descent implementation for Query on Batch. This PR fixes #14052. There were two bugs in how we compute IBD. In addition, the tests weren't running in QoB, and the test dataset we were using didn't have enough variability to catch errors. I used Balding-Nichols generated data instead. Do we need to set the seed in the tests here?
1 parent d1823fa commit 965f102

File tree

2 files changed

+23
-19
lines changed

2 files changed

+23
-19
lines changed

hail/python/hail/methods/relatedness/identity_by_descent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def identity_by_descent(dataset, maf=None, bounded=True, min=None, max=None) ->
147147
_e00=(2 * (p**2) * (q**2) * ((X - 1) / X) * ((Y - 1) / Y) * (T / (T - 1)) * (T / (T - 2)) * (T / (T - 3))),
148148
_e10=(
149149
4 * (p**3) * q * ((X - 1) / X) * ((X - 2) / X) * (T / (T - 1)) * (T / (T - 2)) * (T / (T - 3))
150-
+ 4 * p * (q**3) * ((Y - 1) / X) * ((Y - 2) / X) * (T / (T - 1)) * (T / (T - 2)) * (T / (T - 3))
150+
+ 4 * p * (q**3) * ((Y - 1) / Y) * ((Y - 2) / Y) * (T / (T - 1)) * (T / (T - 2)) * (T / (T - 3))
151151
),
152152
_e20=(
153153
(p**4) * ((X - 1) / X) * ((X - 2) / X) * ((X - 3) / X) * (T / (T - 1)) * (T / (T - 2)) * (T / (T - 3))
@@ -164,7 +164,7 @@ def identity_by_descent(dataset, maf=None, bounded=True, min=None, max=None) ->
164164
+ (p**2) * q * ((X - 1) / X) * (T / (T - 1)) * (T / (T - 2))
165165
+ p * (q**2) * ((Y - 1) / Y) * (T / (T - 1)) * (T / (T - 2))
166166
),
167-
_e22=(T / 2),
167+
_e22=1,
168168
)
169169

170170
dataset = dataset.checkpoint(hl.utils.new_temp_file())

hail/python/test/hail/methods/relatedness/test_identity_by_descent.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,19 @@
55

66
import hail as hl
77
import hail.utils as utils
8-
from ...helpers import get_dataset, test_timeout, qobtest
8+
from ...helpers import test_timeout, qobtest
99

1010

11-
def plinkify(ds, min=None, max=None):
11+
@pytest.fixture(scope='module')
12+
def ds():
13+
dataset = hl.balding_nichols_model(1, 100, 100)
14+
dataset = dataset.key_cols_by(s=hl.str(dataset.sample_idx + 1))
15+
return dataset
16+
17+
18+
def plinkify(dataset, min=None, max=None):
1219
vcf = utils.new_temp_file(prefix="plink", extension="vcf")
13-
hl.export_vcf(ds, vcf)
20+
hl.export_vcf(dataset, vcf)
1421

1522
local_tmpdir = utils.new_local_temp_dir()
1623
plinkpath = f'{local_tmpdir}/plink-ibd'
@@ -45,9 +52,7 @@ def plinkify(ds, min=None, max=None):
4552
@qobtest
4653
@unittest.skipIf('HAIL_TEST_SKIP_PLINK' in os.environ, 'Skipping tests requiring plink')
4754
@test_timeout(local=10 * 60, batch=10 * 60)
48-
def test_ibd_default_arguments():
49-
ds = get_dataset()
50-
55+
def test_ibd_default_arguments(ds):
5156
plink_results = plinkify(ds)
5257
hail_results = hl.identity_by_descent(ds).collect()
5358

@@ -62,11 +67,10 @@ def test_ibd_default_arguments():
6267
assert plink_results[key][1][2] == row.ibs2
6368

6469

70+
@qobtest
6571
@unittest.skipIf('HAIL_TEST_SKIP_PLINK' in os.environ, 'Skipping tests requiring plink')
6672
@test_timeout(local=10 * 60, batch=10 * 60)
67-
def test_ibd_0_and_1():
68-
ds = get_dataset()
69-
73+
def test_ibd_0_and_1(ds):
7074
plink_results = plinkify(ds, min=0.0, max=1.0)
7175
hail_results = hl.identity_by_descent(ds).collect()
7276

@@ -81,15 +85,15 @@ def test_ibd_0_and_1():
8185
assert plink_results[key][1][2] == row.ibs2
8286

8387

88+
@qobtest
8489
@test_timeout(local=10 * 60, batch=10 * 60)
85-
def test_ibd_does_not_error_with_dummy_maf_float64():
86-
dataset = get_dataset()
87-
dataset = dataset.annotate_rows(dummy_maf=0.01)
88-
hl.identity_by_descent(dataset, dataset['dummy_maf'], min=0.0, max=1.0)
90+
def test_ibd_does_not_error_with_dummy_maf_float64(ds):
91+
ds = ds.annotate_rows(dummy_maf=0.01)
92+
hl.identity_by_descent(ds, ds['dummy_maf'], min=0.0, max=1.0)
8993

9094

95+
@qobtest
9196
@test_timeout(local=10 * 60, batch=10 * 60)
92-
def test_ibd_does_not_error_with_dummy_maf_float32():
93-
dataset = get_dataset()
94-
dataset = dataset.annotate_rows(dummy_maf=0.01)
95-
hl.identity_by_descent(dataset, hl.float32(dataset['dummy_maf']), min=0.0, max=1.0)
97+
def test_ibd_does_not_error_with_dummy_maf_float32(ds):
98+
ds = ds.annotate_rows(dummy_maf=0.01)
99+
hl.identity_by_descent(ds, hl.float32(ds['dummy_maf']), min=0.0, max=1.0)

0 commit comments

Comments
 (0)