1
+ import sys
2
+
1
3
import numpy as np
2
4
import pandas as pd
5
+ import pytest
6
+ from numpy import typing as npt
3
7
from numpy .testing import assert_array_equal
4
8
from pytest import FixtureRequest
5
9
from upath import UPath
6
10
11
+ from gwas .compression .arr .base import Blosc2CompressionMethod , compression_methods
7
12
from gwas .mem .wkspace import SharedWorkspace
8
13
from gwas .pheno import VariableCollection
14
+ from gwas .utils import cpu_count
9
15
10
16
from .simulation import missing_value_rate
11
17
18
+ try :
19
+ import blosc2 as blosc2
20
+ except ImportError :
21
+ pass
22
+
12
23
# Dimensions of the synthetic data set used by the fixtures and tests below.
sample_count = 100
phenotype_count = 16
covariate_count = 4

# Sample identifiers are one-based and zero-padded to three digits
# ("001" … "100"); no surrounding whitespace is part of the label.
samples = [f"{i + 1:03d}" for i in range(sample_count)]

# Column labels are one-based and zero-padded to two digits.
phenotype_names = [f"phenotype_{i + 1:02d}" for i in range(phenotype_count)]
covariate_names = [f"covariate_{i + 1:02d}" for i in range(covariate_count)]
21
31
22
32
23
- def test_pheno (
24
- tmp_path : UPath ,
25
- sw : SharedWorkspace ,
26
- request : FixtureRequest ,
27
- ) -> None :
28
- np .random .seed (47 )
29
- allocation_names = set (sw .allocations .keys ())
33
@pytest.fixture(scope="session")
def permutation() -> npt.NDArray[np.int_]:
    """Return a fixed pseudo-random ordering of the sample indices.

    Returns:
        A permutation of ``range(sample_count)`` that is identical on every
        run because the generator is seeded with a constant.
    """
    # A private legacy generator seeded with 46 produces the same draw as
    # ``np.random.seed(46)`` followed by ``np.random.permutation``, but it does
    # not reseed the process-global generator, so random draws made elsewhere
    # do not depend on when this session fixture happens to be instantiated.
    rng = np.random.RandomState(46)
    return rng.permutation(sample_count)
30
37
38
+
39
+ @pytest .fixture (scope = "session" )
40
+ def phenotypes () -> npt .NDArray [np .float64 ]:
41
+ np .random .seed (47 )
31
42
phenotypes = np .random .rand (sample_count , phenotype_count )
32
43
phenotypes [
33
44
np .random .choice (
@@ -36,8 +47,22 @@ def test_pheno(
36
47
p = [1 - missing_value_rate , missing_value_rate ],
37
48
)
38
49
] = np .nan
39
- covariates = np . random . rand ( sample_count , covariate_count )
50
+ return phenotypes
40
51
52
+
53
@pytest.fixture(scope="session")
def covariates() -> npt.NDArray[np.float64]:
    """Return a fixed matrix of uniformly distributed covariate values.

    Returns:
        An array of shape ``(sample_count, covariate_count)`` with values
        drawn from ``[0, 1)``; identical on every run because the generator
        is seeded with a constant.
    """
    # A private legacy generator seeded with 48 yields the same values as
    # ``np.random.seed(48)`` followed by ``np.random.rand``, without reseeding
    # the process-global generator shared with every other test.
    rng = np.random.RandomState(48)
    return rng.rand(sample_count, covariate_count)
57
+
58
+
59
+ @pytest .fixture (scope = "session" )
60
+ def phenotype_path (
61
+ phenotypes : npt .NDArray [np .float64 ],
62
+ permutation : npt .NDArray [np .int_ ],
63
+ tmp_path_factory : pytest .TempPathFactory ,
64
+ ) -> UPath :
65
+ tmp_path = UPath (tmp_path_factory .mktemp ("phenotypes" ))
41
66
phenotype_frame = pd .DataFrame (
42
67
phenotypes [permutation , :],
43
68
columns = phenotype_names ,
@@ -47,7 +72,16 @@ def test_pheno(
47
72
phenotype_frame .to_csv (
48
73
phenotype_path , sep = "\t " , index = True , header = True , na_rep = "n/a"
49
74
)
75
+ return phenotype_path
50
76
77
+
78
+ @pytest .fixture (scope = "session" )
79
+ def covariate_path (
80
+ covariates : npt .NDArray [np .float64 ],
81
+ permutation : npt .NDArray [np .int_ ],
82
+ tmp_path_factory : pytest .TempPathFactory ,
83
+ ) -> UPath :
84
+ tmp_path = UPath (tmp_path_factory .mktemp ("covariates" ))
51
85
covariate_frame = pd .DataFrame (
52
86
covariates [permutation , :],
53
87
columns = covariate_names ,
@@ -57,6 +91,18 @@ def test_pheno(
57
91
covariate_frame .to_csv (
58
92
covariate_path , sep = "\t " , index = True , header = True , na_rep = "n/a"
59
93
)
94
+ return covariate_path
95
+
96
+
97
+ def test_pheno (
98
+ phenotypes : npt .NDArray [np .float64 ],
99
+ covariates : npt .NDArray [np .float64 ],
100
+ phenotype_path : UPath ,
101
+ covariate_path : UPath ,
102
+ sw : SharedWorkspace ,
103
+ request : FixtureRequest ,
104
+ ) -> None :
105
+ allocation_names = set (sw .allocations .keys ())
60
106
61
107
variable_collection0 = VariableCollection .from_txt (
62
108
[phenotype_path ],
@@ -135,3 +181,40 @@ def test_pheno_zero_variance(
135
181
variable_collection .covariates .name ,
136
182
}
137
183
assert set (sw .allocations .keys ()) <= (allocation_names | new_allocation_names )
184
+
185
+
186
@pytest.mark.parametrize("compression_method_name", compression_methods.keys())
def test_covariance(
    compression_method_name: str,
    phenotype_path: UPath,
    covariate_path: UPath,
    sw: SharedWorkspace,
    tmp_path: UPath,
    request: FixtureRequest,
) -> None:
    """Write the covariance table once per registered compression method.

    The case is skipped for Blosc2-based methods when the ``blosc2`` module
    has not been imported. After writing, the test checks that the shared
    workspace holds no allocations other than the ones present beforehand
    plus the collection's phenotype and covariate arrays.
    """
    method = compression_methods[compression_method_name]
    # The optional module-level import only populates ``sys.modules`` when
    # blosc2 is actually available.
    blosc2_missing = "blosc2" not in sys.modules
    if isinstance(method, Blosc2CompressionMethod) and blosc2_missing:
        pytest.skip("blosc2 not installed")

    # Snapshot the workspace before anything is loaded.
    names_before = set(sw.allocations.keys())

    collection = VariableCollection.from_txt(
        [phenotype_path],
        [covariate_path],
        sw,
        samples=samples,
        missing_value_strategy="listwise_deletion",
    )
    # Release the collection's workspace arrays when the test finishes.
    request.addfinalizer(collection.free)

    collection.covariance_to_txt(
        tmp_path / "covariance.tsv",
        method,
        num_threads=cpu_count(),
    )

    # Only the collection's own arrays may have been added to the workspace.
    names_allowed = names_before | {
        collection.phenotypes.name,
        collection.covariates.name,
    }
    assert set(sw.allocations.keys()) <= names_allowed
0 commit comments