Skip to content

NAN coding database and acquisition changes #417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
May 10, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4290775
NAN coding updates to covidcast:
dshemetov Feb 3, 2021
69a08aa
Update src/acquisition/covidcast/csv_importer.py
dshemetov Apr 15, 2021
cad81ff
Update src/acquisition/covidcast/csv_importer.py
dshemetov Apr 15, 2021
517d3da
Nans: add George's review suggestions
dshemetov Apr 15, 2021
dd80418
Merge branch 'main' of https://github.com/dshemetov/delphi-epidata in…
dshemetov Apr 15, 2021
a1538f6
Fix tests, all tests pass
dshemetov Apr 19, 2021
f8e2a19
Nans: add API documentation
dshemetov Apr 27, 2021
7d6c1f2
Merge branch 'main' of https://github.com/dshemetov/delphi-epidata in…
dshemetov Apr 27, 2021
fb4b05a
Update docs/api/covidcast.md
dshemetov Apr 28, 2021
e3974c2
Update docs/api/missing_codes.md
dshemetov Apr 28, 2021
5267d17
Update docs/api/missing_codes.md
dshemetov Apr 28, 2021
8929cc0
Update docs/api/missing_codes.md
dshemetov Apr 28, 2021
a550720
Update docs/api/missing_codes.md
dshemetov Apr 28, 2021
78f8f26
Switch to delphi_utils.nancodes
dshemetov Apr 28, 2021
691c7cd
Merge branch 'main' of https://github.com/dshemetov/delphi-epidata in…
dshemetov Apr 28, 2021
abf18dd
Merge pull request #517 from cmu-delphi/sgratzl/ci_fix
korlaxxalrok May 5, 2021
97921bc
Merge remote-tracking branch 'upstream/main' into main
dshemetov May 5, 2021
e78afc5
Nans: Update flask server to serve missing cols
dshemetov May 5, 2021
660b195
Nans: modify a few tests
dshemetov May 6, 2021
e9b080c
Nans: update to the finalized nancode names
dshemetov May 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import delphi.operations.secrets as secrets
import delphi.epidata.acquisition.covidcast.database as live
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main
from delphi.epidata.acquisition.covidcast.nancodes import Nans

# py3tester coverage target (equivalent to `import *`)
__test_target__ = (
Expand Down Expand Up @@ -65,17 +66,17 @@ def test_caching(self):
"""Populate, query, cache, query, and verify the cache."""

# insert dummy data
self.cur.execute('''
self.cur.execute(f'''
insert into covidcast values
(0, 'src', 'sig', 'day', 'state', 20200422, 'pa',
123, 1, 2, 3, 456, 1, 20200422, 0, 1, False),
123, 1, 2, 3, 456, 1, 20200422, 0, 1, False, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200422, 'wa',
789, 1, 2, 3, 456, 1, 20200423, 1, 1, False)
789, 1, 2, 3, 456, 1, 20200423, 1, 1, False, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
''')
self.cur.execute('''
self.cur.execute(f'''
insert into covidcast values
(100, 'src', 'wip_sig', 'day', 'state', 20200422, 'pa',
456, 4, 5, 6, 789, -1, 20200422, 0, 1, True)
456, 4, 5, 6, 789, -1, 20200422, 0, 1, True, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
''')

self.cnx.commit()
Expand Down
102 changes: 90 additions & 12 deletions integrations/acquisition/covidcast/test_csv_uploading.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from delphi.epidata.client.delphi_epidata import Epidata
from delphi.epidata.acquisition.covidcast.csv_to_database import main
import delphi.operations.secrets as secrets
from delphi.epidata.acquisition.covidcast.nancodes import Nans

# py3tester coverage target (equivalent to `import *`)
__test_target__ = 'delphi.epidata.acquisition.covidcast.csv_to_database'
Expand Down Expand Up @@ -66,27 +67,35 @@ def test_uploading(self):

# valid
with open(source_receiving_dir + '/20200419_state_test.csv', 'w') as f:
f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n')
f.write(f'ca,1,0.1,10,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')
f.write(f'tx,2,0.2,20,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')
f.write(f'fl,3,0.3,30,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')

# valid, old style no missing cols should have intelligent defaults
# TODO: Could be expanded to test more cases
with open(source_receiving_dir + '/20200419_state_test_no_missing.csv', 'w') as f:
f.write('geo_id,val,se,sample_size\n')
f.write('ca,1,0.1,10\n')
f.write('tx,2,0.2,20\n')
f.write('fl,3,0.3,30\n')
f.write('tx,NA,0.2,20\n')
f.write('wa,3,0.3,30\n')

# valid wip
with open(source_receiving_dir + '/20200419_state_wip_prototype.csv', 'w') as f:
f.write('geo_id,val,se,sample_size\n')
f.write('me,10,0.01,100\n')
f.write('nd,20,0.02,200\n')
f.write('wa,30,0.03,300\n')
f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n')
f.write(f'me,10,0.01,100,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')
f.write(f'nd,20,0.02,200,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')
f.write(f'wa,30,0.03,300,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')

# valid: long signal name that is still accepted
with open(source_receiving_dir + '/20200419_state_wip_really_long_name_that_will_be_accepted.csv', 'w') as f:
f.write('geo_id,val,se,sample_size\n')
f.write('pa,100,5.4,624\n')
f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n')
f.write(f'pa,100,5.4,624,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n')

# invalid
with open(source_receiving_dir + '/20200419_state_wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet.csv', 'w') as f:
f.write('geo_id,val,se,sample_size\n')
f.write('pa,100,5.4,624\n')
f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n')
f.write(f'pa,100,5.4,624,{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}\n')

# invalid
with open(source_receiving_dir + '/20200420_state_test.csv', 'w') as f:
Expand Down Expand Up @@ -137,6 +146,9 @@ def apply_lag(expected_epidata):
'sample_size': 10,
'direction': None,
'signal': 'test',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
Expand All @@ -146,6 +158,9 @@ def apply_lag(expected_epidata):
'sample_size': 30,
'direction': None,
'signal': 'test',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
Expand All @@ -155,15 +170,66 @@ def apply_lag(expected_epidata):
'sample_size': 20,
'direction': None,
'signal': 'test',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
]),
'message': 'success',
})

# request CSV data from the API on the test with missing values
response = Epidata.covidcast(
'src-name', 'test_no_missing', 'day', 'state', 20200419, '*')

# verify data matches the CSV
# NB these are ordered by geo_value
self.assertEqual(response, {
'result': 1,
'epidata': apply_lag([
{
'time_value': 20200419,
'geo_value': 'ca',
'value': 1,
'stderr': 0.1,
'sample_size': 10,
'direction': None,
'signal': 'test_no_missing',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
'geo_value': 'tx',
'value': None,
'stderr': 0.2,
'sample_size': 20,
'direction': None,
'signal': 'test_no_missing',
'missing_value': Nans.UNKNOWN,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
'geo_value': 'wa',
'value': 3,
'stderr': 0.3,
'sample_size': 30,
'direction': None,
'signal': 'test_no_missing',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
]),
'message': 'success',
})

# request CSV data from the API on WIP signal
response = Epidata.covidcast(
'src-name', 'wip_prototype', 'day', 'state', 20200419, '*')


# verify data matches the CSV
# NB these are ordered by geo_value
Expand All @@ -178,6 +244,9 @@ def apply_lag(expected_epidata):
'sample_size': 100,
'direction': None,
'signal': 'wip_prototype',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
Expand All @@ -187,6 +256,9 @@ def apply_lag(expected_epidata):
'sample_size': 200,
'direction': None,
'signal': 'wip_prototype',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
{
'time_value': 20200419,
Expand All @@ -196,6 +268,9 @@ def apply_lag(expected_epidata):
'sample_size': 300,
'direction': None,
'signal': 'wip_prototype',
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
]),
'message': 'success',
Expand All @@ -218,7 +293,10 @@ def apply_lag(expected_epidata):
'stderr': 5.4,
'sample_size': 624,
'direction': None,
'signal': 'wip_really_long_name_that_will_be_accepted',
'signal': 'wip_really_long_name_that_will_be_accepted',\
'missing_value': Nans.NOT_MISSING,
'missing_stderr': Nans.NOT_MISSING,
'missing_sample_size': Nans.NOT_MISSING,
},
])
})
Expand Down
94 changes: 62 additions & 32 deletions integrations/acquisition/covidcast/test_fill_is_latest_issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from delphi.epidata.client.delphi_epidata import Epidata
from delphi.epidata.acquisition.covidcast.fill_is_latest_issue import main
import delphi.operations.secrets as secrets
from delphi.epidata.acquisition.covidcast.nancodes import Nans

# py3tester coverage target (equivalent to `import *`)
__test_target__ = 'delphi.epidata.acquisition.covidcast.fill_is_latest_issue'
Expand Down Expand Up @@ -52,39 +53,54 @@ def _test_fill_is_latest_issue(self, clbp, use_filter):
"""Fill in `is_latest_issue` for the inserted rows and verify the resulting table contents."""

# NOTE: column order is:
# (id, source, signal, time_type, geo_type, time_value, geo_value,
# value_updated_timestamp, value, stderr, sample_size, direction_updated_timestamp, direction, issue, lag, is_latest_issue, is_wip)
# (id, source, signal, time_type, geo_type, time_value, geo_value,
# value_updated_timestamp, value, stderr, sample_size, direction_updated_timestamp,
# direction, issue, lag, is_latest_issue, is_wip, missing_value, missing_stderr, missing_sample_size)

self.cur.execute('''
self.cur.execute(f'''
insert into covidcast values
(0, 'src', 'sig', 'day', 'state', 20200228, 'ca',
123, 2, 5, 5, 5, NULL, 20200228, 0, 1, False),
123, 2, 5, 5, 5, NULL, 20200228, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200228, 'ca',
123, 2, 0, 0, 0, NULL, 20200229, 1, 1, False),
123, 2, 0, 0, 0, NULL, 20200229, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200229, 'ca',
123, 6, 0, 0, 0, NULL, 20200301, 1, 1, False),
123, 6, 0, 0, 0, NULL, 20200301, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200229, 'ca',
123, 6, 9, 9, 9, NULL, 20200229, 0, 1, False),
123, 6, 9, 9, 9, NULL, 20200229, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 0, 0, 0, NULL, 20200303, 2, 1, False),
123, 5, 0, 0, 0, NULL, 20200303, 2, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 5, 5, 5, NULL, 20200302, 1, 1, False),
123, 5, 5, 5, 5, NULL, 20200302, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 9, 8, 7, NULL, 20200301, 0, 1, False),
123, 5, 9, 8, 7, NULL, 20200301, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200228, 'ny',
123, 2, 5, 5, 5, NULL, 20200228, 0, 1, False),
123, 2, 5, 5, 5, NULL, 20200228, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200228, 'ny',
123, 2, 0, 0, 0, NULL, 20200229, 1, 1, False),
123, 2, 0, 0, 0, NULL, 20200229, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200229, 'ny',
123, 6, 0, 0, 0, NULL, 20200301, 1, 1, False),
123, 6, 0, 0, 0, NULL, 20200301, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200229, 'ny',
123, 6, 9, 9, 9, NULL, 20200229, 0, 1, False),
123, 6, 9, 9, 9, NULL, 20200229, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 0, 0, 0, NULL, 20200303, 2, 1, False),
123, 5, 0, 0, 0, NULL, 20200303, 2, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 5, 5, 5, NULL, 20200302, 1, 1, False),
123, 5, 5, 5, 5, NULL, 20200302, 1, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
(0, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 9, 8, 7, NULL, 20200301, 0, 1, False)
123, 5, 9, 8, 7, NULL, 20200301, 0, 1, False,
{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
''')
self.cnx.commit()

Expand All @@ -104,40 +120,54 @@ def _test_fill_is_latest_issue(self, clbp, use_filter):
result = list(self.cur)
expected = [
(1, 'src', 'sig', 'day', 'state', 20200228, 'ca',
123, 2, 5, 5, 5, None, 20200228, 0, bytearray(b'0'), bytearray(b'0')),
123, 2.0, 5.0, 5.0, 5, None, 20200228, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(2, 'src', 'sig', 'day', 'state', 20200228, 'ca',
123, 2, 0, 0, 0, None, 20200229, 1, bytearray(b'1'), bytearray(b'0')),
123, 2.0, 0.0, 0.0, 0, None, 20200229, 1, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(3, 'src', 'sig', 'day', 'state', 20200229, 'ca',
123, 6, 0, 0, 0, None, 20200301, 1, bytearray(b'1'), bytearray(b'0')),
123, 6.0, 0.0, 0.0, 0, None, 20200301, 1, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(4, 'src', 'sig', 'day', 'state', 20200229, 'ca',
123, 6, 9, 9, 9, None, 20200229, 0, bytearray(b'0'), bytearray(b'0')),
123, 6.0, 9.0, 9.0, 9, None, 20200229, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(5, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 0, 0, 0, None, 20200303, 2, bytearray(b'1'), bytearray(b'0')),
123, 5.0, 0.0, 0.0, 0, None, 20200303, 2, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(6, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 5, 5, 5, None, 20200302, 1, bytearray(b'0'), bytearray(b'0')),
123, 5.0, 5.0, 5.0, 5, None, 20200302, 1, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(7, 'src', 'sig', 'day', 'state', 20200301, 'ca',
123, 5, 9, 8, 7, None, 20200301, 0, bytearray(b'0'), bytearray(b'0')),
123, 5.0, 9.0, 8.0, 7, None, 20200301, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(8, 'src', 'sig', 'day', 'state', 20200228, 'ny',
123, 2, 5, 5, 5, None, 20200228, 0, bytearray(b'0'), bytearray(b'0')),
123, 2.0, 5.0, 5.0, 5, None, 20200228, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(9, 'src', 'sig', 'day', 'state', 20200228, 'ny',
123, 2, 0, 0, 0, None, 20200229, 1, bytearray(b'1'), bytearray(b'0')),
123, 2.0, 0.0, 0.0, 0, None, 20200229, 1, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(10, 'src', 'sig', 'day', 'state', 20200229, 'ny',
123, 6, 0, 0, 0, None, 20200301, 1, bytearray(b'1'), bytearray(b'0')),
123, 6.0, 0.0, 0.0, 0, None, 20200301, 1, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(11, 'src', 'sig', 'day', 'state', 20200229, 'ny',
123, 6, 9, 9, 9, None, 20200229, 0, bytearray(b'0'), bytearray(b'0')),
123, 6.0, 9.0, 9.0, 9, None, 20200229, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(12, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 0, 0, 0, None, 20200303, 2, bytearray(b'1'), bytearray(b'0')),
123, 5.0, 0.0, 0.0, 0, None, 20200303, 2, bytearray(b'1'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(13, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 5, 5, 5, None, 20200302, 1, bytearray(b'0'), bytearray(b'0')),
123, 5.0, 5.0, 5.0, 5, None, 20200302, 1, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
(14, 'src', 'sig', 'day', 'state', 20200301, 'ny',
123, 5, 9, 8, 7, None, 20200301, 0, bytearray(b'0'), bytearray(b'0'))
123, 5.0, 9.0, 8.0, 7, None, 20200301, 0, bytearray(b'0'), bytearray(b'0'),
Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING)
]

if use_filter:
# revert ny is_latest values
for i in range(7, 14):
x = list(expected[i])
x[-2] = bytearray(b'1')
x[-5] = bytearray(b'1')
expected[i] = tuple(x)

self.assertEqual(result, expected)
Expand Down
Loading