Closed
Description
Safegraph is failing tests on main, probably because of conflicting geo mapper changes:
ERROR test_run.py::TestRun::test_output_files_exist - KeyError: "['geo_id'] not in index"
ERROR test_run.py::TestRun::test_output_files_format - KeyError: "['geo_id'] not in index"
Traceback 1:
_____________________________________________________________________________________________ ERROR at setup of TestRun.test_output_files_exist ______________________________________________________________________________________________
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 236, in process
process_window(past_week[:1],
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 183, in process_window
df_export = aggregated_df[
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/frame.py", line 2912, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1304, in _validate_read_indexer
raise KeyError(f"{not_found} not in index")
KeyError: "['geo_id'] not in index"
"""
The above exception was the direct cause of the following exception:
@pytest.fixture(scope="session")
def run_as_module():
# Clean receiving directory
for fname in os.listdir("receiving"):
if ".csv" in fname:
os.remove(join("receiving", fname))
> run_module()
conftest.py:17:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../delphi_safegraph/run.py:76: in run_module
pool.map(single_arg_process, files_with_previous_weeks)
/usr/lib/python3.8/multiprocessing/pool.py:364: in map
return self._map_async(func, iterable, mapstar, chunksize).get()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <multiprocessing.pool.MapResult object at 0x7f927004e0a0>, timeout = None
def get(self, timeout=None):
self.wait(timeout)
if not self.ready():
raise TimeoutError
if self._success:
return self._value
else:
> raise self._value
E KeyError: "['geo_id'] not in index"
/usr/lib/python3.8/multiprocessing/pool.py:771: KeyError
Traceback 2:
_____________________________________________________________________________________________ ERROR at setup of TestRun.test_output_files_format _____________________________________________________________________________________________
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 236, in process
process_window(past_week[:1],
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 183, in process_window
df_export = aggregated_df[
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/frame.py", line 2912, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1304, in _validate_read_indexer
raise KeyError(f"{not_found} not in index")
KeyError: "['geo_id'] not in index"
"""
The above exception was the direct cause of the following exception:
@pytest.fixture(scope="session")
def run_as_module():
# Clean receiving directory
for fname in os.listdir("receiving"):
if ".csv" in fname:
os.remove(join("receiving", fname))
> run_module()
conftest.py:17:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../delphi_safegraph/run.py:76: in run_module
pool.map(single_arg_process, files_with_previous_weeks)
/usr/lib/python3.8/multiprocessing/pool.py:364: in map
return self._map_async(func, iterable, mapstar, chunksize).get()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <multiprocessing.pool.MapResult object at 0x7f927004e0a0>, timeout = None
def get(self, timeout=None):
self.wait(timeout)
if not self.ready():
raise TimeoutError
if self._success:
return self._value
else:
> raise self._value
E KeyError: "['geo_id'] not in index"
/usr/lib/python3.8/multiprocessing/pool.py:771: KeyError