Skip to content

Safegraph is failing tests in main #418

Closed
@krivard

Description

@krivard

Safegraph is failing tests in main; probably due to a collision of geo mapper changes:

ERROR test_run.py::TestRun::test_output_files_exist - KeyError: "['geo_id'] not in index"
ERROR test_run.py::TestRun::test_output_files_format - KeyError: "['geo_id'] not in index"

Traceback 1:

_____________________________________________________________________________________________ ERROR at setup of TestRun.test_output_files_exist ______________________________________________________________________________________________
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
    return list(map(*args))
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 236, in process
    process_window(past_week[:1],
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 183, in process_window
    df_export = aggregated_df[
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/frame.py", line 2912, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
    self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1304, in _validate_read_indexer
    raise KeyError(f"{not_found} not in index")
KeyError: "['geo_id'] not in index"
"""

The above exception was the direct cause of the following exception:

    @pytest.fixture(scope="session")
    def run_as_module():
        # Clean receiving directory
        for fname in os.listdir("receiving"):
            if ".csv" in fname:
                os.remove(join("receiving", fname))
>       run_module()

conftest.py:17:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../delphi_safegraph/run.py:76: in run_module
    pool.map(single_arg_process, files_with_previous_weeks)
/usr/lib/python3.8/multiprocessing/pool.py:364: in map
    return self._map_async(func, iterable, mapstar, chunksize).get()   
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <multiprocessing.pool.MapResult object at 0x7f927004e0a0>, timeout = None

    def get(self, timeout=None):
        self.wait(timeout)
        if not self.ready():
            raise TimeoutError
        if self._success:
            return self._value
        else:
>           raise self._value
E           KeyError: "['geo_id'] not in index"

/usr/lib/python3.8/multiprocessing/pool.py:771: KeyError

Traceback 2:

_____________________________________________________________________________________________ ERROR at setup of TestRun.test_output_files_format _____________________________________________________________________________________________
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
    return list(map(*args))
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 236, in process
    process_window(past_week[:1],
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/delphi_safegraph/process.py", line 183, in process_window
    df_export = aggregated_df[
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/frame.py", line 2912, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
    self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
  File "/home/krivard/projects/covid/dev/covidcast-indicators/safegraph/env/lib/python3.8/site-packages/pandas/core/indexing.py", line 1304, in _validate_read_indexer
    raise KeyError(f"{not_found} not in index")
KeyError: "['geo_id'] not in index"
"""

The above exception was the direct cause of the following exception:

    @pytest.fixture(scope="session")
    def run_as_module():
        # Clean receiving directory
        for fname in os.listdir("receiving"):
            if ".csv" in fname:
                os.remove(join("receiving", fname))
>       run_module()

conftest.py:17:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../delphi_safegraph/run.py:76: in run_module
    pool.map(single_arg_process, files_with_previous_weeks)
/usr/lib/python3.8/multiprocessing/pool.py:364: in map
    return self._map_async(func, iterable, mapstar, chunksize).get()   
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <multiprocessing.pool.MapResult object at 0x7f927004e0a0>, timeout = None

    def get(self, timeout=None):
        self.wait(timeout)
        if not self.ready():
            raise TimeoutError
        if self._success:
            return self._value
        else:
>           raise self._value
E           KeyError: "['geo_id'] not in index"

/usr/lib/python3.8/multiprocessing/pool.py:771: KeyError

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions