Skip to content

Commit d368dc6

Browse files
Adjust to Pandas breaking change, bump Python version (#101)
Pandas 1.1 has broken Records Mover's usage of the read_csv() function by adding error checking in cases where a certain argument (here, `prefix`) would be unused. Details of the Pandas change: * pandas-dev/pandas#27394 * pandas-dev/pandas#31383 See Records Mover test failures here: * https://app.circleci.com/pipelines/github/bluelabsio/records-mover/1089/workflows/e62f1cf0-f8d0-4e22-9652-112df72b02b8/jobs/9439
1 parent dd7341d commit d368dc6

File tree

9 files changed

+24
-11
lines changed

9 files changed

+24
-11
lines changed

.python-version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
records-mover-3.8.3
1+
records-mover-3.8.5

deps.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ export SLUGIFY_USES_TEXT_UNIDECODE=yes
77
brew update && ( brew upgrade pyenv || true )
88
pyenv rehash # needed if pyenv is updated
99

10-
python_version=3.8.3
10+
python_version=3.8.5
1111
# zipimport.ZipImportError: can't decompress data; zlib not available:
1212
# You may need `xcode-select --install` on OS X
1313
# https://github.com/pyenv/pyenv/issues/451#issuecomment-151336786

metrics/coverage_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
93.7100
1+
93.7200

metrics/flake8_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
170
1+
169

records_mover/records/delimited/csv_streamer.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,21 @@ def stream_csv(filepath_or_buffer: Union[str, IO[bytes]],
4141
'header': header,
4242
'compression': pandas_compression_from_hint[compression_hint],
4343
'escapechar': hints.get('escape'),
44-
'prefix': 'untitled_',
4544
'iterator': True,
4645
'engine': 'python'
4746
}
47+
if header is None:
48+
# Pandas only accepts the prefix argument (which makes for
49+
# tidier column names when otherwise not provided) when the
50+
# header is explicitly marked as missing, not when it's
51+
# available or even when we ask Pandas to infer it. Bummer,
52+
# as this means that when Pandas infers that there's no
53+
# header, the column names will end up different than when folks
54+
# explicitly tell records mover that there is no header.
55+
#
56+
# https://github.com/pandas-dev/pandas/issues/27394
57+
# https://github.com/pandas-dev/pandas/pull/31383
58+
kwargs['prefix'] = 'untitled_'
4859
if 'quoting' in hints:
4960
quoting = hints['quoting']
5061
kwargs['quoting'] = pandas_quoting_from_hint[quoting]

records_mover/records/mover.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
def move(records_source: RecordsSource,
1919
records_target: RecordsTarget,
20-
processing_instructions: ProcessingInstructions=ProcessingInstructions()) -> MoveResult:
20+
processing_instructions: ProcessingInstructions = ProcessingInstructions()) -> MoveResult:
2121
"""Copy records from one location to another. Applies a sequence of
2222
possible techniques to do this in an efficient way and respects
2323
the preferences set in records_source, records_target and

records_mover/records/pandas/read_csv_options.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,13 @@ def pandas_read_csv_options(records_format: DelimitedRecordsFormat,
161161
# Leaving this in case a future version of Pandas behaves
162162
# better.
163163
#
164-
pandas_options['prefix'] = 'untitled_'
164+
if pandas_options['header'] is None:
165+
# Pandas only accepts the prefix argument when the
166+
# header is marked as missing.
167+
#
168+
# https://github.com/pandas-dev/pandas/issues/27394
169+
# https://github.com/pandas-dev/pandas/pull/31383
170+
pandas_options['prefix'] = 'untitled_'
165171

166172
#
167173
# mangle_dupe_cols : bool, default True

tests/unit/records/test_csv_streamer.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ def test_stream_csv_no_compression(self,
2424
engine='python',
2525
escapechar=None,
2626
header='infer',
27-
prefix='untitled_',
2827
quoting=0,
2928
iterator=True,
3029
sep=mock_field_delimiter)
@@ -49,7 +48,6 @@ def test_stream_csv_gzip(self,
4948
engine='python',
5049
escapechar=None,
5150
header='infer',
52-
prefix='untitled_',
5351
iterator=True,
5452
sep=mock_field_delimiter)
5553
mock_io.TextIOWrapper.assert_not_called()
@@ -72,7 +70,6 @@ def test_stream_filename(self,
7270
engine='python',
7371
escapechar=None,
7472
header='infer',
75-
prefix='untitled_',
7673
iterator=True,
7774
sep=mock_field_delimiter)
7875
mock_io.TextIOWrapper.assert_not_called()

tests/unit/records/test_pandas_read_csv_options.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ def test_pandas_read_csv_options_csv(self):
118118
'engine': 'python',
119119
'error_bad_lines': True,
120120
'header': 0,
121-
'prefix': 'untitled_',
122121
'quotechar': '"',
123122
'quoting': 0,
124123
'warn_bad_lines': True,

0 commit comments

Comments
 (0)