Skip to content

Commit d29f782

Browse files
Merge branch 'master' into my-dev
2 parents e9561fe + 6131a59 commit d29f782

File tree

22 files changed

+273
-93
lines changed

22 files changed

+273
-93
lines changed

doc/source/whatsnew/v0.23.2.txt

+14-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version.
1616
Fixed Regressions
1717
~~~~~~~~~~~~~~~~~
1818

19-
-
19+
- Fixed regression in :meth:`to_csv` where file-like objects were handled incorrectly (:issue:`21471`)
2020
-
2121

2222
.. _whatsnew_0232.performance:
@@ -27,6 +27,7 @@ Performance Improvements
2727
- Improved performance of membership checks in :class:`CategoricalIndex`
2828
(i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains`
2929
is likewise much faster (:issue:`21369`)
30+
- Improved performance of :meth:`MultiIndex.is_unique` (:issue:`21522`)
3031
-
3132

3233
Documentation Changes
@@ -45,11 +46,15 @@ Bug Fixes
4546
-
4647
-
4748

49+
**Timedelta**
50+
51+
- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`)
52+
4853
**Conversion**
4954

55+
- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`)
56+
- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
5057

51-
-
52-
-
5358

5459
**Indexing**
5560

@@ -77,8 +82,14 @@ Bug Fixes
7782
-
7883

7984
**Timezones**
85+
8086
- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`)
8187
- Bug in comparing :class:`DataFrame` objects with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`)
88+
- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`)
89+
- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`)
90+
- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`)
91+
- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`)
92+
- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`)
8293

8394
**Other**
8495

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ v0.24.0
88
New features
99
~~~~~~~~~~~~
1010

11+
- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling appending to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
12+
1113
.. _whatsnew_0240.enhancements.other:
1214

1315
Other Enhancements

pandas/_libs/tslibs/timedeltas.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,9 @@ cdef class _Timedelta(timedelta):
899899
def __str__(self):
900900
return self._repr_base(format='long')
901901

902+
def __bool__(self):
903+
return self.value != 0
904+
902905
def isoformat(self):
903906
"""
904907
Format Timedelta as ISO 8601 Duration like

pandas/core/arrays/categorical.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import numpy as np
44
from warnings import warn
55
import textwrap
6-
import types
76

87
from pandas import compat
98
from pandas.compat import u, lzip
@@ -28,7 +27,7 @@
2827
is_categorical,
2928
is_categorical_dtype,
3029
is_list_like, is_sequence,
31-
is_scalar,
30+
is_scalar, is_iterator,
3231
is_dict_like)
3332

3433
from pandas.core.algorithms import factorize, take_1d, unique1d, take
@@ -2483,7 +2482,7 @@ def _convert_to_list_like(list_like):
24832482
if isinstance(list_like, list):
24842483
return list_like
24852484
if (is_sequence(list_like) or isinstance(list_like, tuple) or
2486-
isinstance(list_like, types.GeneratorType)):
2485+
is_iterator(list_like)):
24872486
return list(list_like)
24882487
elif is_scalar(list_like):
24892488
return [list_like]

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
16901690
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
16911691
compression : string, optional
16921692
A string representing the compression to use in the output file.
1693-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
1693+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
1694+
used when the first argument is a filename.
16941695
line_terminator : string, default ``'\n'``
16951696
The newline character or character sequence to use in the output
16961697
file

pandas/core/indexes/base.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -428,12 +428,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
428428
elif data is None or is_scalar(data):
429429
cls._scalar_data_error(data)
430430
else:
431-
if tupleize_cols and is_list_like(data) and data:
431+
if tupleize_cols and is_list_like(data):
432+
# GH21470: convert iterable to list before determining if empty
432433
if is_iterator(data):
433434
data = list(data)
434-
# we must be all tuples, otherwise don't construct
435-
# 10697
436-
if all(isinstance(e, tuple) for e in data):
435+
436+
if data and all(isinstance(e, tuple) for e in data):
437+
# we must be all tuples, otherwise don't construct
438+
# 10697
437439
from .multi import MultiIndex
438440
return MultiIndex.from_tuples(
439441
data, names=name or kwargs.get('names'))

pandas/core/indexes/multi.py

-12
Original file line numberDiff line numberDiff line change
@@ -852,14 +852,6 @@ def _has_complex_internals(self):
852852
# to disable groupby tricks
853853
return True
854854

855-
@cache_readonly
856-
def is_monotonic(self):
857-
"""
858-
return if the index is monotonic increasing (only equal or
859-
increasing) values.
860-
"""
861-
return self.is_monotonic_increasing
862-
863855
@cache_readonly
864856
def is_monotonic_increasing(self):
865857
"""
@@ -887,10 +879,6 @@ def is_monotonic_decreasing(self):
887879
# monotonic decreasing if and only if reverse is monotonic increasing
888880
return self[::-1].is_monotonic_increasing
889881

890-
@cache_readonly
891-
def is_unique(self):
892-
return not self.duplicated().any()
893-
894882
@cache_readonly
895883
def _have_mixed_levels(self):
896884
""" return a boolean list indicating if we have mixed levels """

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3790,7 +3790,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='',
37903790
non-ascii, for python versions prior to 3
37913791
compression : string, optional
37923792
A string representing the compression to use in the output file.
3793-
Allowed values are 'gzip', 'bz2', 'zip', 'xz'.
3793+
Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only
3794+
used when the first argument is a filename.
37943795
date_format: string, default None
37953796
Format string for datetime objects.
37963797
decimal: string, default '.'

pandas/io/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,10 @@ def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
445445
def write(self, data):
446446
super(BytesZipFile, self).writestr(self.filename, data)
447447

448+
@property
449+
def closed(self):
450+
return self.fp is None
451+
448452

449453
class MMapWrapper(BaseIterator):
450454
"""

pandas/io/excel.py

+36-15
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,10 @@ class ExcelWriter(object):
804804
datetime_format : string, default None
805805
Format string for datetime objects written into Excel files
806806
(e.g. 'YYYY-MM-DD HH:MM:SS')
807+
mode : {'w', 'a'}, default 'w'
808+
File mode to use (write or append).
809+
810+
.. versionadded:: 0.24.0
807811
808812
Notes
809813
-----
@@ -897,7 +901,8 @@ def save(self):
897901
pass
898902

899903
def __init__(self, path, engine=None,
900-
date_format=None, datetime_format=None, **engine_kwargs):
904+
date_format=None, datetime_format=None, mode='w',
905+
**engine_kwargs):
901906
# validate that this engine can handle the extension
902907
if isinstance(path, string_types):
903908
ext = os.path.splitext(path)[-1]
@@ -919,6 +924,8 @@ def __init__(self, path, engine=None,
919924
else:
920925
self.datetime_format = datetime_format
921926

927+
self.mode = mode
928+
922929
def __fspath__(self):
923930
return _stringify_path(self.path)
924931

@@ -993,23 +1000,27 @@ class _OpenpyxlWriter(ExcelWriter):
9931000
engine = 'openpyxl'
9941001
supported_extensions = ('.xlsx', '.xlsm')
9951002

996-
def __init__(self, path, engine=None, **engine_kwargs):
1003+
def __init__(self, path, engine=None, mode='w', **engine_kwargs):
9971004
# Use the openpyxl module as the Excel writer.
9981005
from openpyxl.workbook import Workbook
9991006

1000-
super(_OpenpyxlWriter, self).__init__(path, **engine_kwargs)
1007+
super(_OpenpyxlWriter, self).__init__(path, mode=mode, **engine_kwargs)
10011008

1002-
# Create workbook object with default optimized_write=True.
1003-
self.book = Workbook()
1009+
if self.mode == 'a': # Load from existing workbook
1010+
from openpyxl import load_workbook
1011+
book = load_workbook(self.path)
1012+
self.book = book
1013+
else:
1014+
# Create workbook object with default optimized_write=True.
1015+
self.book = Workbook()
10041016

1005-
# Openpyxl 1.6.1 adds a dummy sheet. We remove it.
1006-
if self.book.worksheets:
1007-
try:
1008-
self.book.remove(self.book.worksheets[0])
1009-
except AttributeError:
1017+
if self.book.worksheets:
1018+
try:
1019+
self.book.remove(self.book.worksheets[0])
1020+
except AttributeError:
10101021

1011-
# compat
1012-
self.book.remove_sheet(self.book.worksheets[0])
1022+
# compat - for openpyxl <= 2.4
1023+
self.book.remove_sheet(self.book.worksheets[0])
10131024

10141025
def save(self):
10151026
"""
@@ -1443,11 +1454,16 @@ class _XlwtWriter(ExcelWriter):
14431454
engine = 'xlwt'
14441455
supported_extensions = ('.xls',)
14451456

1446-
def __init__(self, path, engine=None, encoding=None, **engine_kwargs):
1457+
def __init__(self, path, engine=None, encoding=None, mode='w',
1458+
**engine_kwargs):
14471459
# Use the xlwt module as the Excel writer.
14481460
import xlwt
14491461
engine_kwargs['engine'] = engine
1450-
super(_XlwtWriter, self).__init__(path, **engine_kwargs)
1462+
1463+
if mode == 'a':
1464+
raise ValueError('Append mode is not supported with xlwt!')
1465+
1466+
super(_XlwtWriter, self).__init__(path, mode=mode, **engine_kwargs)
14511467

14521468
if encoding is None:
14531469
encoding = 'ascii'
@@ -1713,13 +1729,18 @@ class _XlsxWriter(ExcelWriter):
17131729
supported_extensions = ('.xlsx',)
17141730

17151731
def __init__(self, path, engine=None,
1716-
date_format=None, datetime_format=None, **engine_kwargs):
1732+
date_format=None, datetime_format=None, mode='w',
1733+
**engine_kwargs):
17171734
# Use the xlsxwriter module as the Excel writer.
17181735
import xlsxwriter
17191736

1737+
if mode == 'a':
1738+
raise ValueError('Append mode is not supported with xlsxwriter!')
1739+
17201740
super(_XlsxWriter, self).__init__(path, engine=engine,
17211741
date_format=date_format,
17221742
datetime_format=datetime_format,
1743+
mode=mode,
17231744
**engine_kwargs)
17241745

17251746
self.book = xlsxwriter.Workbook(path, **engine_kwargs)

pandas/io/formats/csvs.py

+39-20
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
from __future__ import print_function
77

8+
import warnings
9+
810
import csv as csvlib
11+
from zipfile import ZipFile
912
import numpy as np
1013

1114
from pandas.core.dtypes.missing import notna
12-
from pandas.core.dtypes.inference import is_file_like
1315
from pandas.core.index import Index, MultiIndex
1416
from pandas import compat
1517
from pandas.compat import (StringIO, range, zip)
@@ -128,19 +130,31 @@ def save(self):
128130
else:
129131
encoding = self.encoding
130132

131-
# PR 21300 uses string buffer to receive csv writing and dump into
132-
# file-like output with compression as option. GH 21241, 21118
133-
f = StringIO()
134-
if not is_file_like(self.path_or_buf):
135-
# path_or_buf is path
136-
path_or_buf = self.path_or_buf
137-
elif hasattr(self.path_or_buf, 'name'):
138-
# path_or_buf is file handle
139-
path_or_buf = self.path_or_buf.name
140-
else:
141-
# path_or_buf is file-like IO objects.
133+
# GH 21227: internal compression is not used when a file-like object is passed.
134+
if self.compression and hasattr(self.path_or_buf, 'write'):
135+
msg = ("compression has no effect when passing file-like "
136+
"object as input.")
137+
warnings.warn(msg, RuntimeWarning, stacklevel=2)
138+
139+
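# zip output needs special handling: either a ZipFile object was passed, or
# a path (not a writable object) was given with compression='zip'.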
140+
is_zip = isinstance(self.path_or_buf, ZipFile) or (
141+
not hasattr(self.path_or_buf, 'write')
142+
and self.compression == 'zip')
143+
144+
if is_zip:
145+
# zipfile doesn't support writing string to archive. uses string
146+
# buffer to receive csv writing and dump into zip compression
147+
# file handle. GH 21241, 21118
148+
f = StringIO()
149+
close = False
150+
elif hasattr(self.path_or_buf, 'write'):
142151
f = self.path_or_buf
143-
path_or_buf = None
152+
close = False
153+
else:
154+
f, handles = _get_handle(self.path_or_buf, self.mode,
155+
encoding=encoding,
156+
compression=self.compression)
157+
close = True
144158

145159
try:
146160
writer_kwargs = dict(lineterminator=self.line_terminator,
@@ -157,13 +171,18 @@ def save(self):
157171
self._save()
158172

159173
finally:
160-
# GH 17778 handles zip compression for byte strings separately.
161-
buf = f.getvalue()
162-
if path_or_buf:
163-
f, handles = _get_handle(path_or_buf, self.mode,
164-
encoding=encoding,
165-
compression=self.compression)
166-
f.write(buf)
174+
if is_zip:
175+
# GH 17778 handles zip compression separately.
176+
buf = f.getvalue()
177+
if hasattr(self.path_or_buf, 'write'):
178+
self.path_or_buf.write(buf)
179+
else:
180+
f, handles = _get_handle(self.path_or_buf, self.mode,
181+
encoding=encoding,
182+
compression=self.compression)
183+
f.write(buf)
184+
close = True
185+
if close:
167186
f.close()
168187
for _fh in handles:
169188
_fh.close()

pandas/tests/frame/test_to_csv.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import numpy as np
1010

1111
from pandas.compat import (lmap, range, lrange, StringIO, u)
12+
from pandas.io.common import _get_handle
1213
import pandas.core.common as com
1314
from pandas.errors import ParserError
1415
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
@@ -935,18 +936,19 @@ def test_to_csv_compression(self, df, encoding, compression):
935936
with ensure_clean() as filename:
936937

937938
df.to_csv(filename, compression=compression, encoding=encoding)
938-
939939
# test the round trip - to_csv -> read_csv
940940
result = read_csv(filename, compression=compression,
941941
index_col=0, encoding=encoding)
942+
assert_frame_equal(df, result)
942943

943-
with open(filename, 'w') as fh:
944-
df.to_csv(fh, compression=compression, encoding=encoding)
945-
946-
result_fh = read_csv(filename, compression=compression,
947-
index_col=0, encoding=encoding)
944+
# test the round trip using file handle - to_csv -> read_csv
945+
f, _handles = _get_handle(filename, 'w', compression=compression,
946+
encoding=encoding)
947+
with f:
948+
df.to_csv(f, encoding=encoding)
949+
result = pd.read_csv(filename, compression=compression,
950+
encoding=encoding, index_col=0, squeeze=True)
948951
assert_frame_equal(df, result)
949-
assert_frame_equal(df, result_fh)
950952

951953
# explicitly make sure file is compressed
952954
with tm.decompress_file(filename, compression) as fh:

0 commit comments

Comments
 (0)