Skip to content

Commit 534a379

Browse files
committed
Merge remote-tracking branch 'upstream/master' into sparse-frame-accessor
2 parents 94a7baf + 5c341dc commit 534a379

File tree

11 files changed

+90
-145
lines changed

11 files changed

+90
-145
lines changed

doc/source/whatsnew/v0.24.2.rst

+2-43
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ including other versions of pandas.
1818
.. _whatsnew_0242.regressions:
1919

2020
Fixed Regressions
21-
^^^^^^^^^^^^^^^^^
21+
~~~~~~~~~~~~~~~~~
2222

2323
- Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`)
2424
- Fixed issue in ``DataFrame`` construction where passing a mixed list of mixed types could segfault. (:issue:`25075`)
@@ -31,71 +31,30 @@ Fixed Regressions
3131
- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
3232
- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
3333
- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
34+
- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
3435
- Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
3536
- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`)
3637

37-
.. _whatsnew_0242.enhancements:
38-
39-
Enhancements
40-
^^^^^^^^^^^^
41-
42-
-
43-
-
44-
4538
.. _whatsnew_0242.bug_fixes:
4639

4740
Bug Fixes
4841
~~~~~~~~~
4942

50-
**Conversion**
51-
52-
-
53-
-
54-
-
55-
56-
**Indexing**
57-
58-
-
59-
-
60-
-
61-
6243
**I/O**
6344

6445
- Better handling of terminal printing when the terminal dimensions are not known (:issue:`25080`)
6546
- Bug in reading an HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`)
6647
- Bug in reading a JSON with ``orient='table'`` generated by :meth:`DataFrame.to_json` with ``index=False`` (:issue:`25170`)
6748
- Bug where float indexes could have misaligned values when printing (:issue:`25061`)
68-
-
69-
70-
**Categorical**
71-
72-
-
73-
-
74-
-
75-
76-
**Timezones**
77-
78-
-
79-
-
80-
-
81-
82-
**Timedelta**
83-
84-
-
85-
-
86-
-
8749

8850
**Reshaping**
8951

9052
- Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`)
9153
- Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`)
92-
-
9354

9455
**Visualization**
9556

9657
- Bug in :meth:`Series.plot` where a secondary y axis could not be set to log scale (:issue:`25545`)
97-
-
98-
-
9958

10059
**Other**
10160

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ I/O
216216
- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
217217
- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
218218
- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`)
219+
- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`)
219220
-
220221
-
221222

pandas/_libs/tslibs/timedeltas.pyx

+16-3
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,11 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
246246
return iresult.base # .base to access underlying np.ndarray
247247

248248

249-
cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
250-
""" return a casting of the unit represented to nanoseconds
251-
round the fractional part of a float to our precision, p """
249+
cpdef inline object precision_from_unit(object unit):
250+
"""
251+
Return a casting of the unit represented to nanoseconds + the precision
252+
to round the fractional part.
253+
"""
252254
cdef:
253255
int64_t m
254256
int p
@@ -285,6 +287,17 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
285287
p = 0
286288
else:
287289
raise ValueError("cannot cast unit {unit}".format(unit=unit))
290+
return m, p
291+
292+
293+
cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
294+
""" return a casting of the unit represented to nanoseconds
295+
round the fractional part of a float to our precision, p """
296+
cdef:
297+
int64_t m
298+
int p
299+
300+
m, p = precision_from_unit(unit)
288301

289302
# just give me the unit back
290303
if ts is None:

pandas/core/arrays/timedeltas.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
1212
from pandas._libs.tslibs.fields import get_timedelta_field
1313
from pandas._libs.tslibs.timedeltas import (
14-
array_to_timedelta64, parse_timedelta_unit)
14+
array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
1515
import pandas.compat as compat
1616
from pandas.util._decorators import Appender
1717

@@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
918918
copy = copy and not copy_made
919919

920920
elif is_float_dtype(data.dtype):
921-
# treat as multiples of the given unit. If after converting to nanos,
922-
# there are fractional components left, these are truncated
923-
# (i.e. NOT rounded)
921+
# cast the unit, multiply base/frac separately
922+
# to avoid precision issues from float -> int
924923
mask = np.isnan(data)
925-
coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
926-
data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
924+
m, p = precision_from_unit(unit)
925+
base = data.astype(np.int64)
926+
frac = data - base
927+
if p:
928+
frac = np.round(frac, p)
929+
data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
927930
data[mask] = iNaT
928931
copy = False
929932

pandas/core/reshape/concat.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,10 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
282282
# consolidate data & figure out what our result ndim is going to be
283283
ndims = set()
284284
for obj in objs:
285-
if not isinstance(obj, NDFrame):
286-
msg = ('cannot concatenate object of type "{0}";'
287-
' only pd.Series, pd.DataFrame, and pd.Panel'
288-
' (deprecated) objs are valid'.format(type(obj)))
285+
if not isinstance(obj, (Series, DataFrame)):
286+
msg = ("cannot concatenate object of type '{}';"
287+
' only Series and DataFrame objs are valid'
288+
.format(type(obj)))
289289
raise TypeError(msg)
290290

291291
# consolidate

pandas/io/formats/format.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@ def _to_str_columns(self):
528528
else:
529529
str_columns = self._get_formatted_column_labels(frame)
530530

531+
if self.show_row_idx_names:
532+
for x in str_columns:
533+
x.append('')
534+
531535
stringified = []
532536
for i, c in enumerate(frame):
533537
cheader = str_columns[i]
@@ -770,11 +774,6 @@ def space_format(x, y):
770774
need_leadsp[x] else x]
771775
for i, (col, x) in enumerate(zip(columns,
772776
fmt_columns))]
773-
774-
if self.show_row_idx_names:
775-
for x in str_columns:
776-
x.append('')
777-
778777
# self.str_columns = str_columns
779778
return str_columns
780779

pandas/tests/computation/test_eval.py

+16-46
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,13 @@
2323
from pandas.core.computation.expressions import (
2424
_NUMEXPR_INSTALLED, _USE_NUMEXPR)
2525
from pandas.core.computation.ops import (
26-
_arith_ops_syms, _binary_math_ops, _binary_ops_dict, _bool_ops_syms,
26+
_arith_ops_syms, _binary_math_ops, _binary_ops_dict,
2727
_special_case_arith_ops_syms, _unary_math_ops)
2828
import pandas.util.testing as tm
2929
from pandas.util.testing import (
3030
assert_frame_equal, assert_numpy_array_equal, assert_produces_warning,
3131
assert_series_equal, makeCustomDataframe as mkdf, randbool)
3232

33-
_series_frame_incompatible = _bool_ops_syms
34-
_scalar_skip = 'in', 'not in'
35-
3633

3734
@pytest.fixture(params=(
3835
pytest.param(engine,
@@ -162,13 +159,21 @@ def teardown_method(self, method):
162159
del self.pandas_rhses, self.pandas_lhses, self.current_engines
163160

164161
@pytest.mark.slow
165-
def test_complex_cmp_ops(self):
166-
cmp_ops = ('!=', '==', '<=', '>=', '<', '>')
167-
cmp2_ops = ('>', '<')
168-
for lhs, cmp1, rhs, binop, cmp2 in product(self.lhses, cmp_ops,
169-
self.rhses, self.bin_ops,
170-
cmp2_ops):
171-
self.check_complex_cmp_op(lhs, cmp1, rhs, binop, cmp2)
162+
@pytest.mark.parametrize('cmp1', ['!=', '==', '<=', '>=', '<', '>'],
163+
ids=['ne', 'eq', 'le', 'ge', 'lt', 'gt'])
164+
@pytest.mark.parametrize('cmp2', ['>', '<'], ids=['gt', 'lt'])
165+
def test_complex_cmp_ops(self, cmp1, cmp2):
166+
for lhs, rhs, binop in product(
167+
self.lhses, self.rhses, self.bin_ops):
168+
lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine)
169+
rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine)
170+
expected = _eval_single_bin(
171+
lhs_new, binop, rhs_new, self.engine)
172+
173+
ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(
174+
cmp1=cmp1, binop=binop, cmp2=cmp2)
175+
result = pd.eval(ex, engine=self.engine, parser=self.parser)
176+
self.check_equal(result, expected)
172177

173178
def test_simple_cmp_ops(self):
174179
bool_lhses = (DataFrame(randbool(size=(10, 5))),
@@ -225,41 +230,6 @@ def check_equal(self, result, expected):
225230
else:
226231
assert result == expected
227232

228-
def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2):
229-
skip_these = _scalar_skip
230-
ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1,
231-
binop=binop,
232-
cmp2=cmp2)
233-
scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or
234-
cmp2 in skip_these))
235-
if scalar_with_in_notin:
236-
with pytest.raises(TypeError):
237-
pd.eval(ex, engine=self.engine, parser=self.parser)
238-
with pytest.raises(TypeError):
239-
pd.eval(ex, engine=self.engine, parser=self.parser,
240-
local_dict={'lhs': lhs, 'rhs': rhs})
241-
else:
242-
lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine)
243-
rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine)
244-
if (isinstance(lhs_new, Series) and
245-
isinstance(rhs_new, DataFrame) and
246-
binop in _series_frame_incompatible):
247-
pass
248-
# TODO: the code below should be added back when left and right
249-
# hand side bool ops are fixed.
250-
#
251-
# try:
252-
# pytest.raises(Exception, pd.eval, ex,
253-
# local_dict={'lhs': lhs, 'rhs': rhs},
254-
# engine=self.engine, parser=self.parser)
255-
# except AssertionError:
256-
# raise
257-
else:
258-
expected = _eval_single_bin(
259-
lhs_new, binop, rhs_new, self.engine)
260-
result = pd.eval(ex, engine=self.engine, parser=self.parser)
261-
self.check_equal(result, expected)
262-
263233
def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
264234

265235
def check_operands(left, right, cmp_op):

pandas/tests/indexes/timedeltas/test_tools.py

+7
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,10 @@ def test_to_timedelta_on_missing_values(self):
181181

182182
actual = pd.to_timedelta(pd.NaT)
183183
assert actual.value == timedelta_NaT.astype('int64')
184+
185+
def test_to_timedelta_float(self):
186+
# https://github.com/pandas-dev/pandas/issues/25077
187+
arr = np.arange(0, 1, 1e-6)[-10:]
188+
result = pd.to_timedelta(arr, unit='s')
189+
expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64')
190+
tm.assert_numpy_array_equal(result.asi8, expected_asi8)

pandas/tests/io/formats/test_format.py

+8
Original file line numberDiff line numberDiff line change
@@ -2380,6 +2380,14 @@ def test_to_string_header(self):
23802380
exp = '0 0\n ..\n9 9'
23812381
assert res == exp
23822382

2383+
def test_to_string_multindex_header(self):
2384+
# GH 16718
2385+
df = (pd.DataFrame({'a': [0], 'b': [1], 'c': [2], 'd': [3]})
2386+
.set_index(['a', 'b']))
2387+
res = df.to_string(header=['r1', 'r2'])
2388+
exp = ' r1 r2\na b \n0 1 2 3'
2389+
assert res == exp
2390+
23832391

23842392
def _three_digit_exp():
23852393
return '{x:.4g}'.format(x=1.7e8) == '1.7e+008'

pandas/tests/io/formats/test_to_latex.py

+16
Original file line numberDiff line numberDiff line change
@@ -735,3 +735,19 @@ def test_to_latex_float_format_no_fixed_width(self):
735735
\end{tabular}
736736
"""
737737
assert df.to_latex(float_format='%.0f') == expected
738+
739+
def test_to_latex_multindex_header(self):
740+
# GH 16718
741+
df = (pd.DataFrame({'a': [0], 'b': [1], 'c': [2], 'd': [3]})
742+
.set_index(['a', 'b']))
743+
observed = df.to_latex(header=['r1', 'r2'])
744+
expected = r"""\begin{tabular}{llrr}
745+
\toprule
746+
& & r1 & r2 \\
747+
a & b & & \\
748+
\midrule
749+
0 & 1 & 2 & 3 \\
750+
\bottomrule
751+
\end{tabular}
752+
"""
753+
assert observed == expected

0 commit comments

Comments
 (0)