Skip to content

Commit 2b8c313

Browse files
committed
Merge remote-tracking branch 'upstream/main' into roll_var_remove_floating_point_artifacts
merge
2 parents 4c08dfa + f99ec8b commit 2b8c313

File tree

13 files changed

+230
-51
lines changed

13 files changed

+230
-51
lines changed

doc/source/whatsnew/v1.4.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Bug fixes
3131
~~~~~~~~~
3232
- Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`)
3333
- Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`)
34-
-
34+
- Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`)
3535

3636
.. ---------------------------------------------------------------------------
3737

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- cython>=0.29.24
1919

2020
# code checks
21-
- black=21.5b2
21+
- black=22.1.0
2222
- cpplint
2323
- flake8=4.0.1
2424
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs

pandas/_libs/tslibs/conversion.pyx

+4-37
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ from pandas._libs.tslibs.np_datetime cimport (
3131
NPY_DATETIMEUNIT,
3232
NPY_FR_ns,
3333
_string_to_dts,
34+
astype_overflowsafe,
3435
check_dts_bounds,
3536
dt64_to_dtstruct,
3637
dtstruct_to_dt64,
@@ -215,54 +216,20 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
215216
-------
216217
ndarray with dtype datetime64[ns]
217218
"""
218-
cdef:
219-
Py_ssize_t i, n = arr.size
220-
const int64_t[:] ivalues
221-
int64_t[:] iresult
222-
NPY_DATETIMEUNIT unit
223-
npy_datetimestruct dts
224-
225-
shape = (<object>arr).shape
226-
227219
if (<object>arr).dtype.byteorder == ">":
228220
# GH#29684 we incorrectly get OutOfBoundsDatetime if we don't swap
229221
dtype = arr.dtype
230222
arr = arr.astype(dtype.newbyteorder("<"))
231223

232224
if arr.size == 0:
225+
# Fastpath; doesn't matter but we have old tests for result.base
226+
# being arr.
233227
result = arr.view(DT64NS_DTYPE)
234228
if copy:
235229
result = result.copy()
236230
return result
237231

238-
if arr.dtype.kind != "M":
239-
raise TypeError("ensure_datetime64ns arr must have datetime64 dtype")
240-
unit = get_unit_from_dtype(arr.dtype)
241-
if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
242-
# without raising explicitly here, we end up with a SystemError
243-
# built-in function ensure_datetime64ns returned a result with an error
244-
raise ValueError("datetime64/timedelta64 must have a unit specified")
245-
246-
if unit == NPY_FR_ns:
247-
# Check this before allocating result for perf, might save some memory
248-
if copy:
249-
return arr.copy()
250-
return arr
251-
252-
ivalues = arr.view(np.int64).ravel("K")
253-
254-
result = np.empty_like(arr, dtype=DT64NS_DTYPE)
255-
iresult = result.ravel("K").view(np.int64)
256-
257-
for i in range(n):
258-
if ivalues[i] != NPY_NAT:
259-
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
260-
iresult[i] = dtstruct_to_dt64(&dts)
261-
check_dts_bounds(&dts)
262-
else:
263-
iresult[i] = NPY_NAT
264-
265-
return result
232+
return astype_overflowsafe(arr, DT64NS_DTYPE, copy=copy)
266233

267234

268235
def ensure_timedelta64ns(arr: ndarray, copy: bool = True):

pandas/_libs/tslibs/np_datetime.pxd

+9-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ cdef extern from "numpy/ndarraytypes.h":
5252
NPY_FR_as
5353
NPY_FR_GENERIC
5454

55+
int64_t NPY_DATETIME_NAT # elsewhere we call this NPY_NAT
56+
5557
cdef extern from "src/datetime/np_datetime.h":
5658
ctypedef struct pandas_timedeltastruct:
5759
int64_t days
@@ -67,7 +69,7 @@ cdef extern from "src/datetime/np_datetime.h":
6769

6870
cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1
6971

70-
cdef check_dts_bounds(npy_datetimestruct *dts)
72+
cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?)
7173

7274
cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil
7375
cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil
@@ -86,3 +88,9 @@ cdef int _string_to_dts(str val, npy_datetimestruct* dts,
8688
bint want_exc) except? -1
8789

8890
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
91+
92+
cpdef cnp.ndarray astype_overflowsafe(
93+
cnp.ndarray values, # ndarray[datetime64[anyunit]]
94+
cnp.dtype dtype, # datetime64[anyunit] dtype to convert to
95+
bint copy=*,
96+
)

pandas/_libs/tslibs/np_datetime.pyi

+3
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ class OutOfBoundsDatetime(ValueError): ...
44

55
# only exposed for testing
66
def py_get_unit_from_dtype(dtype: np.dtype): ...
7+
def astype_overflowsafe(
8+
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
9+
) -> np.ndarray: ...

pandas/_libs/tslibs/np_datetime.pyx

+98-7
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ import_datetime()
2222
cimport numpy as cnp
2323

2424
cnp.import_array()
25-
from numpy cimport int64_t
25+
from numpy cimport (
26+
int64_t,
27+
ndarray,
28+
)
2629

2730
from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
2831

@@ -36,7 +39,12 @@ cdef extern from "src/datetime/np_datetime.h":
3639
pandas_timedeltastruct *result
3740
) nogil
3841

42+
# AS, FS, PS versions exist but are not imported because they are not used.
3943
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
44+
npy_datetimestruct _US_MIN_DTS, _US_MAX_DTS
45+
npy_datetimestruct _MS_MIN_DTS, _MS_MAX_DTS
46+
npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS
47+
npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS
4048

4149
PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype);
4250

@@ -119,22 +127,40 @@ class OutOfBoundsDatetime(ValueError):
119127
pass
120128

121129

122-
cdef inline check_dts_bounds(npy_datetimestruct *dts):
130+
cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns):
123131
"""Raises OutOfBoundsDatetime if the given date is outside the range that
124132
can be represented by 64-bit integers at the resolution given by ``unit`` (nanoseconds by default)."""
125133
cdef:
126134
bint error = False
127-
128-
if (dts.year <= 1677 and
129-
cmp_npy_datetimestruct(dts, &_NS_MIN_DTS) == -1):
135+
npy_datetimestruct cmp_upper, cmp_lower
136+
137+
if unit == NPY_FR_ns:
138+
cmp_upper = _NS_MAX_DTS
139+
cmp_lower = _NS_MIN_DTS
140+
elif unit == NPY_FR_us:
141+
cmp_upper = _US_MAX_DTS
142+
cmp_lower = _US_MIN_DTS
143+
elif unit == NPY_FR_ms:
144+
cmp_upper = _MS_MAX_DTS
145+
cmp_lower = _MS_MIN_DTS
146+
elif unit == NPY_FR_s:
147+
cmp_upper = _S_MAX_DTS
148+
cmp_lower = _S_MIN_DTS
149+
elif unit == NPY_FR_m:
150+
cmp_upper = _M_MAX_DTS
151+
cmp_lower = _M_MIN_DTS
152+
else:
153+
raise NotImplementedError(unit)
154+
155+
if cmp_npy_datetimestruct(dts, &cmp_lower) == -1:
130156
error = True
131-
elif (dts.year >= 2262 and
132-
cmp_npy_datetimestruct(dts, &_NS_MAX_DTS) == 1):
157+
elif cmp_npy_datetimestruct(dts, &cmp_upper) == 1:
133158
error = True
134159

135160
if error:
136161
fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
137162
f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')
163+
# TODO: "nanosecond" in the message assumes NPY_FR_ns
138164
raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}')
139165

140166

@@ -202,3 +228,68 @@ cdef inline int _string_to_dts(str val, npy_datetimestruct* dts,
202228
buf = get_c_string_buf_and_size(val, &length)
203229
return parse_iso_8601_datetime(buf, length, want_exc,
204230
dts, out_local, out_tzoffset)
231+
232+
233+
cpdef ndarray astype_overflowsafe(
234+
ndarray values,
235+
cnp.dtype dtype,
236+
bint copy=True,
237+
):
238+
"""
239+
Convert an ndarray with datetime64[X] to datetime64[Y], raising on overflow.
240+
"""
241+
if values.descr.type_num != cnp.NPY_DATETIME:
242+
# aka values.dtype.kind != "M"
243+
raise TypeError("astype_overflowsafe values must have datetime64 dtype")
244+
if dtype.type_num != cnp.NPY_DATETIME:
245+
raise TypeError("astype_overflowsafe dtype must be datetime64")
246+
247+
cdef:
248+
NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
249+
NPY_DATETIMEUNIT to_unit = get_unit_from_dtype(dtype)
250+
251+
if (
252+
from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
253+
or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
254+
):
255+
# without raising explicitly here, we end up with a SystemError
256+
# built-in function [...] returned a result with an error
257+
raise ValueError("datetime64 values and dtype must have a unit specified")
258+
259+
if from_unit == to_unit:
260+
# Check this before allocating result for perf, might save some memory
261+
if copy:
262+
return values.copy()
263+
return values
264+
265+
cdef:
266+
ndarray i8values = values.view("i8")
267+
268+
# equiv: result = np.empty((<object>values).shape, dtype="i8")
269+
ndarray iresult = cnp.PyArray_EMPTY(
270+
values.ndim, values.shape, cnp.NPY_INT64, 0
271+
)
272+
273+
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values)
274+
cnp.flatiter it
275+
Py_ssize_t i, N = values.size
276+
int64_t value, new_value
277+
npy_datetimestruct dts
278+
279+
for i in range(N):
280+
# Analogous to: item = values[i]
281+
value = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
282+
283+
if value == NPY_DATETIME_NAT:
284+
new_value = NPY_DATETIME_NAT
285+
else:
286+
pandas_datetime_to_datetimestruct(value, from_unit, &dts)
287+
check_dts_bounds(&dts, to_unit)
288+
new_value = npy_datetimestruct_to_datetime(to_unit, &dts)
289+
290+
# Analogous to: iresult[i] = new_value
291+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value
292+
293+
cnp.PyArray_MultiIter_NEXT(mi)
294+
295+
return iresult.view(dtype)

pandas/_libs/tslibs/src/datetime/np_datetime.c

+30
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,40 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
2727
#include <numpy/ndarraytypes.h>
2828
#include "np_datetime.h"
2929

30+
31+
const npy_datetimestruct _AS_MIN_DTS = {
32+
1969, 12, 31, 23, 59, 50, 776627, 963145, 224193};
33+
const npy_datetimestruct _FS_MIN_DTS = {
34+
1969, 12, 31, 21, 26, 16, 627963, 145224, 193000};
35+
const npy_datetimestruct _PS_MIN_DTS = {
36+
1969, 9, 16, 5, 57, 7, 963145, 224193, 0};
3037
const npy_datetimestruct _NS_MIN_DTS = {
3138
1677, 9, 21, 0, 12, 43, 145224, 193000, 0};
39+
const npy_datetimestruct _US_MIN_DTS = {
40+
-290308, 12, 21, 19, 59, 05, 224193, 0, 0};
41+
const npy_datetimestruct _MS_MIN_DTS = {
42+
-292275055, 5, 16, 16, 47, 4, 193000, 0, 0};
43+
const npy_datetimestruct _S_MIN_DTS = {
44+
-292277022657, 1, 27, 8, 29, 53, 0, 0, 0};
45+
const npy_datetimestruct _M_MIN_DTS = {
46+
-17536621475646, 5, 4, 5, 53, 0, 0, 0, 0};
47+
48+
const npy_datetimestruct _AS_MAX_DTS = {
49+
1970, 1, 1, 0, 0, 9, 223372, 36854, 775807};
50+
const npy_datetimestruct _FS_MAX_DTS = {
51+
1970, 1, 1, 2, 33, 43, 372036, 854775, 807000};
52+
const npy_datetimestruct _PS_MAX_DTS = {
53+
1970, 4, 17, 18, 2, 52, 36854, 775807, 0};
3254
const npy_datetimestruct _NS_MAX_DTS = {
3355
2262, 4, 11, 23, 47, 16, 854775, 807000, 0};
56+
const npy_datetimestruct _US_MAX_DTS = {
57+
294247, 1, 10, 4, 0, 54, 775807, 0, 0};
58+
const npy_datetimestruct _MS_MAX_DTS = {
59+
292278994, 8, 17, 7, 12, 55, 807000, 0, 0};
60+
const npy_datetimestruct _S_MAX_DTS = {
61+
292277026596, 12, 4, 15, 30, 7, 0, 0, 0};
62+
const npy_datetimestruct _M_MAX_DTS = {
63+
17536621479585, 8, 30, 18, 7, 0, 0, 0, 0};
3464

3565

3666
const int days_per_month_table[2][12] = {

pandas/_libs/tslibs/src/datetime/np_datetime.h

+14
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,22 @@ typedef struct {
2828
npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
2929
} pandas_timedeltastruct;
3030

31+
extern const npy_datetimestruct _AS_MIN_DTS;
32+
extern const npy_datetimestruct _AS_MAX_DTS;
33+
extern const npy_datetimestruct _FS_MIN_DTS;
34+
extern const npy_datetimestruct _FS_MAX_DTS;
35+
extern const npy_datetimestruct _PS_MIN_DTS;
36+
extern const npy_datetimestruct _PS_MAX_DTS;
3137
extern const npy_datetimestruct _NS_MIN_DTS;
3238
extern const npy_datetimestruct _NS_MAX_DTS;
39+
extern const npy_datetimestruct _US_MIN_DTS;
40+
extern const npy_datetimestruct _US_MAX_DTS;
41+
extern const npy_datetimestruct _MS_MIN_DTS;
42+
extern const npy_datetimestruct _MS_MAX_DTS;
43+
extern const npy_datetimestruct _S_MIN_DTS;
44+
extern const npy_datetimestruct _S_MAX_DTS;
45+
extern const npy_datetimestruct _M_MIN_DTS;
46+
extern const npy_datetimestruct _M_MAX_DTS;
3347

3448
// stuff pandas needs
3549
// ----------------------------------------------------------------------------

pandas/_libs/tslibs/tzconversion.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
534534

535535
int64_t[::1] result
536536

537-
if is_utc(tz):
537+
if is_utc(tz) or tz is None:
538538
# Much faster than going through the "standard" pattern below
539539
return stamps.copy()
540540

pandas/io/formats/style_render.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,7 @@ def _render_href(x, format):
15891589
href = r"\href{{{0}}}{{{0}}}"
15901590
else:
15911591
raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
1592-
pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
1592+
pat = r"((http|ftp)s?:\/\/|www.)[\w/\-?=%.:@]+\.[\w/\-&?=%.,':;~!@#$*()\[\]]+"
15931593
return re.sub(pat, lambda m: href.format(m.group(0)), x)
15941594
return x
15951595

pandas/tests/io/formats/style/test_html.py

+12
Original file line numberDiff line numberDiff line change
@@ -778,8 +778,20 @@ def test_hiding_index_columns_multiindex_trimming():
778778
("no scheme, no top-level: www.web", False, "www.web"),
779779
("https scheme: https://www.web.com", True, "https://www.web.com"),
780780
("ftp scheme: ftp://www.web", True, "ftp://www.web"),
781+
("ftps scheme: ftps://www.web", True, "ftps://www.web"),
781782
("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
782783
("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
784+
("with port: http://web.com:80", True, "http://web.com:80"),
785+
(
786+
"full net_loc scheme: http://user:pass@web.com",
787+
True,
788+
"http://user:pass@web.com",
789+
),
790+
(
791+
"with valid special chars: http://web.com/,.':;~!@#$*()[]",
792+
True,
793+
"http://web.com/,.':;~!@#$*()[]",
794+
),
783795
],
784796
)
785797
def test_rendered_links(type, text, exp, found):

0 commit comments

Comments
 (0)