Commit b7c0bfd

Author: awu42
Edited validate_rst_title_capitalization.py for review (pandas-dev#26941)
2 parents: de06ec8 + 52c22b2

41 files changed (+394, -311 lines)

.travis.yml (+12, -10)
@@ -7,10 +7,10 @@ python: 3.7
 # travis cache --delete inside the project directory from the travis command line client
 # The cache directories will be deleted if anything in ci/ changes in a commit
 cache:
-    ccache: true
-    directories:
-        - $HOME/.cache # cython cache
-        - $HOME/.ccache # compiler cache
+  ccache: true
+  directories:
+    - $HOME/.cache # cython cache
+    - $HOME/.ccache # compiler cache

 env:
   global:
@@ -20,13 +20,13 @@ env:
   - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="

 git:
-    # for cloning
-    depth: false
+  # for cloning
+  depth: false

 matrix:
-    fast_finish: true
+  fast_finish: true

-    include:
+  include:
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"

@@ -40,6 +40,9 @@ matrix:
         - postgresql

     - env:
+        # Enabling Deprecations when running tests
+        # PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
+        # See pandas/_testing.py for more details.
         - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
       services:
         - mysql
@@ -70,7 +73,6 @@ before_install:
   # This overrides travis and tells it to look nowhere.
   - export BOTO_CONFIG=/dev/null

-
 install:
   - echo "install start"
   - ci/prep_cython_cache.sh
@@ -87,5 +89,5 @@ script:
 after_script:
   - echo "after_script start"
   - source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
-    - ci/print_skipped.py
+  - ci/print_skipped.py
   - echo "after_script done"

LICENSE (+3, -1)
@@ -1,8 +1,10 @@
 BSD 3-Clause License

-Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.

+Copyright (c) 2011-2020, Open source contributors.
+
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

doc/source/development/contributing.rst (+16)
@@ -1525,3 +1525,19 @@ The branch will still exist on GitHub, so to delete it there do::

     git push origin --delete shiny-new-feature

 .. _Gitter: https://gitter.im/pydata/pandas
+
+
+Tips for a successful Pull Request
+==================================
+
+If you have made it to the `Review your code`_ phase, one of the core contributors may
+take a look. Please note however that a handful of people are responsible for reviewing
+all of the contributions, which can often lead to bottlenecks.
+
+To improve the chances of your pull request being reviewed, you should:
+
+- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
+- **Ensure you have appropriate tests**. These should be the first part of any PR
+- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
+- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
+- **Keep** `Updating your pull request`_, either by request or every few days

doc/source/ecosystem.rst (+4, -6)
@@ -122,16 +122,14 @@ also goes beyond matplotlib and pandas with the option to perform statistical
 estimation while plotting, aggregating across observations and visualizing the
 fit of statistical models to emphasize patterns in a dataset.

-`yhat/ggpy <https://github.com/yhat/ggpy>`__
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`plotnine <https://github.com/has2k1/plotnine/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 Hadley Wickham's `ggplot2 <https://ggplot2.tidyverse.org/>`__ is a foundational exploratory visualization package for the R language.
 Based on `"The Grammar of Graphics" <https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html>`__ it
 provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data.
-It's really quite incredible. Various implementations to other languages are available,
-but a faithful implementation for Python users has long been missing. Although still young
-(as of Jan-2014), the `yhat/ggpy <https://github.com/yhat/ggpy>`__ project has been
-progressing quickly in that direction.
+Various implementations to other languages are available.
+A good implementation for Python users is `has2k1/plotnine <https://github.com/has2k1/plotnine/>`__.

 `IPython Vega <https://github.com/vega/ipyvega>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
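For readers evaluating the new plotnine recommendation, a minimal grammar-of-graphics sketch; it assumes plotnine is installed and uses the mtcars sample dataset that ships with plotnine.data:

    from plotnine import ggplot, aes, geom_point
    from plotnine.data import mtcars

    # Map columns to aesthetics, then add a geom layer - ggplot2-style.
    p = ggplot(mtcars, aes("wt", "mpg", color="factor(gear)")) + geom_point()
    p.draw()  # renders the figure; in a notebook, `p` alone displays it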

doc/source/whatsnew/v1.0.0.rst (+19)
@@ -483,6 +483,25 @@ Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead.

    a.to_numpy(dtype="float", na_value=np.nan)

+**Reductions can return ``pd.NA``**
+
+When performing a reduction such as a sum with ``skipna=False``, the result
+will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values
+(:issue:`30958`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+   >>> pd.Series(a).sum(skipna=False)
+   nan
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+   pd.Series(a).sum(skipna=False)
+
 **value_counts returns a nullable integer dtype**

 :meth:`Series.value_counts` with a nullable integer dtype now returns a nullable
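To make the before/after concrete: ``a`` is defined earlier on the whatsnew page, so the sketch below reconstructs a comparable nullable-integer array (the exact definition of ``a`` is assumed):

    import pandas as pd

    # Assumed stand-in for the ``a`` defined earlier in the whatsnew page.
    a = pd.array([1, 2, None], dtype="Int64")

    pd.Series(a).sum(skipna=False)  # pandas 1.0.0: <NA> (pandas 0.25.x: nan)
    pd.Series(a).sum()              # 3 -- missing values skipped by default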

doc/source/whatsnew/v1.1.0.rst (+3, -2)
@@ -60,7 +60,7 @@ Categorical
 Datetimelike
 ^^^^^^^^^^^^
 - Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`)
--
+- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`)
 -

 Timedelta
@@ -102,7 +102,7 @@ Interval

 Indexing
 ^^^^^^^^
--
+- Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`)
 -
 -

@@ -142,6 +142,7 @@ Reshaping

 -
 - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
+- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
 - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)

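To illustrate the Indexing fix (:issue:`31064`), a hedged sketch with hypothetical data showing that partial-string slicing should keep sub-second entries at the very end of a year:

    import pandas as pd

    # Hypothetical data: millisecond timestamps straddling the year boundary.
    idx = pd.date_range("2019-12-31 23:59:59.999", periods=3, freq="ms")
    s = pd.Series(range(3), index=idx)

    # With the fix, the partial-timestamp slice keeps the high-resolution
    # entry just before midnight instead of dropping it.
    s[:"2019"]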

pandas/_libs/src/ujson/python/objToJSON.c (+76, -46)
@@ -456,8 +456,8 @@ static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
 static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
                                      size_t *len) {

-    if (!PyDateTime_Check(obj)) {
-        PyErr_SetString(PyExc_TypeError, "Expected datetime object");
+    if (!PyDate_Check(obj)) {
+        PyErr_SetString(PyExc_TypeError, "Expected date object");
         return NULL;
     }

@@ -469,7 +469,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) {
     npy_datetimestruct dts;
     int ret;

-    if (!PyDateTime_Check(obj)) {
+    if (!PyDate_Check(obj)) {
         // TODO: raise TypeError
     }
     PyDateTime_Date *dt = (PyDateTime_Date *)obj;
@@ -1504,6 +1504,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     char **ret;
     char *dataptr, *cLabel;
     int type_num;
+    NPY_DATETIMEUNIT base = enc->datetimeUnit;
     PRINTMARK();

     if (!labels) {
@@ -1541,60 +1542,85 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
             break;
         }

-        // TODO: vectorized timedelta solution
-        if (enc->datetimeIso &&
-            (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) {
-            PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item);
-            if (td == NULL) {
-                Py_DECREF(item);
-                NpyArr_freeLabels(ret, num);
-                ret = 0;
-                break;
-            }
-
-            PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL);
-            Py_DECREF(td);
-            if (iso == NULL) {
-                Py_DECREF(item);
-                NpyArr_freeLabels(ret, num);
-                ret = 0;
-                break;
-            }
-
-            cLabel = (char *)PyUnicode_AsUTF8(iso);
-            Py_DECREF(iso);
-            len = strlen(cLabel);
-        } else if (PyTypeNum_ISDATETIME(type_num)) {
-            NPY_DATETIMEUNIT base = enc->datetimeUnit;
-            npy_int64 longVal;
+        int is_datetimelike = 0;
+        npy_int64 nanosecVal;
+        if (PyTypeNum_ISDATETIME(type_num)) {
+            is_datetimelike = 1;
             PyArray_VectorUnaryFunc *castfunc =
                 PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64);
             if (!castfunc) {
                 PyErr_Format(PyExc_ValueError,
                              "Cannot cast numpy dtype %d to long",
                              enc->npyType);
             }
-            castfunc(dataptr, &longVal, 1, NULL, NULL);
-            if (enc->datetimeIso) {
-                cLabel = int64ToIso(longVal, base, &len);
+            castfunc(dataptr, &nanosecVal, 1, NULL, NULL);
+        } else if (PyDate_Check(item) || PyDelta_Check(item)) {
+            is_datetimelike = 1;
+            if (PyObject_HasAttrString(item, "value")) {
+                nanosecVal = get_long_attr(item, "value");
             } else {
-                if (!scaleNanosecToUnit(&longVal, base)) {
-                    // TODO: This gets hit but somehow doesn't cause errors
-                    // need to clean up (elsewhere in module as well)
+                if (PyDelta_Check(item)) {
+                    nanosecVal = total_seconds(item) *
+                                 1000000000LL; // nanoseconds per second
+                } else {
+                    // datetime.* objects don't follow above rules
+                    nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
                 }
-                cLabel = PyObject_Malloc(21); // 21 chars for int64
-                sprintf(cLabel, "%" NPY_INT64_FMT, longVal);
-                len = strlen(cLabel);
             }
-        } else if (PyDateTime_Check(item) || PyDate_Check(item)) {
-            NPY_DATETIMEUNIT base = enc->datetimeUnit;
-            if (enc->datetimeIso) {
-                cLabel = PyDateTimeToIso((PyDateTime_Date *)item, base, &len);
+        }
+
+        if (is_datetimelike) {
+            if (nanosecVal == get_nat()) {
+                len = 5; // TODO: shouldn't require extra space for terminator
+                cLabel = PyObject_Malloc(len);
+                strncpy(cLabel, "null", len);
             } else {
-                cLabel = PyObject_Malloc(21); // 21 chars for int64
-                sprintf(cLabel, "%" NPY_DATETIME_FMT,
-                        PyDateTimeToEpoch(item, base));
-                len = strlen(cLabel);
+                if (enc->datetimeIso) {
+                    // TODO: Vectorized Timedelta function
+                    if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
+                        PyObject *td =
+                            PyObject_CallFunction(cls_timedelta, "(O)", item);
+                        if (td == NULL) {
+                            Py_DECREF(item);
+                            NpyArr_freeLabels(ret, num);
+                            ret = 0;
+                            break;
+                        }
+
+                        PyObject *iso =
+                            PyObject_CallMethod(td, "isoformat", NULL);
+                        Py_DECREF(td);
+                        if (iso == NULL) {
+                            Py_DECREF(item);
+                            NpyArr_freeLabels(ret, num);
+                            ret = 0;
+                            break;
+                        }
+
+                        len = strlen(PyUnicode_AsUTF8(iso));
+                        cLabel = PyObject_Malloc(len + 1);
+                        memcpy(cLabel, PyUnicode_AsUTF8(iso), len + 1);
+                        Py_DECREF(iso);
+                    } else {
+                        if (type_num == NPY_DATETIME) {
+                            cLabel = int64ToIso(nanosecVal, base, &len);
+                        } else {
+                            cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
+                                                     base, &len);
+                        }
+                    }
+                    if (cLabel == NULL) {
+                        Py_DECREF(item);
+                        NpyArr_freeLabels(ret, num);
+                        ret = 0;
+                        break;
+                    }
+                } else {
+                    cLabel = PyObject_Malloc(21); // 21 chars for int64
+                    sprintf(cLabel, "%" NPY_DATETIME_FMT,
+                            NpyDateTimeToEpoch(nanosecVal, base));
+                    len = strlen(cLabel);
+                }
             }
         } else { // Fallback to string representation
             PyObject *str = PyObject_Str(item);
@@ -1615,6 +1641,10 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
         ret[i] = PyObject_Malloc(len + 1);
         memcpy(ret[i], cLabel, len + 1);

+        if (is_datetimelike) {
+            PyObject_Free(cLabel);
+        }
+
         if (PyErr_Occurred()) {
            NpyArr_freeLabels(ret, num);
            ret = 0;
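At the Python level, this refactor affects how datetime-like index labels are written by ``to_json``. A hedged sketch of the observable behavior (the key formats in comments are indicative, not verbatim output):

    import pandas as pd

    s = pd.Series([1, 2], index=pd.date_range("2020-01-01", periods=2))

    # ISO-8601 labels go through the PyDateTimeToIso/int64ToIso paths above.
    s.to_json(date_format="iso")    # keys like "2020-01-01T00:00:00.000Z"

    # Epoch labels go through the NpyDateTimeToEpoch path.
    s.to_json(date_format="epoch")  # keys like "1577836800000"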

pandas/core/arrays/boolean.py (+5, -3)
@@ -670,13 +670,15 @@ def _reduce(self, name, skipna=True, **kwargs):
         mask = self._mask

         # coerce to a nan-aware float if needed
-        if mask.any():
-            data = self._data.astype("float64")
-            data[mask] = np.nan
+        if self._hasna:
+            data = self.to_numpy("float64", na_value=np.nan)

         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

+        if np.isnan(result):
+            return libmissing.NA
+
         # if we have numeric op that would result in an int, coerce to int if possible
         if name in ["sum", "prod"] and notna(result):
             int_result = np.int64(result)
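A hedged usage sketch of the new ``_reduce`` behavior: reductions over a ``BooleanArray`` containing missing values now propagate ``pd.NA`` when ``skipna=False`` instead of leaking ``np.nan``:

    import pandas as pd

    arr = pd.array([True, False, None], dtype="boolean")

    pd.Series(arr).sum()              # 1 -- NA skipped by default
    pd.Series(arr).sum(skipna=False)  # <NA> -- nan result converted to pd.NA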

pandas/core/arrays/datetimelike.py (+23, -4)
@@ -743,17 +743,36 @@ def searchsorted(self, value, side="left", sorter=None):
             Array of insertion points with the same shape as `value`.
         """
         if isinstance(value, str):
-            value = self._scalar_from_string(value)
+            try:
+                value = self._scalar_from_string(value)
+            except ValueError:
+                raise TypeError("searchsorted requires compatible dtype or scalar")
+
+        elif is_valid_nat_for_dtype(value, self.dtype):
+            value = NaT
+
+        elif isinstance(value, self._recognized_scalars):
+            value = self._scalar_type(value)
+
+        elif isinstance(value, np.ndarray):
+            if not type(self)._is_recognized_dtype(value):
+                raise TypeError(
+                    "searchsorted requires compatible dtype or scalar, "
+                    f"not {type(value).__name__}"
+                )
+            value = type(self)(value)
+            self._check_compatible_with(value)

-        if not (isinstance(value, (self._scalar_type, type(self))) or isna(value)):
-            raise ValueError(f"Unexpected type for 'value': {type(value)}")
+        if not (isinstance(value, (self._scalar_type, type(self))) or (value is NaT)):
+            raise TypeError(f"Unexpected type for 'value': {type(value)}")

-        self._check_compatible_with(value)
         if isinstance(value, type(self)):
+            self._check_compatible_with(value)
             value = value.asi8
         else:
             value = self._unbox_scalar(value)

+        # TODO: Use datetime64 semantics for sorting, xref GH#29844
         return self.asi8.searchsorted(value, side=side, sorter=sorter)

     def repeat(self, repeats, *args, **kwargs):
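A hedged sketch of the revised ``searchsorted`` contract (:issue:`30950`), assuming ``DatetimeIndex.searchsorted`` delegates to the array method: non-pandas scalars are coerced before searching, and incompatible inputs now raise ``TypeError`` rather than ``ValueError``:

    from datetime import datetime

    import numpy as np
    import pandas as pd

    dti = pd.date_range("2020-01-01", periods=3)

    # Non-pandas scalars are now recognized and unboxed before searching.
    dti.searchsorted(np.datetime64("2020-01-02"))  # 1
    dti.searchsorted(datetime(2020, 1, 2))         # 1

    # Incompatible input: TypeError instead of the old ValueError.
    try:
        dti.searchsorted("not a timestamp")
    except TypeError as exc:
        print(exc)  # searchsorted requires compatible dtype or scalar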
