Commit bf40fc2

Merge remote-tracking branch 'upstream/master' into bisect
2 parents 5ecf905 + 5468223 commit bf40fc2

39 files changed (+409 −170 lines)

README.md (+7 −7)

@@ -87,7 +87,7 @@ The source code is currently hosted on GitHub at:
 https://github.com/pandas-dev/pandas
 
 Binary installers for the latest released version are available at the [Python
-package index](https://pypi.org/project/pandas) and on conda.
+Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
 
 ```sh
 # conda
@@ -100,15 +100,15 @@ pip install pandas
 ```
 
 ## Dependencies
-- [NumPy](https://www.numpy.org)
-- [python-dateutil](https://labix.org/python-dateutil)
-- [pytz](https://pythonhosted.org/pytz)
+- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org)
+- [python-dateutil - Provides powerful extensions to the standard datetime module](https://labix.org/python-dateutil)
+- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://pythonhosted.org/pytz)
 
 See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
 
 ## Installation from sources
-To install pandas from source you need Cython in addition to the normal
-dependencies above. Cython can be installed from pypi:
+To install pandas from source you need [Cython](https://cython.org/) in addition to the normal
+dependencies above. Cython can be installed from PyPI:
 
 ```sh
 pip install cython
@@ -145,7 +145,7 @@ See the full instructions for [installing from source](https://pandas.pydata.org
 The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable
 
 ## Background
-Work on ``pandas`` started at AQR (a quantitative hedge fund) in 2008 and
+Work on ``pandas`` started at [AQR](https://www.aqr.com/) (a quantitative hedge fund) in 2008 and
 has been under active development since then.
 
 ## Getting Help

asv_bench/benchmarks/indexing.py (+4 −1)

@@ -3,6 +3,7 @@
 lower-level methods directly on Index and subclasses, see index_object.py,
 indexing_engine.py, and index_cached.py
 """
+import itertools
 import string
 import warnings
 
@@ -256,7 +257,9 @@ def setup(self, index):
             "non_monotonic": CategoricalIndex(list("abc" * N)),
         }
         self.data = indices[index]
-        self.data_unique = CategoricalIndex(list(string.printable))
+        self.data_unique = CategoricalIndex(
+            ["".join(perm) for perm in itertools.permutations(string.printable, 3)]
+        )
 
         self.int_scalar = 10000
         self.int_list = list(range(10000))
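
The new `data_unique` setup above swaps 100 single characters for all 3-character permutations of `string.printable`. A minimal standalone sketch (not part of the commit) of what that construction produces:

```python
# Illustrative only: the permutations are all distinct, so the resulting
# CategoricalIndex is unique and far larger than the previous 100 entries.
import itertools
import string

from pandas import CategoricalIndex

values = ["".join(perm) for perm in itertools.permutations(string.printable, 3)]
idx = CategoricalIndex(values)

print(len(values))    # 100 * 99 * 98 = 970,200 three-character strings
print(idx.is_unique)  # True
```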

asv_bench/benchmarks/io/csv.py (+63 −32)

@@ -1,4 +1,4 @@
-from io import StringIO
+from io import BytesIO, StringIO
 import random
 import string
 
@@ -146,10 +146,10 @@ def time_read_csv(self, bad_date_value):
 class ReadCSVSkipRows(BaseIO):
 
     fname = "__test__.csv"
-    params = [None, 10000]
-    param_names = ["skiprows"]
+    params = ([None, 10000], ["c", "python"])
+    param_names = ["skiprows", "engine"]
 
-    def setup(self, skiprows):
+    def setup(self, skiprows, engine):
         N = 20000
         index = tm.makeStringIndex(N)
         df = DataFrame(
@@ -164,8 +164,8 @@ def setup(self, skiprows):
         )
         df.to_csv(self.fname)
 
-    def time_skipprows(self, skiprows):
-        read_csv(self.fname, skiprows=skiprows)
+    def time_skipprows(self, skiprows, engine):
+        read_csv(self.fname, skiprows=skiprows, engine=engine)
 
 
 class ReadUint64Integers(StringIORewind):
@@ -192,10 +192,10 @@ def time_read_uint64_na_values(self):
 class ReadCSVThousands(BaseIO):
 
     fname = "__test__.csv"
-    params = ([",", "|"], [None, ","])
-    param_names = ["sep", "thousands"]
+    params = ([",", "|"], [None, ","], ["c", "python"])
+    param_names = ["sep", "thousands", "engine"]
 
-    def setup(self, sep, thousands):
+    def setup(self, sep, thousands, engine):
         N = 10000
         K = 8
         data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))
@@ -206,16 +206,19 @@ def setup(self, sep, thousands):
         df = df.applymap(lambda x: fmt.format(x))
         df.to_csv(self.fname, sep=sep)
 
-    def time_thousands(self, sep, thousands):
-        read_csv(self.fname, sep=sep, thousands=thousands)
+    def time_thousands(self, sep, thousands, engine):
+        read_csv(self.fname, sep=sep, thousands=thousands, engine=engine)
 
 
 class ReadCSVComment(StringIORewind):
-    def setup(self):
+    params = ["c", "python"]
+    param_names = ["engine"]
+
+    def setup(self, engine):
         data = ["A,B,C"] + (["1,2,3 # comment"] * 100000)
         self.StringIO_input = StringIO("\n".join(data))
 
-    def time_comment(self):
+    def time_comment(self, engine):
         read_csv(
             self.data(self.StringIO_input), comment="#", header=None, names=list("abc")
         )
@@ -255,25 +258,47 @@ def time_read_csv_python_engine(self, sep, decimal, float_precision):
         )
 
 
+class ReadCSVEngine(StringIORewind):
+    params = ["c", "python"]
+    param_names = ["engine"]
+
+    def setup(self, engine):
+        data = ["A,B,C,D,E"] + (["1,2,3,4,5"] * 100000)
+        self.StringIO_input = StringIO("\n".join(data))
+        # simulate reading from file
+        self.BytesIO_input = BytesIO(self.StringIO_input.read().encode("utf-8"))
+
+    def time_read_stringcsv(self, engine):
+        read_csv(self.data(self.StringIO_input), engine=engine)
+
+    def time_read_bytescsv(self, engine):
+        read_csv(self.data(self.BytesIO_input), engine=engine)
+
+
 class ReadCSVCategorical(BaseIO):
 
     fname = "__test__.csv"
+    params = ["c", "python"]
+    param_names = ["engine"]
 
-    def setup(self):
+    def setup(self, engine):
         N = 100000
         group1 = ["aaaaaaaa", "bbbbbbb", "cccccccc", "dddddddd", "eeeeeeee"]
         df = DataFrame(np.random.choice(group1, (N, 3)), columns=list("abc"))
         df.to_csv(self.fname, index=False)
 
-    def time_convert_post(self):
-        read_csv(self.fname).apply(Categorical)
+    def time_convert_post(self, engine):
+        read_csv(self.fname, engine=engine).apply(Categorical)
 
-    def time_convert_direct(self):
-        read_csv(self.fname, dtype="category")
+    def time_convert_direct(self, engine):
+        read_csv(self.fname, engine=engine, dtype="category")
 
 
 class ReadCSVParseDates(StringIORewind):
-    def setup(self):
+    params = ["c", "python"]
+    param_names = ["engine"]
+
+    def setup(self, engine):
         data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n
         {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n
         {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n
@@ -284,18 +309,20 @@ def setup(self):
         data = data.format(*two_cols)
         self.StringIO_input = StringIO(data)
 
-    def time_multiple_date(self):
+    def time_multiple_date(self, engine):
         read_csv(
             self.data(self.StringIO_input),
+            engine=engine,
             sep=",",
             header=None,
             names=list(string.digits[:9]),
             parse_dates=[[1, 2], [1, 3]],
         )
 
-    def time_baseline(self):
+    def time_baseline(self, engine):
         read_csv(
             self.data(self.StringIO_input),
+            engine=engine,
             sep=",",
             header=None,
             parse_dates=[1],
@@ -304,17 +331,18 @@ def time_baseline(self):
 
 
 class ReadCSVCachedParseDates(StringIORewind):
-    params = ([True, False],)
-    param_names = ["do_cache"]
+    params = ([True, False], ["c", "python"])
+    param_names = ["do_cache", "engine"]
 
-    def setup(self, do_cache):
+    def setup(self, do_cache, engine):
         data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10
         self.StringIO_input = StringIO(data)
 
-    def time_read_csv_cached(self, do_cache):
+    def time_read_csv_cached(self, do_cache, engine):
         try:
             read_csv(
                 self.data(self.StringIO_input),
+                engine=engine,
                 header=None,
                 parse_dates=[0],
                 cache_dates=do_cache,
@@ -329,37 +357,40 @@ class ReadCSVMemoryGrowth(BaseIO):
     chunksize = 20
     num_rows = 1000
     fname = "__test__.csv"
+    params = ["c", "python"]
+    param_names = ["engine"]
 
-    def setup(self):
+    def setup(self, engine):
         with open(self.fname, "w") as f:
             for i in range(self.num_rows):
                 f.write(f"{i}\n")
 
-    def mem_parser_chunks(self):
+    def mem_parser_chunks(self, engine):
         # see gh-24805.
-        result = read_csv(self.fname, chunksize=self.chunksize)
+        result = read_csv(self.fname, chunksize=self.chunksize, engine=engine)
 
         for _ in result:
             pass
 
 
 class ReadCSVParseSpecialDate(StringIORewind):
-    params = (["mY", "mdY", "hm"],)
-    param_names = ["value"]
+    params = (["mY", "mdY", "hm"], ["c", "python"])
+    param_names = ["value", "engine"]
     objects = {
         "mY": "01-2019\n10-2019\n02/2000\n",
         "mdY": "12/02/2010\n",
         "hm": "21:34\n",
     }
 
-    def setup(self, value):
+    def setup(self, value, engine):
         count_elem = 10000
         data = self.objects[value] * count_elem
         self.StringIO_input = StringIO(data)
 
-    def time_read_special_date(self, value):
+    def time_read_special_date(self, value, engine):
         read_csv(
             self.data(self.StringIO_input),
+            engine=engine,
             sep=",",
             header=None,
             names=["Date"],

ci/code_checks.sh (+4 −0)

@@ -178,6 +178,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/strings/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests sql.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/io/sql.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # Directories
 
     MSG='Doctests arrays'; echo $MSG
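
For context on the new check, `pytest -q --doctest-modules pandas/io/sql.py` imports the module and runs every interactive example embedded in its docstrings. A minimal sketch (hypothetical function, not taken from pandas/io/sql.py) of the kind of docstring that check exercises:

```python
def row_count(rows):
    """
    Return the number of rows in a fetched result set.

    Examples
    --------
    >>> row_count([("a", 1), ("b", 2)])
    2
    """
    # pytest --doctest-modules executes the >>> line and compares its output
    # against the line that follows it.
    return len(rows)
```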

ci/deps/azure-37-slow.yaml (+1 −1)

@@ -31,7 +31,7 @@ dependencies:
   - moto>=1.3.14
   - scipy
   - sqlalchemy
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - moto

ci/deps/azure-38-locale.yaml (+1 −1)

@@ -30,7 +30,7 @@ dependencies:
   - pytz
   - scipy
   - xarray
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - moto

ci/deps/azure-macos-37.yaml (+1 −1)

@@ -26,7 +26,7 @@ dependencies:
   - python-dateutil==2.7.3
   - pytz
   - xarray
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - pip

ci/deps/azure-windows-37.yaml (+1 −1)

@@ -33,7 +33,7 @@ dependencies:
   - s3fs>=0.4.2
   - scipy
   - sqlalchemy
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - pyreadstat

ci/deps/azure-windows-38.yaml (+1 −1)

@@ -31,6 +31,6 @@ dependencies:
   - pytz
   - s3fs>=0.4.0
   - scipy
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt

ci/deps/travis-37-cov.yaml (+1 −1)

@@ -43,7 +43,7 @@ dependencies:
   - sqlalchemy
   - statsmodels
   - xarray
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - pip

ci/deps/travis-37-locale.yaml (+1 −1)

@@ -35,7 +35,7 @@ dependencies:
   - pytables>=3.5.1
   - scipy
   - xarray=0.12.3
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - moto

ci/deps/travis-38-slow.yaml (+1 −1)

@@ -30,7 +30,7 @@ dependencies:
   - moto>=1.3.14
   - scipy
   - sqlalchemy
-  - xlrd
+  - xlrd<2.0
   - xlsxwriter
   - xlwt
   - moto
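
Each CI environment above now pins `xlrd<2.0`. A minimal sketch of why the pin matters (the rationale is an assumption, not stated in the commit: xlrd 2.x reads only legacy `.xls` files, so `.xlsx` handling shifts to openpyxl); file names below are hypothetical placeholders:

```python
import pandas as pd

# With xlrd < 2.0 installed, the legacy engine still reads .xls workbooks ...
df_xls = pd.read_excel("legacy_report.xls", engine="xlrd")

# ... while openpyxl handles modern .xlsx workbooks.
df_xlsx = pd.read_excel("modern_report.xlsx", engine="openpyxl")
```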

doc/source/whatsnew/v1.3.0.rst (+2 −3)

@@ -170,7 +170,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 
--
+- Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`)
 -
 
 Datetimelike
@@ -195,7 +195,6 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
 - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`)
--
 
 Conversion
 ^^^^^^^^^^
@@ -232,7 +231,7 @@ MultiIndex
 ^^^^^^^^^^
 
 - Bug in :meth:`DataFrame.drop` raising ``TypeError`` when :class:`MultiIndex` is non-unique and no level is provided (:issue:`36293`)
--
+- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`)
 
 I/O
 ^^^
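
A minimal sketch (illustrative values, not taken from the issue) of the `MultiIndex.equals` entry above — two indexes holding ``NaN`` in different row order should not compare equal:

```python
import numpy as np
import pandas as pd

left = pd.MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
right = pd.MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])

# Expected False; the bug fixed here made affected versions return True.
print(left.equals(right))
```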
