Skip to content

Commit 523f9c8

Browse files
Merge remote-tracking branch 'upstream/master' into typing
2 parents aee7579 + 2d65e38 commit 523f9c8

File tree

16 files changed

+144
-55
lines changed

16 files changed

+144
-55
lines changed

asv_bench/asv.conf.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,13 @@
5050
"xlsxwriter": [],
5151
"xlrd": [],
5252
"xlwt": [],
53+
"odfpy": [],
5354
"pytest": [],
5455
// If using Windows with python 2.7 and want to build using the
5556
// mingw toolchain (rather than MSVC), uncomment the following line.
5657
// "libpython": [],
5758
},
58-
59+
"conda_channels": ["defaults", "conda-forge"],
5960
// Combinations of libraries/python versions can be excluded/included
6061
// from the set to test. Each entry is a dictionary containing additional
6162
// key-value pairs to include/exclude.

asv_bench/benchmarks/io/excel.py

+54-22
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,72 @@
11
from io import BytesIO
22

33
import numpy as np
4+
from odf.opendocument import OpenDocumentSpreadsheet
5+
from odf.table import Table, TableCell, TableRow
6+
from odf.text import P
47

58
from pandas import DataFrame, ExcelWriter, date_range, read_excel
69
import pandas.util.testing as tm
710

811

9-
class Excel:
12+
def _generate_dataframe():
13+
N = 2000
14+
C = 5
15+
df = DataFrame(
16+
np.random.randn(N, C),
17+
columns=["float{}".format(i) for i in range(C)],
18+
index=date_range("20000101", periods=N, freq="H"),
19+
)
20+
df["object"] = tm.makeStringIndex(N)
21+
return df
22+
23+
24+
class WriteExcel:
1025

1126
params = ["openpyxl", "xlsxwriter", "xlwt"]
1227
param_names = ["engine"]
1328

1429
def setup(self, engine):
15-
N = 2000
16-
C = 5
17-
self.df = DataFrame(
18-
np.random.randn(N, C),
19-
columns=["float{}".format(i) for i in range(C)],
20-
index=date_range("20000101", periods=N, freq="H"),
21-
)
22-
self.df["object"] = tm.makeStringIndex(N)
23-
self.bio_read = BytesIO()
24-
self.writer_read = ExcelWriter(self.bio_read, engine=engine)
25-
self.df.to_excel(self.writer_read, sheet_name="Sheet1")
26-
self.writer_read.save()
27-
self.bio_read.seek(0)
28-
29-
def time_read_excel(self, engine):
30-
read_excel(self.bio_read)
30+
self.df = _generate_dataframe()
3131

3232
def time_write_excel(self, engine):
33-
bio_write = BytesIO()
34-
bio_write.seek(0)
35-
writer_write = ExcelWriter(bio_write, engine=engine)
36-
self.df.to_excel(writer_write, sheet_name="Sheet1")
37-
writer_write.save()
33+
bio = BytesIO()
34+
bio.seek(0)
35+
writer = ExcelWriter(bio, engine=engine)
36+
self.df.to_excel(writer, sheet_name="Sheet1")
37+
writer.save()
38+
39+
40+
class ReadExcel:
41+
42+
params = ["xlrd", "openpyxl", "odf"]
43+
param_names = ["engine"]
44+
fname_excel = "spreadsheet.xlsx"
45+
fname_odf = "spreadsheet.ods"
46+
47+
def _create_odf(self):
48+
doc = OpenDocumentSpreadsheet()
49+
table = Table(name="Table1")
50+
for row in self.df.values:
51+
tr = TableRow()
52+
for val in row:
53+
tc = TableCell(valuetype="string")
54+
tc.addElement(P(text=val))
55+
tr.addElement(tc)
56+
table.addElement(tr)
57+
58+
doc.spreadsheet.addElement(table)
59+
doc.save(self.fname_odf)
60+
61+
def setup_cache(self):
62+
self.df = _generate_dataframe()
63+
64+
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
65+
self._create_odf()
66+
67+
def time_read_excel(self, engine):
68+
fname = self.fname_odf if engine == "odf" else self.fname_excel
69+
read_excel(fname, engine=engine)
3870

3971

4072
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/package.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Benchmarks for pandas at the package-level.
3+
"""
4+
import subprocess
5+
import sys
6+
7+
from pandas.compat import PY37
8+
9+
10+
class TimeImport:
11+
def time_import(self):
12+
if PY37:
13+
# on py37+ the "-X importtime" usage gives us a more precise
14+
# measurement of the import time we actually care about,
15+
# without the subprocess or interpreter overhead
16+
cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
17+
p = subprocess.run(cmd, stderr=subprocess.PIPE)
18+
19+
line = p.stderr.splitlines()[-1]
20+
field = line.split(b"|")[-2].strip()
21+
total = int(field) # microseconds
22+
return total
23+
24+
cmd = [sys.executable, "-c", "import pandas as pd"]
25+
subprocess.run(cmd, stderr=subprocess.PIPE)

ci/code_checks.sh

+7-3
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
203203
import sys
204204
import pandas
205205
206-
blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis',
206+
blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
207207
'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
208-
'tables', 'xlrd', 'xlsxwriter', 'xlwt'}
209-
mods = blacklist & set(m.split('.')[0] for m in sys.modules)
208+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
209+
210+
# GH#28227 for some of these we check for top-level modules, while others are
211+
# more specific (e.g. urllib.request)
212+
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
213+
mods = blacklist & import_mods
210214
if mods:
211215
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
212216
sys.exit(len(mods))

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,5 @@ dependencies:
8080
- xlrd # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
8181
- xlsxwriter # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
8282
- xlwt # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile
83+
- odfpy # pandas.read_excel
8384
- pyreadstat # pandas.read_spss

pandas/core/dtypes/dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
ordered_sentinel = object() # type: object
2525

2626

27-
def register_extension_dtype(cls: Type[ExtensionDtype],) -> Type[ExtensionDtype]:
27+
def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
2828
"""
2929
Register an ExtensionType with pandas as class decorator.
3030

pandas/core/ops/__init__.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,13 @@ def na_op(x, y):
807807
return result
808808

809809
fill_int = lambda x: x.fillna(0)
810-
fill_bool = lambda x: x.fillna(False).astype(bool)
810+
811+
def fill_bool(x, left=None):
812+
# if `left` is specifically not-boolean, we do not cast to bool
813+
x = x.fillna(False)
814+
if left is None or is_bool_dtype(left.dtype):
815+
x = x.astype(bool)
816+
return x
811817

812818
def wrapper(self, other):
813819
is_self_int_dtype = is_integer_dtype(self.dtype)
@@ -836,7 +842,7 @@ def wrapper(self, other):
836842

837843
elif isinstance(other, (ABCSeries, ABCIndexClass)):
838844
is_other_int_dtype = is_integer_dtype(other.dtype)
839-
other = other if is_other_int_dtype else fill_bool(other)
845+
other = other if is_other_int_dtype else fill_bool(other, self)
840846

841847
else:
842848
# scalars, list, tuple, np.array

pandas/io/common.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import codecs
55
import csv
66
import gzip
7-
from http.client import HTTPException # noqa
87
from io import BufferedIOBase, BytesIO
98
import mmap
109
import os
@@ -22,7 +21,6 @@
2221
Type,
2322
Union,
2423
)
25-
from urllib.error import URLError # noqa
2624
from urllib.parse import ( # noqa
2725
urlencode,
2826
urljoin,
@@ -31,7 +29,6 @@
3129
uses_params,
3230
uses_relative,
3331
)
34-
from urllib.request import pathname2url, urlopen
3532
import zipfile
3633

3734
from pandas.compat import _get_lzma_file, _import_lzma
@@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool:
188185
return False
189186

190187

188+
def urlopen(*args, **kwargs):
189+
"""
190+
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
191+
the stdlib.
192+
"""
193+
import urllib.request
194+
195+
return urllib.request.urlopen(*args, **kwargs)
196+
197+
191198
def get_filepath_or_buffer(
192199
filepath_or_buffer: FilePathOrBuffer,
193200
encoding: Optional[str] = None,
@@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str:
261268
-------
262269
a valid FILE URL
263270
"""
271+
# lazify expensive import (~30ms)
272+
from urllib.request import pathname2url
273+
264274
return urljoin("file:", pathname2url(path))
265275

266276

pandas/io/excel/_base.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from io import BytesIO
55
import os
66
from textwrap import fill
7-
from urllib.request import urlopen
87

98
from pandas._config import config
109

@@ -21,6 +20,7 @@
2120
_stringify_path,
2221
_validate_header_arg,
2322
get_filepath_or_buffer,
23+
urlopen,
2424
)
2525
from pandas.io.excel._util import (
2626
_fill_mi_header,
@@ -112,7 +112,7 @@
112112
113113
engine : str, default None
114114
If io is not a buffer or path, this must be set to identify io.
115-
Acceptable values are None or xlrd.
115+
Acceptable values are None, "xlrd", "openpyxl" or "odf".
116116
converters : dict, default None
117117
Dict of functions for converting values in certain columns. Keys can
118118
either be integers or column labels, values are functions that take one
@@ -783,11 +783,12 @@ class ExcelFile:
783783
Parameters
784784
----------
785785
io : string, path object (pathlib.Path or py._path.local.LocalPath),
786-
file-like object or xlrd workbook
787-
If a string or path object, expected to be a path to xls or xlsx file.
786+
a file-like object, xlrd workbook or openpyxl workbook.
787+
If a string or path object, expected to be a path to xls, xlsx or odf file.
788788
engine : string, default None
789789
If io is not a buffer or path, this must be set to identify io.
790-
Acceptable values are None or ``xlrd``.
790+
Acceptable values are None, ``xlrd``, ``openpyxl`` or ``odf``.
791+
Note that ``odf`` reads tables out of OpenDocument formatted files.
791792
"""
792793

793794
from pandas.io.excel._odfreader import _ODFReader

pandas/tests/io/excel/test_readers.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from datetime import datetime, time
44
from functools import partial
55
import os
6+
from urllib.error import URLError
67
import warnings
78

89
import numpy as np
@@ -14,8 +15,6 @@
1415
from pandas import DataFrame, Index, MultiIndex, Series
1516
import pandas.util.testing as tm
1617

17-
from pandas.io.common import URLError
18-
1918

2019
@contextlib.contextmanager
2120
def ignore_xlrd_time_clock_warning():

pandas/tests/io/parser/test_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import os
1212
import platform
1313
from tempfile import TemporaryFile
14+
from urllib.error import URLError
1415

1516
import numpy as np
1617
import pytest
@@ -21,7 +22,6 @@
2122
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
2223
import pandas.util.testing as tm
2324

24-
from pandas.io.common import URLError
2525
from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser
2626

2727

pandas/tests/io/test_html.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import re
66
import threading
7+
from urllib.error import URLError
78

89
import numpy as np
910
from numpy.random import rand
@@ -17,7 +18,7 @@
1718
import pandas.util.testing as tm
1819
from pandas.util.testing import makeCustomDataframe as mkdf, network
1920

20-
from pandas.io.common import URLError, file_path_to_url
21+
from pandas.io.common import file_path_to_url
2122
import pandas.io.html
2223
from pandas.io.html import read_html
2324

pandas/tests/series/test_operators.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,8 @@ def test_logical_operators_int_dtype_with_float(self):
103103
s_0123 & [0.1, 4, 3.14, 2]
104104
with pytest.raises(TypeError):
105105
s_0123 & np.array([0.1, 4, 3.14, 2])
106-
107-
# FIXME: this should be consistent with the list case above
108-
expected = Series([False, True, False, True])
109-
result = s_0123 & Series([0.1, 4, -3.14, 2])
110-
assert_series_equal(result, expected)
106+
with pytest.raises(TypeError):
107+
s_0123 & Series([0.1, 4, -3.14, 2])
111108

112109
def test_logical_operators_int_dtype_with_str(self):
113110
s_1111 = Series([1] * 4, dtype="int8")
@@ -145,9 +142,8 @@ def test_logical_operators_int_dtype_with_object(self):
145142
assert_series_equal(result, expected)
146143

147144
s_abNd = Series(["a", "b", np.NaN, "d"])
148-
result = s_0123 & s_abNd
149-
expected = Series([False, True, False, True])
150-
assert_series_equal(result, expected)
145+
with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"):
146+
s_0123 & s_abNd
151147

152148
def test_logical_operators_bool_dtype_with_int(self):
153149
index = list("bca")

pandas/util/_test_decorators.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def _skip_if_no_scipy():
103103
)
104104

105105

106-
def skip_if_installed(package: str,) -> Callable:
106+
def skip_if_installed(package: str) -> Callable:
107107
"""
108108
Skip a test if a package is installed.
109109

0 commit comments

Comments
 (0)