Skip to content

Commit 17c83e7

Browse files
committed
TST: Refactor and improve tests for Pandas 1.5
1 parent 5aaf92b commit 17c83e7

12 files changed

+46
-41
lines changed

gramex/config.py

+5
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,11 @@ def used_kwargs(method, kwargs, ignore_keywords=False):
835835
If the method uses ``**kwargs`` (keywords), it uses all keys. To ignore this
836836
and return only named arguments, use ``ignore_keywords=True``.
837837
'''
838+
# In Pandas 1.5, DataFrame.to_csv and DataFrame.to_excel are wrapped with @deprecate_kwarg.
839+
# We dive deeper to detect the actual keywords. __wrapped__ is provided by functools.wraps
840+
# https://docs.python.org/3/library/functools.html
841+
while hasattr(method, '__wrapped__'):
842+
method = method.__wrapped__
838843
argspec = inspect.getfullargspec(method)
839844
# If method uses **kwargs, return all kwargs (unless you ignore **kwargs)
840845
if argspec.varkw and not ignore_keywords:

gramex/data.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1851,7 +1851,7 @@ def _filter_mongodb(
18511851
if len(data) > 0:
18521852
import bson
18531853

1854-
for col, val in data.iloc[0].iteritems():
1854+
for col, val in data.iloc[0].items():
18551855
if type(val) in {bson.objectid.ObjectId}:
18561856
data[col] = data[col].map(str)
18571857

gramex/gramex.yaml

+14-10
Original file line numberDiff line numberDiff line change
@@ -269,17 +269,21 @@ app:
269269
# Login parameters
270270
login_url: /login/ # URL used to log in
271271

272+
# Configure how sessions work. See https://gramener.com/gramex/guide/auth/#session-data
272273
session:
273-
cookie: sid # Name of the session ID cookie
274-
type: json # Type of store to use: hdf5, json or memory
275-
flush:
276-
5 # Write store to disk periodically (in seconds)
277-
# For type:json, use 5 seconds. It does not save to disk automatically
278-
# For type:hdf5 or sqlite, 60 seconds or so is fine
279-
purge: 3600 # Purge every hour
280-
path: $GRAMEXDATA/session.json # Path to the store (ignored for memory)
281-
expiry: 31 # Session cookies expiry in days
282-
httponly: true # Only HTTP access, no JS access
274+
# Save in a cookie called sid:
275+
cookie: sid
276+
# Save in a JSON store
277+
type: json
278+
path: $GRAMEXDATA/session.json
279+
# Flush every 5 seconds
280+
flush: 5
281+
# Clear expired sessions every hour
282+
purge: 3600
283+
# Cookies expire after 31 days
284+
expiry: 31
285+
# Browsers cannot use JS to access session cookie. Only HTTP access allowed, for security
286+
httponly: true
283287

284288
# The storelocations: section defines where Gramex stores its data.
285289
storelocations:

gramex/handlers/authhandler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def set_user(self, user, id):
172172
user.update(
173173
{
174174
key: val
175-
for key, val in users.iloc[0].iteritems()
175+
for key, val in users.iloc[0].items()
176176
if not gramex.data.pd.isnull(val)
177177
}
178178
)

gramex/handlers/mlhandler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def _parse_multipart_form_data(self):
139139
return pd.concat(dfs, axis=0)
140140

141141
def _parse_application_json(self):
142-
return pd.read_json(self.request.body)
142+
return pd.read_json(self.request.body.decode('utf-8'))
143143

144144
def _parse_data(self, _cache=True, append=False):
145145
header = self.request.headers.get('Content-Type', '').split(';')[0]

gramex/install.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,11 @@ def _ensure_remove(remove, path, exc_info):
178178
def _ensure_remove(func, path, exc_info):
179179
'''onerror callback for rmtree that tries hard to delete files'''
180180
if issubclass(exc_info[0], WindowsError):
181-
import winerror
182-
181+
winerror = AttrDict(
182+
ERROR_PATH_NOT_FOUND=3,
183+
ERROR_ACCESS_DENIED=5,
184+
ERROR_SHARING_VIOLATION=32,
185+
)
183186
# Delete read-only files
184187
# https://bugs.python.org/issue19643
185188
# https://bugs.python.org/msg218021

testlib/test_cache_module.py

-2
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,6 @@ def test_save(self):
333333
'html': {'index': False, 'escape': False, 'ignore_keyword': 1},
334334
'hdf': {'index': False, 'key': 'data', 'format': 'fixed', 'ignore_keyword': 1},
335335
'json': {'orient': 'records', 'ignore_keyword': 1},
336-
# In Pandas 1.5, .to_csv() and .to_xlsx() accept **kwargs, but raise for invalid ones.
337-
# TODO: Fix gramex.cache.save or gramex.config.used_kwargs to ignore invalid keywords.
338336
'csv': {'index': False, 'ignore_keyword': 1},
339337
'xlsx': {'index': False, 'sheet_name': 'Sheet1', 'ignore_keyword': 1},
340338
# 'stata': dict(index=False), # cannot test since it doesn't support unicode

testlib/test_pptgen2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ def test_table(self, slides=9):
899899
for row_offset, shape_name in ((1, 'Table 1'), (0, 'Table 2')):
900900
table = self.get_shape(prs.slides[0].shapes, shape_name).table
901901
for i, (index, row) in enumerate(data.iterrows()):
902-
for j, (column, val) in enumerate(row.iteritems()):
902+
for j, (column, val) in enumerate(row.items()):
903903
cell = table.rows[i + row_offset].cells[j]
904904
eq_(cell.text, '{}'.format(val))
905905
# Test table header

testlib/test_store.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
from nose.tools import eq_, ok_
77
from nose.plugins.skip import SkipTest
88
import gramex.cache
9-
from gramex.cache import JSONStore, SQLiteStore, HDF5Store, RedisStore
9+
from gramex.cache import JSONStore, SQLiteStore, RedisStore
1010

1111
# It must be possible to import from basehandler for backward-compatibility
12-
from gramex.handlers.basehandler import JSONStore, SQLiteStore, HDF5Store, RedisStore # noqa
12+
from gramex.handlers.basehandler import JSONStore, SQLiteStore, RedisStore # noqa
1313
from gramex.handlers.basehandler import BaseMixin
1414
from gramex.config import variables
1515
from . import tests_dir
@@ -138,16 +138,6 @@ def load(self):
138138
return {self.store._escape(key): val for key, val in self.store.store.items()}
139139

140140

141-
class TestHDF5Store(TestJSONStore):
142-
store_class = HDF5Store
143-
store_file = 'data.h5'
144-
145-
def load(self):
146-
return {
147-
key.replace('\t', '/'): json.loads(val[()]) for key, val in self.store.store.items()
148-
}
149-
150-
151141
class TestRedisStore(TestJSONStore):
152142
@classmethod
153143
def setupClass(cls):

tests/requirements.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ mkdocs # For documentation
1616
mkdocstrings # For documentation
1717
mkdocstrings[python] # For documentation
1818

19-
markdown # Markdown is part of gramex/release.json, but re-install since
20-
# importlib_metadata version conflicts with makdocs
19+
# Markdown is part of gramex/release.json, but see if we need to
20+
# re-install since importlib_metadata version conflicts with mkdocs
21+
# markdown
2122

2223
transformers # For MLHandler
2324
datasets # For MLHandler

tests/test_capturehandler.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from gramex.handlers import Capture
2424
from . import TestGramex, server
2525

26+
_timeout = 15
2627
_captures = {}
2728
paths = {'phantomjs': which('phantomjs'), 'node': which('node')}
2829

@@ -215,7 +216,7 @@ class TestCaptureHandlerChrome(TestCaptureHandler):
215216

216217
@classmethod
217218
def setupClass(cls):
218-
cls.capture = get_capture('chrome', port=9412, engine='chrome', timeout=15)
219+
cls.capture = get_capture('chrome', port=9412, engine='chrome', timeout=_timeout)
219220
cls.folder = os.path.dirname(os.path.abspath(__file__))
220221

221222
@staticmethod
@@ -339,14 +340,17 @@ def test_headers(self):
339340
ok_(user['role'] in text)
340341

341342
def err(self, code, **params):
342-
return self.fetch(self.src, code=code, params=params)
343+
return self.fetch(self.src, code=code, params=params, timeout=_timeout)
343344

344345
def test_errors(self):
345346
# nonexistent URLs should capture the 404 page and return a screenshot
346347
self.err(code=200, url='/nonexistent')
347-
self.err(code=500, url='http://nonexistent')
348+
# Invalid capture formats report HTTP 400
348349
self.err(code=400, url=self.url, ext='nonexistent')
350+
# Invalid selectors report HTTP 500
349351
self.err(code=500, url=self.url, selector='nonexistent', ext='png')
352+
# Invalid domains report HTTP 500
353+
self.err(code=500, url='http://nonexistent')
350354

351355
def test_pdf_header_footer(self):
352356
result = self.fetch(

tests/test_mlhandler.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ def tearDownClass(cls):
5151
shutil.rmtree(path)
5252

5353
def test_default_sentiment(self):
54-
"""Ensure that the default model predicts something."""
54+
# Ensure that the default model predicts something
5555
resp = self.get("/sentiment?text=This is bad.&text=This is good.", timeout=60)
5656
self.assertEqual(resp.json(), ["NEGATIVE", "POSITIVE"])
5757

5858
@skipUnless(os.environ.get("GRAMEX_ML_TESTS"), "Set GRAMEX_ML_TESTS=1 to run slow ML tests")
5959
def test_train_sentiment(self):
60-
"""Train with some vague sentences."""
60+
# Train with some vague sentences
6161
df = pd.read_json("https://bit.ly/3NesHFs")
6262
resp = self.get(
6363
"/sentiment?_action=train&target_col=label",
@@ -69,7 +69,7 @@ def test_train_sentiment(self):
6969
self.assertGreaterEqual(resp.json()["score"], 0.9)
7070

7171
def test_default_ner(self):
72-
"""Ensure that the default model predicts something."""
72+
# Ensure that the default model predicts something
7373
labels = self.get("/ner?text=Narendra Modi is the PM of India.", timeout=300).json()
7474
ents = [[(r["word"], r["entity_group"]) for r in label] for label in labels]
7575
self.assertIn(("Narendra Modi", "PER"), ents[0])
@@ -661,7 +661,7 @@ def test_single_line_train_fetch_model(self):
661661
self.assertIsInstance(model["DecisionTreeClassifier"], DecisionTreeClassifier)
662662

663663
def test_template(self):
664-
"""Check if viewing the template works fine."""
664+
# Check if viewing the template works fine
665665
r = self.get("/mlhandler")
666666
self.assertEqual(r.status_code, OK)
667667
# Try getting predictions
@@ -772,7 +772,7 @@ def tearDownClass(cls):
772772
shutil.rmtree(titanic)
773773

774774
def test_default(self):
775-
"""An empty GET request returns all predictions."""
775+
# An empty GET request returns all predictions
776776
y_true = gramex.cache.open(
777777
op.join(folder, "..", "testlib", "iris.csv"),
778778
usecols=["species"],
@@ -820,7 +820,7 @@ def test_filters(self):
820820
self.assertEqual(set(y_pred), {'versicolor', 'virginica'})
821821

822822
def test_from_mlhandler(self):
823-
"""Check that a model created through MLHandler works."""
823+
# Check that a model created through MLHandler works
824824
# Get results from the MLHandler
825825
df = gramex.cache.open('titanic.csv', rel=True)
826826
resp = self.get(

0 commit comments

Comments
 (0)