diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index f93950224eaae..1b88d4d90d3e1 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -314,7 +314,7 @@ jobs: timeout-minutes: 90 concurrency: - #https://github.community/t/concurrecy-not-work-for-push/183068/7 + # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev cancel-in-progress: true @@ -346,3 +346,62 @@ jobs: - name: Run Tests uses: ./.github/actions/run-tests + + emscripten: + # Note: the Python version, Emscripten toolchain version are determined + # by the Pyodide version. The appropriate versions can be found in the + # Pyodide repodata.json "info" field, or in the Makefile.envs file: + # https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2 + # The Node.js version can be determined via Pyodide: + # https://pyodide.org/en/stable/usage/index.html#node-js + name: Pyodide build + runs-on: ubuntu-22.04 + concurrency: + # https://github.community/t/concurrecy-not-work-for-push/183068/7 + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm + cancel-in-progress: true + steps: + - name: Checkout pandas Repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python for Pyodide + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: '3.11.3' + + - name: Set up Emscripten toolchain + uses: mymindstorm/setup-emsdk@v14 + with: + version: '3.1.46' + actions-cache-folder: emsdk-cache + + - name: Install pyodide-build + run: pip install "pyodide-build==0.25.1" + + - name: Build pandas for Pyodide + run: | + pyodide build + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '18' + + - name: Set up Pyodide virtual environment + run: | + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + pip install dist/*.whl + + - name: Test pandas for Pyodide + env: + PANDAS_CI: 1 + run: | + source .venv-pyodide/bin/activate + pip install pytest hypothesis + # do not import pandas from the checked out repo + cd .. + python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])' diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index caa00b205a29c..df18955089aea 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -22,6 +22,7 @@ PY311, PY312, PYPY, + WASM, ) import pandas.compat.compressors from pandas.compat.numpy import is_numpy_dev @@ -195,4 +196,5 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: "PY311", "PY312", "PYPY", + "WASM", ] diff --git a/pandas/compat/_constants.py b/pandas/compat/_constants.py index 7bc3fbaaefebf..2625389e5254a 100644 --- a/pandas/compat/_constants.py +++ b/pandas/compat/_constants.py @@ -17,6 +17,7 @@ PY311 = sys.version_info >= (3, 11) PY312 = sys.version_info >= (3, 12) PYPY = platform.python_implementation() == "PyPy" +WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"]) ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "") REF_COUNT = 2 if PY311 else 3 @@ -27,4 +28,5 @@ "PY311", "PY312", "PYPY", + "WASM", ] diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index f5857b952dfc1..e0c5e337fb746 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas.compat import WASM + from pandas.core.dtypes.common import is_number from pandas import ( @@ -54,6 +56,7 @@ def test_apply_np_reducer(op, how): tm.assert_series_equal(result, expected) +@pytest.mark.skipif(WASM, reason="No fp exception support in wasm") @pytest.mark.parametrize( "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] ) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 0ecb8f9bef468..4583155502374 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from pandas.compat import WASM from pandas.errors import OutOfBoundsDatetime import pandas as pd @@ -1741,6 +1742,7 @@ def test_td64_div_object_mixed_result(self, box_with_array): # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ + @pytest.mark.skipif(WASM, reason="no fp exception support in wasm") def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): # GH#35529 box = box_with_array diff --git a/pandas/tests/indexes/datetimes/methods/test_normalize.py b/pandas/tests/indexes/datetimes/methods/test_normalize.py index 74711f67e6446..0ce28d60b53b9 100644 --- a/pandas/tests/indexes/datetimes/methods/test_normalize.py +++ b/pandas/tests/indexes/datetimes/methods/test_normalize.py @@ -2,6 +2,7 @@ import numpy as np import pytest +from pandas.compat import WASM import pandas.util._test_decorators as td from pandas import ( @@ -70,6 +71,9 @@ def test_normalize_tz(self): assert not rng.is_normalized @td.skip_if_windows + @pytest.mark.skipif( + WASM, reason="tzset is available only on Unix-like systems, not WASM" + ) @pytest.mark.parametrize( "timezone", [ diff --git a/pandas/tests/indexes/datetimes/methods/test_resolution.py b/pandas/tests/indexes/datetimes/methods/test_resolution.py index 8399fafbbaff2..42c3ab0617b7c 100644 --- a/pandas/tests/indexes/datetimes/methods/test_resolution.py +++ b/pandas/tests/indexes/datetimes/methods/test_resolution.py @@ -1,7 +1,10 @@ from dateutil.tz import tzlocal import pytest -from pandas.compat import IS64 +from pandas.compat import ( + IS64, + WASM, +) from pandas import date_range @@ -20,9 +23,10 @@ ("us", "microsecond"), ], ) +@pytest.mark.skipif(WASM, reason="OverflowError received on WASM") def test_dti_resolution(request, tz_naive_fixture, freq, expected): tz = tz_naive_fixture - if freq == "YE" and not IS64 and isinstance(tz, tzlocal): + if freq == "YE" and ((not IS64) or WASM) and isinstance(tz, tzlocal): request.applymarker( pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") ) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index c93c80a7bb084..ba31a9bc15fb5 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -15,6 +15,7 @@ import numpy as np import pytest +from pandas.compat import WASM from pandas.errors import ( EmptyDataError, ParserError, @@ -80,6 +81,7 @@ def test_path_path_lib(all_parsers): tm.assert_frame_equal(df, result) +@pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_nonexistent_path(all_parsers): # gh-2428: pls no segfault # gh-14086: raise more helpful FileNotFoundError @@ -93,6 +95,7 @@ def test_nonexistent_path(all_parsers): assert path == e.value.filename +@pytest.mark.skipif(WASM, reason="limited file system access on WASM") @td.skip_if_windows # os.chmod does not work in windows def test_no_permission(all_parsers): # GH 23784 diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 98a460f221592..ab2e1ee138315 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -18,6 +18,7 @@ import numpy as np import pytest +from pandas.compat import WASM from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import ( ParserError, @@ -94,15 +95,16 @@ def test_dtype_and_names_error(c_parser_only): """ # fallback casting, but not castable warning = RuntimeWarning if np_version_gte1p24 else None - with pytest.raises(ValueError, match="cannot safely convert"): - with tm.assert_produces_warning(warning, check_stacklevel=False): - parser.read_csv( - StringIO(data), - sep=r"\s+", - header=None, - names=["a", "b"], - dtype={"a": np.int32}, - ) + if not WASM: # no fp exception support in wasm + with pytest.raises(ValueError, match="cannot safely convert"): + with tm.assert_produces_warning(warning, check_stacklevel=False): + parser.read_csv( + StringIO(data), + sep=r"\s+", + header=None, + names=["a", "b"], + dtype={"a": np.int32}, + ) @pytest.mark.parametrize( @@ -550,6 +552,7 @@ def test_chunk_whitespace_on_boundary(c_parser_only): tm.assert_frame_equal(result, expected) +@pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_file_handles_mmap(c_parser_only, csv1): # gh-14418 # diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 889ef61740a2c..fc5df6d9babcb 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.compat import IS64 +from pandas.compat._constants import ( + IS64, + WASM, +) from pandas.errors import EmptyDataError import pandas as pd @@ -168,6 +171,7 @@ def test_airline(datapath): tm.assert_frame_equal(df, df0) +@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness") def test_date_time(datapath): # Support of different SAS date/datetime formats (PR #15871) fname = datapath("io", "sas", "data", "datetime.sas7bdat") @@ -253,6 +257,7 @@ def test_corrupt_read(datapath): pd.read_sas(fname) +@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM") def test_max_sas_date(datapath): # GH 20927 # NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999 @@ -292,6 +297,7 @@ def test_max_sas_date(datapath): tm.assert_frame_equal(df, expected) +@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM") def test_max_sas_date_iterator(datapath): # GH 20927 # when called as an iterator, only those chunks with a date > pd.Timestamp.max @@ -337,6 +343,7 @@ def test_max_sas_date_iterator(datapath): tm.assert_frame_equal(results[1], expected[1]) +@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness") def test_null_date(datapath): fname = datapath("io", "sas", "data", "dates_null.sas7bdat") df = pd.read_sas(fname, encoding="utf-8") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ad729d2346a3b..e4b4d3a82669d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -19,7 +19,10 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows +from pandas.compat import ( + WASM, + is_platform_windows, +) import pandas as pd import pandas._testing as tm @@ -163,6 +166,7 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(pd.concat(it), expected.iloc[1:]) + @pytest.mark.skipif(WASM, reason="limited file system access on WASM") @pytest.mark.parametrize( "reader, module, error_class, fn_ext", [ @@ -228,6 +232,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex ): method(dummy_frame, path) + @pytest.mark.skipif(WASM, reason="limited file system access on WASM") @pytest.mark.parametrize( "reader, module, error_class, fn_ext", [ @@ -382,6 +387,7 @@ def mmap_file(datapath): class TestMMapWrapper: + @pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_constructor_bad_file(self, mmap_file): non_file = StringIO("I am not a file") non_file.fileno = lambda: -1 @@ -404,6 +410,7 @@ def test_constructor_bad_file(self, mmap_file): with pytest.raises(ValueError, match=msg): icom._maybe_memory_map(target, True) + @pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_next(self, mmap_file): with open(mmap_file, encoding="utf-8") as target: lines = target.readlines() @@ -587,6 +594,7 @@ def test_bad_encdoing_errors(): icom.get_handle(path, "w", errors="bad") +@pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_errno_attribute(): # GH 13872 with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 97599722cb93f..357e6129dd8f1 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -14,6 +14,7 @@ import numpy as np import pytest +from pandas.compat import WASM from pandas.compat._optional import import_optional_dependency from pandas.errors import ( EmptyDataError, @@ -485,6 +486,7 @@ def test_empty_string_etree(val): read_xml(data, parser="etree") +@pytest.mark.skipif(WASM, reason="limited file system access on WASM") def test_wrong_file_path(parser): filename = os.path.join("does", "not", "exist", "books.xml") diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index d67de79a8dd10..c5169fdff0cd4 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -11,6 +11,7 @@ conversion, ) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit +from pandas.compat import WASM import pandas.util._test_decorators as td import pandas._testing as tm @@ -99,6 +100,7 @@ def test_replace_integer_args(self, tz_aware_fixture): with pytest.raises(ValueError, match=msg): ts.replace(hour=0.1) + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") def test_replace_tzinfo_equiv_tz_localize_none(self): # GH#14621, GH#7825 # assert conversion to naive is the same as replacing tzinfo with None @@ -106,6 +108,7 @@ def test_replace_tzinfo_equiv_tz_localize_none(self): assert ts.tz_localize(None) == ts.replace(tzinfo=None) @td.skip_if_windows + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") def test_replace_tzinfo(self): # GH#15683 dt = datetime(2016, 3, 27, 1) diff --git a/pandas/tests/scalar/timestamp/methods/test_timestamp_method.py b/pandas/tests/scalar/timestamp/methods/test_timestamp_method.py index 67985bd4ba566..b576317fca8b4 100644 --- a/pandas/tests/scalar/timestamp/methods/test_timestamp_method.py +++ b/pandas/tests/scalar/timestamp/methods/test_timestamp_method.py @@ -1,9 +1,11 @@ # NB: This is for the Timestamp.timestamp *method* specifically, not # the Timestamp class in general. +import pytest from pytz import utc from pandas._libs.tslibs import Timestamp +from pandas.compat import WASM import pandas.util._test_decorators as td import pandas._testing as tm @@ -11,6 +13,7 @@ class TestTimestampMethod: @td.skip_if_windows + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") def test_timestamp(self, fixed_now_ts): # GH#17329 # tz-naive --> treat it as if it were UTC for purposes of timestamp() diff --git a/pandas/tests/scalar/timestamp/test_formats.py b/pandas/tests/scalar/timestamp/test_formats.py index e1299c272e5cc..44db1187850c9 100644 --- a/pandas/tests/scalar/timestamp/test_formats.py +++ b/pandas/tests/scalar/timestamp/test_formats.py @@ -5,6 +5,8 @@ import pytest import pytz # a test below uses pytz but only inside a `eval` call +from pandas.compat import WASM + from pandas import Timestamp ts_no_ns = Timestamp( @@ -95,6 +97,7 @@ class TestTimestampRendering: @pytest.mark.parametrize( "date", ["2014-03-07", "2014-01-01 09:00", "2014-01-01 00:00:00.000000001"] ) + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") def test_repr(self, date, freq, tz): # avoid to match with timezone name freq_repr = f"'{freq}'" diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index f4042acd05dc3..b05c30fa50fbe 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -21,6 +21,7 @@ iNaT, parsing, ) +from pandas.compat import WASM from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -959,6 +960,7 @@ def test_to_datetime_YYYYMMDD(self): assert actual == datetime(2008, 1, 15) @td.skip_if_windows # `tm.set_timezone` does not work in windows + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") def test_to_datetime_now(self): # See GH#18666 with tm.set_timezone("US/Eastern"): @@ -975,7 +977,8 @@ def test_to_datetime_now(self): assert pdnow.tzinfo is None assert pdnow2.tzinfo is None - @td.skip_if_windows # `tm.set_timezone` does not work in windows + @td.skip_if_windows # `tm.set_timezone` does not work on Windows + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") @pytest.mark.parametrize("tz", ["Pacific/Auckland", "US/Samoa"]) def test_to_datetime_today(self, tz): # See GH#18666 @@ -1007,6 +1010,7 @@ def test_to_datetime_today_now_unicode_bytes(self, arg): to_datetime([arg]) @pytest.mark.filterwarnings("ignore:Timestamp.utcnow is deprecated:FutureWarning") + @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") @pytest.mark.parametrize( "format, expected_ds", [ diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index c052ca58f5873..ba000a0439dd1 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -6,7 +6,10 @@ import numpy as np import pytest -from pandas.compat import IS64 +from pandas.compat import ( + IS64, + WASM, +) from pandas.errors import OutOfBoundsTimedelta import pandas as pd @@ -214,6 +217,7 @@ def test_to_timedelta_on_missing_values_list(self, val): actual = to_timedelta([val]) assert actual[0]._value == np.timedelta64("NaT").astype("int64") + @pytest.mark.skipif(WASM, reason="No fp exception support in WASM") @pytest.mark.xfail(not IS64, reason="Floating point error") def test_to_timedelta_float(self): # https://github.com/pandas-dev/pandas/issues/25077 diff --git a/pandas/tests/tseries/offsets/test_common.py b/pandas/tests/tseries/offsets/test_common.py index 3792878973c15..34181f28bb1a0 100644 --- a/pandas/tests/tseries/offsets/test_common.py +++ b/pandas/tests/tseries/offsets/test_common.py @@ -9,6 +9,7 @@ ) from pandas.compat import ( IS64, + WASM, is_platform_windows, ) @@ -106,6 +107,7 @@ def _offset(request): return request.param +@pytest.mark.skipif(WASM, reason="OverflowError received on WASM") def test_apply_out_of_range(request, tz_naive_fixture, _offset): tz = tz_naive_fixture @@ -130,7 +132,11 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset): if tz is not None: assert t.tzinfo is not None - if isinstance(tz, tzlocal) and not IS64 and _offset is not DateOffset: + if ( + isinstance(tz, tzlocal) + and ((not IS64) or WASM) + and _offset is not DateOffset + ): # If we hit OutOfBoundsDatetime on non-64 bit machines # we'll drop out of the try clause before the next test request.applymarker( diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 52af5adb686a7..9b64beaf09273 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -17,6 +17,7 @@ from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso from pandas.compat import ( ISMUSL, + WASM, is_platform_windows, ) import pandas.util._test_decorators as td @@ -29,6 +30,7 @@ from pandas._testing._hypothesis import DATETIME_NO_TZ +@pytest.mark.skipif(WASM, reason="tzset is not available on WASM") @pytest.mark.skipif( is_platform_windows() or ISMUSL, reason="TZ setting incorrect on Windows and MUSL Linux", diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index d4a79cae61772..48684c4810d2a 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -39,6 +39,7 @@ def test_foo(): from pandas.compat import ( IS64, + WASM, is_platform_windows, ) from pandas.compat._optional import import_optional_dependency @@ -115,6 +116,10 @@ def skip_if_no(package: str, min_version: str | None = None) -> pytest.MarkDecor locale.getlocale()[0] != "en_US", reason=f"Set local {locale.getlocale()[0]} is not en_US", ) +skip_if_wasm = pytest.mark.skipif( + WASM, + reason="does not support wasm", +) def parametrize_fixture_doc(*args) -> Callable[[F], F]: