Skip to content

Commit 37edda8

Browse files
jorisvandenbossche authored and WillAyd committed
REF (string dtype): rename using_pyarrow_string_dtype to using_string_dtype (pandas-dev#59320)
1 parent edadd35 commit 37edda8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+120
-174
lines changed

pandas/_config/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,6 @@ def using_nullable_dtypes() -> bool:
5252
return _mode_options["nullable_dtypes"]
5353

5454

55-
def using_pyarrow_string_dtype() -> bool:
55+
def using_string_dtype() -> bool:
5656
_mode_options = _global_config["future"]
5757
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2725,7 +2725,7 @@ def maybe_convert_objects(ndarray[object] objects,
27252725
seen.object_ = True
27262726

27272727
elif seen.str_:
2728-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2728+
if using_string_dtype() and is_string_array(objects, skipna=True):
27292729
from pandas.core.arrays.string_ import StringDtype
27302730

27312731
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/construction.py

+4-12
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import numpy as np
2020
from numpy import ma
2121

22-
from pandas._config import using_pyarrow_string_dtype
22+
from pandas._config import using_string_dtype
2323

2424
from pandas._libs import lib
2525
from pandas._libs.tslibs import (
@@ -566,11 +566,7 @@ def sanitize_array(
566566
if not is_list_like(data):
567567
if index is None:
568568
raise ValueError("index must be specified when data is not list-like")
569-
if (
570-
isinstance(data, str)
571-
and using_pyarrow_string_dtype()
572-
and original_dtype is None
573-
):
569+
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
574570
from pandas.core.arrays.string_ import StringDtype
575571

576572
dtype = StringDtype("pyarrow_numpy")
@@ -604,14 +600,10 @@ def sanitize_array(
604600
subarr = data
605601
if data.dtype == object:
606602
subarr = maybe_infer_to_datetimelike(data)
607-
if (
608-
object_index
609-
and using_pyarrow_string_dtype()
610-
and is_string_dtype(subarr)
611-
):
603+
if object_index and using_string_dtype() and is_string_dtype(subarr):
612604
# Avoid inference when string option is set
613605
subarr = data
614-
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
606+
elif data.dtype.kind == "U" and using_string_dtype():
615607
from pandas.core.arrays.string_ import StringDtype
616608

617609
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/dtypes/cast.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import numpy as np
2020

21-
from pandas._config import using_pyarrow_string_dtype
21+
from pandas._config import using_string_dtype
2222

2323
from pandas._libs import (
2424
Interval,
@@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
798798
# coming out as np.str_!
799799

800800
dtype = _dtype_obj
801-
if using_pyarrow_string_dtype():
801+
if using_string_dtype():
802802
from pandas.core.arrays.string_ import StringDtype
803803

804804
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/internals/construction.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
import numpy as np
1414
from numpy import ma
1515

16-
from pandas._config import using_pyarrow_string_dtype
16+
from pandas._config import using_string_dtype
1717

1818
from pandas._libs import lib
1919

@@ -375,7 +375,7 @@ def ndarray_to_mgr(
375375
bp = BlockPlacement(slice(len(columns)))
376376
nb = new_block_2d(values, placement=bp, refs=refs)
377377
block_values = [nb]
378-
elif dtype is None and values.dtype.kind == "U" and using_pyarrow_string_dtype():
378+
elif dtype is None and values.dtype.kind == "U" and using_string_dtype():
379379
dtype = StringDtype(storage="pyarrow_numpy")
380380

381381
obj_columns = list(values)

pandas/io/feather_format.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
Any,
77
)
88

9-
from pandas._config import using_pyarrow_string_dtype
9+
from pandas._config import using_string_dtype
1010

1111
from pandas._libs import lib
1212
from pandas.compat._optional import import_optional_dependency
@@ -120,7 +120,7 @@ def read_feather(
120120
with get_handle(
121121
path, "rb", storage_options=storage_options, is_text=False
122122
) as handles:
123-
if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
123+
if dtype_backend is lib.no_default and not using_string_dtype():
124124
return feather.read_feather(
125125
handles.handle, columns=columns, use_threads=bool(use_threads)
126126
)
@@ -137,7 +137,7 @@ def read_feather(
137137
elif dtype_backend == "pyarrow":
138138
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
139139

140-
elif using_pyarrow_string_dtype():
140+
elif using_string_dtype():
141141
return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
142142
else:
143143
raise NotImplementedError

pandas/io/orc.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
Literal,
1010
)
1111

12-
from pandas._config import using_pyarrow_string_dtype
12+
from pandas._config import using_string_dtype
1313

1414
from pandas._libs import lib
1515
from pandas.compat._optional import import_optional_dependency
@@ -127,7 +127,7 @@ def read_orc(
127127
df = pa_table.to_pandas(types_mapper=mapping.get)
128128
return df
129129
else:
130-
if using_pyarrow_string_dtype():
130+
if using_string_dtype():
131131
types_mapper = arrow_string_types_mapper()
132132
else:
133133
types_mapper = None

pandas/io/parquet.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
import warnings
1313
from warnings import catch_warnings
1414

15-
from pandas._config import using_pyarrow_string_dtype
15+
from pandas._config import using_string_dtype
1616
from pandas._config.config import _get_option
1717

1818
from pandas._libs import lib
@@ -257,7 +257,7 @@ def read(
257257
to_pandas_kwargs["types_mapper"] = mapping.get
258258
elif dtype_backend == "pyarrow":
259259
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment]
260-
elif using_pyarrow_string_dtype():
260+
elif using_string_dtype():
261261
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
262262

263263
manager = _get_option("mode.data_manager", silent=True)

pandas/io/parsers/arrow_parser_wrapper.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from typing import TYPE_CHECKING
44
import warnings
55

6-
from pandas._config import using_pyarrow_string_dtype
6+
from pandas._config import using_string_dtype
77

88
from pandas._libs import lib
99
from pandas.compat._optional import import_optional_dependency
@@ -295,7 +295,7 @@ def read(self) -> DataFrame:
295295
dtype_mapping = _arrow_dtype_mapping()
296296
dtype_mapping[pa.null()] = pd.Int64Dtype()
297297
frame = table.to_pandas(types_mapper=dtype_mapping.get)
298-
elif using_pyarrow_string_dtype():
298+
elif using_string_dtype():
299299
frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
300300

301301
else:

pandas/io/pytables.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
config,
3232
get_option,
3333
using_copy_on_write,
34-
using_pyarrow_string_dtype,
34+
using_string_dtype,
3535
)
3636

3737
from pandas._libs import (
@@ -3224,7 +3224,7 @@ def read(
32243224
index = self.read_index("index", start=start, stop=stop)
32253225
values = self.read_array("values", start=start, stop=stop)
32263226
result = Series(values, index=index, name=self.name, copy=False)
3227-
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
3227+
if using_string_dtype() and is_string_array(values, skipna=True):
32283228
result = result.astype("string[pyarrow_numpy]")
32293229
return result
32303230

@@ -3293,7 +3293,7 @@ def read(
32933293

32943294
columns = items[items.get_indexer(blk_items)]
32953295
df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
3296-
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
3296+
if using_string_dtype() and is_string_array(values, skipna=True):
32973297
df = df.astype("string[pyarrow_numpy]")
32983298
dfs.append(df)
32993299

@@ -4679,9 +4679,9 @@ def read(
46794679
else:
46804680
# Categorical
46814681
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
4682-
if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"):
4682+
if not (using_string_dtype() and values.dtype.kind == "O"):
46834683
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
4684-
if using_pyarrow_string_dtype() and is_string_array(
4684+
if using_string_dtype() and is_string_array(
46854685
values, # type: ignore[arg-type]
46864686
skipna=True,
46874687
):

pandas/io/sql.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232

3333
import numpy as np
3434

35-
from pandas._config import using_pyarrow_string_dtype
35+
from pandas._config import using_string_dtype
3636

3737
from pandas._libs import lib
3838
from pandas.compat._optional import import_optional_dependency
@@ -2215,7 +2215,7 @@ def read_table(
22152215
from pandas.io._util import _arrow_dtype_mapping
22162216

22172217
mapping = _arrow_dtype_mapping().get
2218-
elif using_pyarrow_string_dtype():
2218+
elif using_string_dtype():
22192219
from pandas.io._util import arrow_string_types_mapper
22202220

22212221
arrow_string_types_mapper()

pandas/tests/arithmetic/test_object.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_pyarrow_string_dtype
11+
from pandas._config import using_string_dtype
1212

1313
import pandas.util._test_decorators as td
1414

@@ -303,7 +303,7 @@ def test_iadd_string(self):
303303
index += "_x"
304304
assert "a_x" in index
305305

306-
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
306+
@pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
307307
def test_add(self):
308308
index = pd.Index([str(i) for i in range(10)])
309309
expected = pd.Index(index.values * 2)

pandas/tests/arrays/categorical/test_constructors.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_pyarrow_string_dtype
9+
from pandas._config import using_string_dtype
1010

1111
from pandas.core.dtypes.common import (
1212
is_float_dtype,
@@ -449,7 +449,7 @@ def test_constructor_str_unknown(self):
449449
with pytest.raises(ValueError, match="Unknown dtype"):
450450
Categorical([1, 2], dtype="foo")
451451

452-
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
452+
@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
453453
def test_constructor_np_strs(self):
454454
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
455455
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])

pandas/tests/arrays/categorical/test_repr.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_pyarrow_string_dtype
4+
from pandas._config import using_string_dtype
55

66
from pandas import (
77
Categorical,
@@ -78,7 +78,7 @@ def test_print_none_width(self):
7878
assert exp == repr(a)
7979

8080
@pytest.mark.skipif(
81-
using_pyarrow_string_dtype(),
81+
using_string_dtype(),
8282
reason="Change once infer_string is set to True by default",
8383
)
8484
def test_unicode_print(self):

pandas/tests/base/test_misc.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_pyarrow_string_dtype
6+
from pandas._config import using_string_dtype
77

88
from pandas.compat import PYPY
99

@@ -83,7 +83,7 @@ def test_ndarray_compat_properties(index_or_series_obj):
8383

8484

8585
@pytest.mark.skipif(
86-
PYPY or using_pyarrow_string_dtype(),
86+
PYPY or using_string_dtype(),
8787
reason="not relevant for PyPy doesn't work properly for arrow strings",
8888
)
8989
def test_memory_usage(index_or_series_memory_obj):

pandas/tests/base/test_unique.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_pyarrow_string_dtype
4+
from pandas._config import using_string_dtype
55

66
import pandas as pd
77
import pandas._testing as tm
@@ -100,7 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):
100100

101101

102102
@pytest.mark.single_cpu
103-
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
103+
@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
104104
def test_unique_bad_unicode(index_or_series):
105105
# regression test for #34550
106106
uval = "\ud83d" # smiley emoji

pandas/tests/extension/base/ops.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_pyarrow_string_dtype
8+
from pandas._config import using_string_dtype
99

1010
from pandas.core.dtypes.common import is_string_dtype
1111

@@ -37,7 +37,7 @@ def _get_expected_exception(
3737
else:
3838
result = self.frame_scalar_exc
3939

40-
if using_pyarrow_string_dtype() and result is not None:
40+
if using_string_dtype() and result is not None:
4141
import pyarrow as pa
4242

4343
result = ( # type: ignore[assignment]

pandas/tests/extension/test_categorical.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
import numpy as np
1919
import pytest
2020

21-
from pandas._config import using_pyarrow_string_dtype
21+
from pandas._config import using_string_dtype
2222

2323
import pandas as pd
2424
from pandas import Categorical
@@ -103,7 +103,7 @@ def test_contains(self, data, data_missing):
103103
continue
104104
assert na_value_obj not in data
105105
# this section suffers from super method
106-
if not using_pyarrow_string_dtype():
106+
if not using_string_dtype():
107107
assert na_value_obj in data_missing
108108

109109
def test_empty(self, dtype):

pandas/tests/frame/constructors/test_from_dict.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_pyarrow_string_dtype
6+
from pandas._config import using_string_dtype
77

88
from pandas import (
99
DataFrame,
@@ -44,9 +44,7 @@ def test_constructor_single_row(self):
4444
)
4545
tm.assert_frame_equal(result, expected)
4646

47-
@pytest.mark.skipif(
48-
using_pyarrow_string_dtype(), reason="columns inferring logic broken"
49-
)
47+
@pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
5048
def test_constructor_list_of_series(self):
5149
data = [
5250
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),

0 commit comments

Comments
 (0)