File tree 4 files changed +31
-0
lines changed
4 files changed +31
-0
lines changed Original file line number Diff line number Diff line change @@ -44,6 +44,24 @@ def time_from_integer_array(self):
44
44
pd .array (self .values_integer , dtype = "Int64" )
45
45
46
46
47
class StringArray:
    """Benchmarks for building a ``"string"``-dtype array with ``pd.array``.

    The same values are prepared in three containers (object ndarray,
    unicode ndarray, plain list) so each input path is timed separately.
    """

    def setup(self):
        """Prepare the three input containers used by the timing methods."""
        N = 100_000
        # presumably N random strings of length 3 -- confirm against
        # ``tm.rands_array`` in the pandas version under test
        values = tm.rands_array(3, N)
        self.values_obj = np.array(values, dtype="object")
        self.values_str = np.array(values, dtype="U")
        self.values_list = values.tolist()

    def time_from_np_object_array(self):
        """Time construction from an object-dtype ndarray."""
        pd.array(self.values_obj, dtype="string")

    def time_from_np_str_array(self):
        """Time construction from a unicode (``"U"``) ndarray."""
        pd.array(self.values_str, dtype="string")

    def time_from_list(self):
        """Time construction from a plain Python list."""
        pd.array(self.values_list, dtype="string")
63
+
64
+
47
65
class ArrowStringArray :
48
66
49
67
params = [False , True ]
Original file line number Diff line number Diff line change @@ -156,6 +156,7 @@ Performance improvements
156
156
- Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`)
157
157
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
158
158
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
159
+ - Performance improvement in the :class:`~arrays.StringArray` constructor when passing a numpy array with type ``np.str_`` (:issue:`49109`)
159
160
- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`)
160
161
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
161
162
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
Original file line number Diff line number Diff line change @@ -703,6 +703,10 @@ cpdef ndarray[object] ensure_string_array(
703
703
if copy and result is arr:
704
704
result = result.copy()
705
705
706
+ if issubclass (arr.dtype.type, np.str_):
707
+ # short-circuit, all elements are str
708
+ return result
709
+
706
710
for i in range (n):
707
711
val = arr[i]
708
712
Original file line number Diff line number Diff line change @@ -597,6 +597,14 @@ def test_setitem_scalar_with_mask_validation(dtype):
597
597
ser [mask ] = 1
598
598
599
599
600
def test_from_numpy_str(dtype):
    """``pd.array`` on an ``np.str_`` ndarray matches construction from a list.

    ``dtype`` is supplied by the test suite (a pytest fixture defined outside
    this block).
    """
    vals = ["a", "b", "c"]
    arr = np.array(vals, dtype=np.str_)
    result = pd.array(arr, dtype=dtype)
    expected = pd.array(vals, dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
606
+
607
+
600
608
def test_tolist (dtype ):
601
609
vals = ["a" , "b" , "c" ]
602
610
arr = pd .array (vals , dtype = dtype )
You can’t perform that action at this time.
0 commit comments