@@ -1429,12 +1429,12 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
1429
1429
def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
    """
    Determine if each string entirely matches a regular expression.

    Checks if each string in the Series or Index fully matches the
    specified regular expression pattern. This function is useful when the
    requirement is for an entire string to conform to a pattern, such as
    validating formats like phone numbers or email addresses.

    Parameters
    ----------
    pat : str
        Character sequence or regular expression.
    case : bool, default True
        If True, case sensitive.
    flags : int, default 0 (no flags)
        Regex module flags, e.g. re.IGNORECASE.
    na : scalar, optional
        Fill value for missing values. The default depends on dtype of the
        array. For object-dtype, ``numpy.nan`` is used. For the nullable
        ``StringDtype``, ``pandas.NA`` is used. For the ``"str"`` dtype,
        ``False`` is used.

    Returns
    -------
    Series/Index/array of boolean values
        The function returns a Series, Index, or array of boolean values,
        where True indicates that the entire string matches the regular
        expression pattern and False indicates that it does not.

    See Also
    --------
    match : Similar, but also returns `True` when only a *prefix* of the string
        matches the regular expression.
    extract : Extract matched groups.

    Notes
    -----
    To keep results consistent between the Python string implementation and
    PyArrow-backed string arrays, a string pattern that contains the
    alternation operator (``|``) is wrapped in a non-capturing group
    (``(?:...)``) before it is handed to a PyArrow-backed array. The group
    makes the whole alternation, rather than a single branch, subject to the
    full-match requirement. Because the group is non-capturing, the numbering
    of the groups in ``pat`` is left unchanged.

    Examples
    --------
    >>> ser = pd.Series(["cat", "duck", "dove"])
    >>> ser.str.fullmatch(r"d.+")
    0    False
    1     True
    2     True
    dtype: bool

    Ensure consistent behavior with alternation patterns:

    >>> ser = pd.Series(["asdf", "as"], dtype="string[pyarrow]")
    >>> ser.str.fullmatch(r"(as)|(as)")
    0    False
    1     True
    dtype: boolean
    """
    arr = self._data.array

    # Arrow-backed arrays are recognised from the array's class name or from
    # the textual form of its dtype, so no pyarrow import is needed here.
    is_pyarrow = (
        "Arrow" in type(arr).__name__
        or "arrow" in str(getattr(arr, "dtype", "")).lower()
    )

    # Only string patterns can be inspected and rewritten; anything else
    # (e.g. an already compiled pattern) is forwarded untouched.
    if is_pyarrow and isinstance(pat, str) and "|" in pat:
        # A non-capturing group is a no-op when the pattern is already
        # grouped, so wrapping is always safe and needs no check for
        # existing outer parentheses.
        pat = f"(?:{pat})"

    result = arr._str_fullmatch(pat, case=case, flags=flags, na=na)
    return self._wrap_result(result, fill_value=na, returns_string=False)
1476
-
1548
+
1477
1549
@forbid_nonstring_types (["bytes" ])
1478
1550
def replace (
1479
1551
self ,
0 commit comments