@@ -1429,12 +1429,10 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
1429
1429
def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
    """
    Determine if each string entirely matches a regular expression.

    Checks if each string in the Series or Index fully matches the
    specified regular expression pattern. This function is useful when the
    requirement is for an entire string to conform to a pattern, such as
    validating formats like phone numbers or email addresses.

    Parameters
    ----------
    pat : str
        Character sequence or regular expression.
    case : bool, default True
        If True, case sensitive. Forwarded unchanged to the backing array.
    flags : int, default 0 (no flags)
        Regex module flags, e.g. re.IGNORECASE. Forwarded unchanged to the
        backing array.
    na : scalar, optional
        Value to use for missing values. The default depends on dtype of the
        array. For object-dtype, ``numpy.nan`` is used. For the nullable
        ``StringDtype``, ``pandas.NA`` is used. For the ``"str"`` dtype,
        ``False`` is used.

    Returns
    -------
    Series/Index/array of boolean values
        The function returns a Series, Index, or array of boolean values,
        where True indicates that the entire string matches the regular
        expression pattern and False indicates that it does not.

    See Also
    --------
    match : Similar, but also returns `True` when only a *prefix* of the string
        matches the regular expression.
    extract : Extract matched groups.

    Notes
    -----
    For Arrow-backed string arrays, a pattern containing alternation (``|``)
    that is not already enclosed in a single outer group is wrapped in a
    non-capturing group, ``(?:...)``, before it is handed to the backend.
    This makes the alternation apply to the whole pattern, so the result
    agrees with the non-Arrow string implementations. A non-capturing group
    is used so that the numbering of the caller's own groups is unchanged.

    Examples
    --------
    >>> ser = pd.Series(["cat", "duck", "dove"])
    >>> ser.str.fullmatch(r"d.+")
    0    False
    1     True
    2     True
    dtype: bool

    Ensure consistent behavior with alternation patterns:

    >>> ser = pd.Series(["asdf", "as"], dtype="string[pyarrow]")
    >>> ser.str.fullmatch(r"(as)|(as)").tolist()
    [False, True]
    """

    def _is_fully_wrapped(pattern):
        # True only when the "(" at index 0 is closed by the ")" at the very
        # end, i.e. one outer group already encloses the entire pattern.
        if not (pattern.startswith("(") and pattern.endswith(")")):
            return False

        depth = 0
        escaped = False
        in_char_class = False
        last = len(pattern) - 1

        for pos, char in enumerate(pattern):
            if escaped:
                # The character after a backslash is always a literal.
                escaped = False
            elif char == "\\":
                escaped = True
            elif in_char_class:
                # Parentheses inside [...] are literals, not group markers.
                if char == "]":
                    in_char_class = False
            elif char == "[":
                in_char_class = True
            elif char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    # The leading "(" just closed; the pattern is wrapped
                    # only if this is also the final character.
                    return pos == last

        # The leading "(" never closed (e.g. the final ")" was escaped).
        return False

    arr = self._data.array

    # The backend is recognised by name rather than by importing the Arrow
    # array classes: either the array class name or its dtype string mentions
    # Arrow (e.g. "ArrowStringArray", "string[pyarrow]").
    backend_name = type(arr).__name__
    dtype_name = str(getattr(arr, "dtype", "")).lower()
    is_pyarrow = "Arrow" in backend_name or "arrow" in dtype_name

    if (
        is_pyarrow
        and isinstance(pat, str)
        and "|" in pat
        and not _is_fully_wrapped(pat)
    ):
        # Group the alternation as a whole. No whitespace may be introduced
        # here: every character inside the group is part of the regex.
        pat = f"(?:{pat})"

    result = arr._str_fullmatch(pat, case=case, flags=flags, na=na)
    return self._wrap_result(result, fill_value=na, returns_string=False)
1547
1522
0 commit comments