Skip to content

Commit ac56f8c

Browse files
gh-133306: Support \z as a synonym for \Z in regular expressions (GH-133314)
Python's meaning of \Z was an error inherited from PCRE 0.95; it was fixed in PCRE 2.0. In other engines, \Z does not mean “anchor at string end”, but “anchor at string end or before an optional newline at string end”. In most RE engines, it is \z that means “anchor at string end”.
1 parent fe44fc4 commit ac56f8c

File tree

8 files changed

+31
-10
lines changed

8 files changed

+31
-10
lines changed

Doc/howto/regex.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -738,9 +738,12 @@ given location, they can obviously be matched an infinite number of times.
738738
different: ``\A`` still matches only at the beginning of the string, but ``^``
739739
may match at any location inside the string that follows a newline character.
740740

741-
``\Z``
741+
``\z``
742742
Matches only at the end of the string.
743743

744+
``\Z``
745+
The same as ``\z``. It is retained for compatibility with old Python versions.
746+
744747
``\b``
745748
Word boundary. This is a zero-width assertion that matches only at the
746749
beginning or end of a word. A word is defined as a sequence of alphanumeric

Doc/library/re.rst

+9-3
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ The special characters are:
266266
not a word boundary as outside a set, and numeric escapes
267267
such as ``\1`` are always octal escapes, not group references.
268268
Special sequences which do not match a single character such as ``\A``
269-
and ``\Z`` are not allowed.
269+
and ``\z`` are not allowed.
270270

271271
.. index:: single: ^ (caret); in regular expressions
272272

@@ -661,11 +661,17 @@ character ``'$'``.
661661
matches characters which are neither alphanumeric in the current locale
662662
nor the underscore.
663663

664-
.. index:: single: \Z; in regular expressions
664+
.. index:: single: \z; in regular expressions
665+
single: \Z; in regular expressions
665666

666-
``\Z``
667+
``\z``
667668
Matches only at the end of the string.
668669

670+
.. versionadded:: next
671+
672+
``\Z``
673+
The same as ``\z``. It is retained for compatibility with old Python versions.
674+
669675
.. index::
670676
single: \a; in regular expressions
671677
single: \b; in regular expressions

Doc/whatsnew/3.14.rst

+5
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,11 @@ Other language changes
624624
ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
625625
(Contributed by Daniel Pope in :gh:`129349`.)
626626

627+
* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`.
628+
It is interpreted unambiguously in many other regular expression engines,
629+
unlike ``\Z``, which has subtly different behavior in those engines.
630+
(Contributed by Serhiy Storchaka in :gh:`133306`.)
631+
627632
* ``\B`` in :mod:`regular expressions <re>` now matches the empty input string.
628633
Now it is always the opposite of ``\b``.
629634
(Contributed by Serhiy Storchaka in :gh:`124130`.)

Lib/re/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
resulting RE will match the second character.
6262
\number Matches the contents of the group of the same number.
6363
\A Matches only at the start of the string.
64-
\Z Matches only at the end of the string.
64+
\z Matches only at the end of the string.
6565
\b Matches the empty string, but only at the start or end of a word.
6666
\B Matches the empty string, but not at the start or end of a word.
6767
\d Matches any decimal digit; equivalent to the set [0-9] in

Lib/re/_parser.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@
4949
r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
5050
r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
5151
r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
52-
r"\Z": (AT, AT_END_STRING), # end of string
52+
r"\z": (AT, AT_END_STRING), # end of string
53+
r"\Z": (AT, AT_END_STRING), # end of string (obsolete)
5354
}
5455

5556
FLAGS = {

Lib/test/re_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@
531531
(r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
532532
(r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
533533
# bug 127259: \Z shouldn't depend on multiline mode
534-
(r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
534+
(r'(?ms).*?x\s*\z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
535535
# bug 128899: uppercase literals under the ignorecase flag
536536
(r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
537537
(r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),

Lib/test/test_re.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ def test_re_fullmatch(self):
619619
self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
620620
self.assertIsNone(re.fullmatch(r"a+", "ab"))
621621
self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
622+
self.assertIsNone(re.fullmatch(r"abc\z", "abc\n"))
622623
self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
623624
self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
624625
self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
@@ -802,6 +803,8 @@ def test_special_escapes(self):
802803
self.assertEqual(re.search(r"\B(b.)\B",
803804
"abc bcd bc abxd", re.ASCII).group(1), "bx")
804805
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
806+
self.assertEqual(re.search(r"^\Aabc\z$", "abc", re.M).group(0), "abc")
807+
self.assertIsNone(re.search(r"^\Aabc\z$", "\nabc\n", re.M))
805808
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
806809
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
807810
self.assertEqual(re.search(br"\b(b.)\b",
@@ -813,6 +816,8 @@ def test_special_escapes(self):
813816
self.assertEqual(re.search(br"\B(b.)\B",
814817
b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
815818
self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
819+
self.assertEqual(re.search(br"^\Aabc\z$", b"abc", re.M).group(0), b"abc")
820+
self.assertIsNone(re.search(br"^\Aabc\z$", b"\nabc\n", re.M))
816821
self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
817822
self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
818823
self.assertEqual(re.search(r"\d\D\w\W\s\S",
@@ -836,7 +841,7 @@ def test_other_escapes(self):
836841
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
837842
self.assertIsNone(re.match(r"[\^a]+", 'b'))
838843
re.purge() # for warnings
839-
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
844+
for c in 'ceghijklmopqyCEFGHIJKLMNOPQRTVXY':
840845
with self.subTest(c):
841846
self.assertRaises(re.PatternError, re.compile, '\\%c' % c)
842847
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
@@ -2608,8 +2613,8 @@ def test_findall_atomic_grouping(self):
26082613
self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab'])
26092614

26102615
def test_bug_gh91616(self):
2611-
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
2612-
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
2616+
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\z', "a.txt")) # reproducer
2617+
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\z', "a.txt"))
26132618

26142619
def test_bug_gh100061(self):
26152620
# gh-100061
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`.

0 commit comments

Comments
 (0)