|
26 | 26 | from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
|
27 | 27 | from pandas.core.dtypes.missing import _infer_fill_value, isna
|
28 | 28 |
|
| 29 | +from pandas._typing import FrameOrSeries |
29 | 30 | import pandas.core.common as com
|
30 | 31 | from pandas.core.index import Index, InvalidIndexError, MultiIndex
|
31 | 32 | from pandas.core.indexers import is_list_like_indexer, length_of_indexer
|
@@ -940,15 +941,11 @@ def _getitem_lowerdim(self, tup: Tuple):
|
940 | 941 | # slice returns a new object.
|
941 | 942 | if com.is_null_slice(new_key):
|
942 | 943 | return section
|
943 |
| - |
944 |
| - return self._getitem_lower_section(section, new_key) |
| 944 | + # This is an elided recursive call to iloc/loc/etc. |
| 945 | + return getattr(section, self.name)[new_key] |
945 | 946 |
|
946 | 947 | raise IndexingError("not applicable")
|
947 | 948 |
|
948 |
| - def _getitem_lower_section(self, section, key): |
949 |
| - # This is an elided recursive call to iloc/loc/etc. |
950 |
| - return getattr(section, self.name)[key] |
951 |
| - |
952 | 949 | def _getitem_nested_tuple(self, tup: Tuple):
|
953 | 950 | # we have a nested tuple so have at least 1 multi-index level
|
954 | 951 | # we should be able to match up the dimensionality here
|
@@ -1187,7 +1184,7 @@ def _validate_read_indexer(
|
1187 | 1184 | )
|
1188 | 1185 |
|
1189 | 1186 | if not (ax.is_categorical() or ax.is_interval()):
|
1190 |
| - warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6) |
| 1187 | + warnings.warn(_missing_key_warning, FutureWarning, stacklevel=7) |
1191 | 1188 |
|
1192 | 1189 | def _convert_to_indexer(
|
1193 | 1190 | self, obj, axis: int, is_setter: bool = False, raise_missing: bool = False
|
@@ -1472,10 +1469,10 @@ class _LocIndexer(_LocationIndexer):
|
1472 | 1469 | - A ``callable`` function with one argument (the calling Series or
|
1473 | 1470 | DataFrame) and that returns valid output for indexing (one of the above)
|
1474 | 1471 |
|
1475 |
| - ``.loc`` can be called before selecting using parameters: |
| 1472 | + ``.loc`` can be called before selecting using these parameters: |
1476 | 1473 |
|
1477 | 1474 | - ``axis``, to select by a single axis on a DataFrame, e.g. ``.loc(axis=1)['a']``.
|
1478 |
| - - ``regex``, to let strings be interpreted as regex patterns, e.g. |
| 1475 | + - ``regex``, to let single strings be interpreted as regex patterns, e.g. |
1479 | 1476 | ``.loc(regex=True)[:, '^col_']``
|
1480 | 1477 |
|
1481 | 1478 | See more at :ref:`Selection by Label <indexing.label>`
|
@@ -1723,6 +1720,11 @@ def __call__(self, axis=None, regex=False):
|
1723 | 1720 | new_self.regex = regex
|
1724 | 1721 | return new_self
|
1725 | 1722 |
|
| 1723 | + def __getitem__(self, key): |
| 1724 | + if self.regex: |
| 1725 | + return self._getitem_regex(key) |
| 1726 | + return super().__getitem__(key) |
| 1727 | + |
1726 | 1728 | @Appender(_NDFrameIndexer._validate_key.__doc__)
|
1727 | 1729 | def _validate_key(self, key, axis: int):
|
1728 | 1730 |
|
@@ -1789,25 +1791,35 @@ def _get_partial_string_timestamp_match_key(self, key, labels):
|
1789 | 1791 |
|
1790 | 1792 | return key
|
1791 | 1793 |
|
1792 |
| - def _get_regex_mappings(self, key, axis=None): |
1793 |
| - |
1794 |
| - if axis is None: |
1795 |
| - axis = self.axis or 0 |
1796 |
| - |
1797 |
| - labels = self.obj._get_axis(axis) |
| 1794 | + def _getitem_regex(self, key, axis=None): |
| 1795 | + """Subset obj by regex-searching axis for key.""" |
| 1796 | + if isinstance(key, str): |
| 1797 | + if axis is None: |
| 1798 | + axis = self.axis or 0 |
| 1799 | + return self._get_regex_axis(self.obj, pattern=key, axis=axis) |
| 1800 | + elif isinstance(key, tuple): |
| 1801 | + assert len(key) == 2 |
| 1802 | + result = self.obj # type: ABCDataFrame |
| 1803 | + # slicing columns first, then index is typically faster |
| 1804 | + for ax, sub_key in zip([1, 0], reversed(key)): |
| 1805 | + if isinstance(sub_key, str): |
| 1806 | + result = self._get_regex_axis(result, pattern=sub_key, axis=ax) |
| 1807 | + else: |
| 1808 | + result = result.loc(axis=ax)[sub_key] |
| 1809 | + return result |
1798 | 1810 |
|
1799 |
| - matcher = re.compile(key) |
| 1811 | + def _get_regex_axis( |
| 1812 | + self, obj: FrameOrSeries, pattern: str, axis: int |
| 1813 | + ) -> FrameOrSeries: |
| 1814 | + """Subset a single axis of ``obj`` from a regex pattern.""" |
| 1815 | + labels = obj._get_axis(axis) |
| 1816 | + matcher = re.compile(pattern) |
1800 | 1817 |
|
1801 |
| - def f(x): |
| 1818 | + def func(x): |
1802 | 1819 | return matcher.search(x) is not None
|
1803 | 1820 |
|
1804 |
| - return labels.map(f) |
1805 |
| - |
1806 |
| - def _getitem_regex(self, key, axis=None): |
1807 |
| - """Subset obj by regex-searching axis for key.""" |
1808 |
| - mapped = self._get_regex_mappings(key, axis) |
1809 |
| - |
1810 |
| - return self.obj.loc(axis=axis)[mapped] |
| 1821 | + mapped = labels.map(func) |
| 1822 | + return obj.loc(axis=axis)[mapped] |
1811 | 1823 |
|
1812 | 1824 | def _getitem_axis(self, key, axis: int):
|
1813 | 1825 | key = item_from_zerodim(key)
|
@@ -1877,9 +1889,6 @@ def _getitem_axis(self, key, axis: int):
|
1877 | 1889 | self._validate_key(key, axis)
|
1878 | 1890 | return self._get_label(key, axis=axis)
|
1879 | 1891 |
|
1880 |
| - def _getitem_lower_section(self, section, key): |
1881 |
| - return getattr(section, self.name)(regex=self.regex)[key] |
1882 |
| - |
1883 | 1892 | def __setitem__(self, key, value):
|
1884 | 1893 | if self.regex:
|
1885 | 1894 | raise NotImplementedError("Inserting with regex has not been implemented")
|
|
0 commit comments