Work around pyupgrade edge cases (#26384)

uranusjr · ephraimbuddy · commit 1ea86de21886 · 2022-09-14T18:50:23.000+01:00
(cherry picked from commit 9444d97)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -213,13 +213,7 @@ repos:
       - id: pyupgrade
         name: Upgrade Python code automatically
         args: ["--py37-plus"]
-        # We need to exclude gcs hook from pyupgrade because it has public "list" command which clashes
-        # with `list` that is used as type
-        # Test Python tests if different kinds of typing including one that does not have
-        # __future__ annotations, so it should be left without automated upgrade
-        # BaseOperator is disabled because replacing ClassVar[List[Type with corresponding list/type causes the attr to fail
-        # see https://github.com/apache/airflow/pull/26290#issuecomment-1246014807
-        exclude: ^airflow/_vendor/|^airflow/providers/google/cloud/hooks/gcs.py$|^test/decorators/test_python.py$|^airflow/models/baseoperator.py$
+        exclude: ^airflow/_vendor/
   - repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.9.0
     hooks:
diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py
@@ -1738,11 +1738,19 @@ def cross_downstream(
         task.set_downstream(to_tasks)
 
 
+# pyupgrade assumes all type annotations can be lazily evaluated, but this is
+# not the case for attrs-decorated classes, since cattrs needs to evaluate the
+# annotation expressions at runtime, and Python before 3.9.0 does not lazily
+# evaluate those. Putting the expression in a top-level assignment statement
+# communicates this runtime requirement to pyupgrade.
+BaseOperatorClassList = List[Type[BaseOperator]]
+
+
 @attr.s(auto_attribs=True)
 class BaseOperatorLink(metaclass=ABCMeta):
     """Abstract base class that defines how we get an operator link."""
 
-    operators: ClassVar[List[Type[BaseOperator]]] = []
+    operators: ClassVar[BaseOperatorClassList] = []
     """
     This property will be used by Airflow Plugins to find the Operators to which you want
     to assign this Operator Link
diff --git a/airflow/providers/google/cloud/hooks/gcs.py b/airflow/providers/google/cloud/hooks/gcs.py
@@ -15,8 +15,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#
 """This module contains a Google Cloud Storage hook."""
+from __future__ import annotations
+
 import functools
 import gzip as gz
 import os
@@ -28,20 +29,7 @@
 from io import BytesIO
 from os import path
 from tempfile import NamedTemporaryFile
-from typing import (
-    IO,
-    Callable,
-    Generator,
-    List,
-    Optional,
-    Sequence,
-    Set,
-    Tuple,
-    TypeVar,
-    Union,
-    cast,
-    overload,
-)
+from typing import IO, Callable, Generator, Sequence, TypeVar, cast, overload
 from urllib.parse import urlparse
 
 from google.api_core.exceptions import NotFound
@@ -60,6 +48,10 @@
 RT = TypeVar('RT')
 T = TypeVar("T", bound=Callable)
 
+# GCSHook has a method named 'list' (to junior devs: please don't do this), so
+# we need to create an alias to prevent Mypy from being confused.
+List = list
+
 # Use default timeout from google-cloud-storage
 DEFAULT_TIMEOUT = 60
 
@@ -80,7 +72,7 @@ def _fallback_object_url_to_object_name_and_bucket_name(
 
     def _wrapper(func: T):
         @functools.wraps(func)
-        def _inner_wrapper(self: "GCSHook", *args, **kwargs) -> RT:
+        def _inner_wrapper(self: GCSHook, *args, **kwargs) -> RT:
             if args:
                 raise AirflowException(
                     "You must use keyword arguments in this methods rather than positional"
@@ -139,13 +131,13 @@ class GCSHook(GoogleBaseHook):
     connection.
     """
 
-    _conn = None  # type: Optional[storage.Client]
+    _conn: storage.Client | None = None
 
     def __init__(
         self,
         gcp_conn_id: str = "google_cloud_default",
-        delegate_to: Optional[str] = None,
-        impersonation_chain: Optional[Union[str, Sequence[str]]] = None,
+        delegate_to: str | None = None,
+        impersonation_chain: str | Sequence[str] | None = None,
     ) -> None:
         super().__init__(
             gcp_conn_id=gcp_conn_id,
@@ -166,8 +158,8 @@ def copy(
         self,
         source_bucket: str,
         source_object: str,
-        destination_bucket: Optional[str] = None,
-        destination_object: Optional[str] = None,
+        destination_bucket: str | None = None,
+        destination_object: str | None = None,
     ) -> None:
         """
         Copies an object from a bucket to another, with renaming if requested.
@@ -215,7 +207,7 @@ def rewrite(
         source_bucket: str,
         source_object: str,
         destination_bucket: str,
-        destination_object: Optional[str] = None,
+        destination_object: str | None = None,
     ) -> None:
         """
         Has the same functionality as copy, except that will work on files
@@ -270,9 +262,9 @@ def download(
         bucket_name: str,
         object_name: str,
         filename: None = None,
-        chunk_size: Optional[int] = None,
-        timeout: Optional[int] = DEFAULT_TIMEOUT,
-        num_max_attempts: Optional[int] = 1,
+        chunk_size: int | None = None,
+        timeout: int | None = DEFAULT_TIMEOUT,
+        num_max_attempts: int | None = 1,
     ) -> bytes:
         ...
 
@@ -282,21 +274,21 @@ def download(
         bucket_name: str,
         object_name: str,
         filename: str,
-        chunk_size: Optional[int] = None,
-        timeout: Optional[int] = DEFAULT_TIMEOUT,
-        num_max_attempts: Optional[int] = 1,
+        chunk_size: int | None = None,
+        timeout: int | None = DEFAULT_TIMEOUT,
+        num_max_attempts: int | None = 1,
     ) -> str:
         ...
 
     def download(
         self,
         bucket_name: str,
         object_name: str,
-        filename: Optional[str] = None,
-        chunk_size: Optional[int] = None,
-        timeout: Optional[int] = DEFAULT_TIMEOUT,
-        num_max_attempts: Optional[int] = 1,
-    ) -> Union[str, bytes]:
+        filename: str | None = None,
+        chunk_size: int | None = None,
+        timeout: int | None = DEFAULT_TIMEOUT,
+        num_max_attempts: int | None = 1,
+    ) -> str | bytes:
         """
         Downloads a file from Google Cloud Storage.
 
@@ -351,9 +343,9 @@ def download_as_byte_array(
         self,
         bucket_name: str,
         object_name: str,
-        chunk_size: Optional[int] = None,
-        timeout: Optional[int] = DEFAULT_TIMEOUT,
-        num_max_attempts: Optional[int] = 1,
+        chunk_size: int | None = None,
+        timeout: int | None = DEFAULT_TIMEOUT,
+        num_max_attempts: int | None = 1,
     ) -> bytes:
         """
         Downloads a file from Google Cloud Storage.
@@ -383,9 +375,9 @@ def download_as_byte_array(
     def provide_file(
         self,
         bucket_name: str = PROVIDE_BUCKET,
-        object_name: Optional[str] = None,
-        object_url: Optional[str] = None,
-        dir: Optional[str] = None,
+        object_name: str | None = None,
+        object_url: str | None = None,
+        dir: str | None = None,
     ) -> Generator[IO[bytes], None, None]:
         """
         Downloads the file to a temporary directory and returns a file handle
@@ -412,8 +404,8 @@ def provide_file(
     def provide_file_and_upload(
         self,
         bucket_name: str = PROVIDE_BUCKET,
-        object_name: Optional[str] = None,
-        object_url: Optional[str] = None,
+        object_name: str | None = None,
+        object_url: str | None = None,
     ) -> Generator[IO[bytes], None, None]:
         """
         Creates temporary file, returns a file handle and uploads the files content
@@ -440,15 +432,15 @@ def upload(
         self,
         bucket_name: str,
         object_name: str,
-        filename: Optional[str] = None,
-        data: Optional[Union[str, bytes]] = None,
-        mime_type: Optional[str] = None,
+        filename: str | None = None,
+        data: str | bytes | None = None,
+        mime_type: str | None = None,
         gzip: bool = False,
         encoding: str = 'utf-8',
-        chunk_size: Optional[int] = None,
-        timeout: Optional[int] = DEFAULT_TIMEOUT,
+        chunk_size: int | None = None,
+        timeout: int | None = DEFAULT_TIMEOUT,
         num_max_attempts: int = 1,
-        metadata: Optional[dict] = None,
+        metadata: dict | None = None,
     ) -> None:
         """
         Uploads a local file or file data as string or bytes to Google Cloud Storage.
@@ -683,7 +675,7 @@ def delete_bucket(self, bucket_name: str, force: bool = False) -> None:
         except NotFound:
             self.log.info("Bucket %s not exists", bucket_name)
 
-    def list(self, bucket_name, versions=None, max_results=None, prefix=None, delimiter=None) -> list:
+    def list(self, bucket_name, versions=None, max_results=None, prefix=None, delimiter=None) -> List:
         """
         List all objects from the bucket with the give string prefix in name
 
@@ -730,10 +722,10 @@ def list_by_timespan(
         bucket_name: str,
         timespan_start: datetime,
         timespan_end: datetime,
-        versions: Optional[bool] = None,
-        max_results: Optional[int] = None,
-        prefix: Optional[str] = None,
-        delimiter: Optional[str] = None,
+        versions: bool | None = None,
+        max_results: int | None = None,
+        prefix: str | None = None,
+        delimiter: str | None = None,
     ) -> List[str]:
         """
         List all objects from the bucket with the give string prefix in name that were
@@ -838,11 +830,11 @@ def get_md5hash(self, bucket_name: str, object_name: str) -> str:
     def create_bucket(
         self,
         bucket_name: str,
-        resource: Optional[dict] = None,
+        resource: dict | None = None,
         storage_class: str = 'MULTI_REGIONAL',
         location: str = 'US',
-        project_id: Optional[str] = None,
-        labels: Optional[dict] = None,
+        project_id: str | None = None,
+        labels: dict | None = None,
     ) -> str:
         """
         Creates a new bucket. Google Cloud Storage uses a flat namespace, so
@@ -900,7 +892,7 @@ def create_bucket(
         return bucket.id
 
     def insert_bucket_acl(
-        self, bucket_name: str, entity: str, role: str, user_project: Optional[str] = None
+        self, bucket_name: str, entity: str, role: str, user_project: str | None = None
     ) -> None:
         """
         Creates a new ACL entry on the specified bucket_name.
@@ -933,8 +925,8 @@ def insert_object_acl(
         object_name: str,
         entity: str,
         role: str,
-        generation: Optional[int] = None,
-        user_project: Optional[str] = None,
+        generation: int | None = None,
+        user_project: str | None = None,
     ) -> None:
         """
         Creates a new ACL entry on the specified object.
@@ -967,7 +959,7 @@ def insert_object_acl(
 
         self.log.info('A new ACL entry created for object: %s in bucket: %s', object_name, bucket_name)
 
-    def compose(self, bucket_name: str, source_objects: List, destination_object: str) -> None:
+    def compose(self, bucket_name: str, source_objects: List[str], destination_object: str) -> None:
         """
         Composes a list of existing object into a new object in the same storage bucket_name
 
@@ -1002,8 +994,8 @@ def sync(
         self,
         source_bucket: str,
         destination_bucket: str,
-        source_object: Optional[str] = None,
-        destination_object: Optional[str] = None,
+        source_object: str | None = None,
+        destination_object: str | None = None,
         recursive: bool = True,
         allow_overwrite: bool = False,
         delete_extra_files: bool = False,
@@ -1104,7 +1096,7 @@ def sync(
         self.log.info("Synchronization finished.")
 
     def _calculate_sync_destination_path(
-        self, blob: storage.Blob, destination_object: Optional[str], source_object_prefix_len: int
+        self, blob: storage.Blob, destination_object: str | None, source_object_prefix_len: int
     ) -> str:
         return (
             path.join(destination_object, blob.name[source_object_prefix_len:])
@@ -1116,10 +1108,10 @@ def _calculate_sync_destination_path(
     def _prepare_sync_plan(
         source_bucket: storage.Bucket,
         destination_bucket: storage.Bucket,
-        source_object: Optional[str],
-        destination_object: Optional[str],
+        source_object: str | None,
+        destination_object: str | None,
         recursive: bool,
-    ) -> Tuple[Set[storage.Blob], Set[storage.Blob], Set[storage.Blob]]:
+    ) -> tuple[set[storage.Blob], set[storage.Blob], set[storage.Blob]]:
         # Calculate the number of characters that remove from the name, because they contain information
         # about the parent's path
         source_object_prefix_len = len(source_object) if source_object else 0
@@ -1139,11 +1131,11 @@ def _prepare_sync_plan(
         # Determine objects to copy and delete
         to_copy = source_names - destination_names
         to_delete = destination_names - source_names
-        to_copy_blobs = {source_names_index[a] for a in to_copy}  # type: Set[storage.Blob]
-        to_delete_blobs = {destination_names_index[a] for a in to_delete}  # type: Set[storage.Blob]
+        to_copy_blobs: set[storage.Blob] = {source_names_index[a] for a in to_copy}
+        to_delete_blobs: set[storage.Blob] = {destination_names_index[a] for a in to_delete}
         # Find names that are in both buckets
         names_to_check = source_names.intersection(destination_names)
-        to_rewrite_blobs = set()  # type: Set[storage.Blob]
+        to_rewrite_blobs: set[storage.Blob] = set()
         # Compare objects based on crc32
         for current_name in names_to_check:
             source_blob = source_names_index[current_name]
@@ -1164,7 +1156,7 @@ def gcs_object_is_directory(bucket: str) -> bool:
     return len(blob) == 0 or blob.endswith('/')
 
 
-def _parse_gcs_url(gsurl: str) -> Tuple[str, str]:
+def _parse_gcs_url(gsurl: str) -> tuple[str, str]:
     """
     Given a Google Cloud Storage URL (gs://<bucket>/<blob>), returns a
     tuple containing the corresponding bucket and blob.