diff --git a/.gitignore b/.gitignore index 5427c785..53f230ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,94 +1,64 @@ -######################################### -# Editor temporary/working/backup files # -.#* -*\#*\# -[#]*# -*~ -*$ -*.bak -*flymake* -*.kdev4 -*.log -*.swp -*.pdb -.project -.pydevproject -.settings -.idea -.vagrant -.noseids -.ipynb_checkpoints -.tags -.pytest_cache -.testmon* -.vscode/ -.env - -# Docs # -######## -docs/source/_build +*.py[cod] +*.sw[op] -# Coverage # -############ -.coverage -coverage.xml -coverage_html_report -.pytest_cache - -# Compiled source # -################### -*.a -*.com -*.class -*.dll -*.exe -*.pxi -*.o -*.py[ocd] +# C extensions *.so -.build_cache_dir -MANIFEST -__pycache__ -# Python files # -################ -# setup.py working directory -build -# setup.py dist directory -dist -# Egg metadata +# Packages +*.egg *.egg-info +dist +build +eggs .eggs -.pypirc +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +__pycache__ -# tox testing tool -.tox -# rope -.ropeproject -# wheel files -*.whl -**/wheelhouse/* -pip-wheel-metadata +# Installer logs +pip-log.txt -# coverage +# Unit test / coverage reports .coverage -.testmondata -.pytest_cache .nox +.cache +.pytest_cache + -# OS generated files # -###################### -.directory -.gdb_history +# Mac .DS_Store -ehthumbs.db -Icon? -Thumbs.db -# caches # -.cache +# JetBrains +.idea + +# VS Code +.vscode + +# emacs +*~ + +# Built documentation +docs/_build +docs/source/_build +bigquery/docs/generated +docs.metadata + +# Virtual environment +env/ + +# Test logs +coverage.xml +*sponge_log.xml + +# System test environment variables. +system_tests/local_test_setup -# Credentials # -############### -bigquery_credentials.dat -ci/service_account.json +# Make sure a generated file isn't accidentally committed. +pylintrc +pylintrc.test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..62eb5a77 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 diff --git a/.stickler.yml b/.stickler.yml deleted file mode 100644 index 7bb34d25..00000000 --- a/.stickler.yml +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. 
- -linters: - black: - config: ./pyproject.toml - fixer: true \ No newline at end of file diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index 4c2ed9b1..00000000 --- a/codecov.yml +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -coverage: - status: - project: - default: - target: '0' - enabled: no - patch: - default: - enabled: no - target: '50' - branches: null diff --git a/docs/source/conf.py b/docs/source/conf.py index bfcc94ef..b250e7d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,9 +70,7 @@ # General information about the project. project = u"pandas-gbq" -copyright = u"2017-{}, PyData Development Team".format( - datetime.datetime.now().year -) +copyright = u"2017-{}, PyData Development Team".format(datetime.datetime.now().year) author = u"PyData Development Team" # The version info for the project you're documenting, acts as replacement for @@ -102,8 +100,13 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = [ + "_build", + "**/.nox/**/*", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -335,9 +338,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, "pandas-gbq", u"pandas-gbq Documentation", [author], 1) -] +man_pages = [(master_doc, "pandas-gbq", u"pandas-gbq Documentation", [author], 1)] # If true, show URL addresses after external links. # diff --git a/noxfile.py b/noxfile.py index e1564138..b8a3a985 100644 --- a/noxfile.py +++ b/noxfile.py @@ -14,30 +14,48 @@ import nox -supported_pythons = ["3.7", "3.8"] -system_test_pythons = ["3.7", "3.8"] -latest_python = "3.8" +BLACK_VERSION = "black==19.10b0" +BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"] -# Use a consistent version of black so CI is deterministic. -# Should match Stickler: https://stickler-ci.com/docs#black -black_package = "black==20.8b1" +DEFAULT_PYTHON_VERSION = "3.8" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"] -@nox.session(python=latest_python) +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + + +@nox.session(python=DEFAULT_PYTHON_VERSION) def lint(session): - session.install(black_package, "flake8") - session.run("flake8", "pandas_gbq") - session.run("flake8", "tests") - session.run("black", "--check", ".") + """Run linters. + Returns a failure if the linters find linting errors or sufficiently + serious code quality issues. + """ + session.install("flake8", BLACK_VERSION) + session.run( + "black", "--check", *BLACK_PATHS, + ) + session.run("flake8", "pandas_gbq", "tests") -@nox.session(python=latest_python) +@nox.session(python=DEFAULT_PYTHON_VERSION) def blacken(session): - session.install(black_package) - session.run("black", ".") + """Run black. 
Format code to uniform standard.""" + session.install(BLACK_VERSION) + session.run( + "black", *BLACK_PATHS, + ) + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def lint_setup_py(session): + """Verify that setup.py is valid (including RST check).""" + session.install("docutils", "pygments") + session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python=supported_pythons) + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): session.install("pytest", "pytest-cov") session.install( @@ -56,18 +74,23 @@ def unit(session): "--cov=tests.unit", "--cov-report", "xml:/tmp/pytest-cov.xml", - *session.posargs + *session.posargs, ) -@nox.session(python=latest_python) +@nox.session(python=DEFAULT_PYTHON_VERSION) def cover(session): + """Run the final coverage report. + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ session.install("coverage", "pytest-cov") session.run("coverage", "report", "--show-missing", "--fail-under=73") + session.run("coverage", "erase") -@nox.session(python=latest_python) +@nox.session(python=DEFAULT_PYTHON_VERSION) def docs(session): """Build the docs.""" @@ -89,7 +112,7 @@ def docs(session): ) -@nox.session(python=system_test_pythons) +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): session.install("pytest", "pytest-cov") session.install( @@ -111,5 +134,5 @@ def system(session): os.path.join(".", "tests", "system"), os.path.join(".", "samples", "tests"), "-v", - *additional_args + *additional_args, ) diff --git a/pandas_gbq/_version.py b/pandas_gbq/_version.py deleted file mode 100644 index 017eefdf..00000000 --- a/pandas_gbq/_version.py +++ /dev/null @@ -1,571 +0,0 @@ -# Copyright (c) 2017 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "pandas_gbq/_version.py" - cfg.versionfile_source = "pandas_gbq/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/pandas_gbq/auth.py b/pandas_gbq/auth.py index 61dcee06..41ee4192 100644 --- a/pandas_gbq/auth.py +++ b/pandas_gbq/auth.py @@ -23,9 +23,7 @@ # machine. # # See: https://cloud.google.com/docs/authentication/end-user for details. 
-CLIENT_ID = ( - "725825577420-unm2gnkiprugilg743tkbig250f4sfsj.apps.googleusercontent.com" -) +CLIENT_ID = "725825577420-unm2gnkiprugilg743tkbig250f4sfsj.apps.googleusercontent.com" CLIENT_SECRET = "4hqze9yI8fxShls8eJWkeMdJ" @@ -60,8 +58,7 @@ def get_credentials_cache(reauth): if reauth: return pydata_google_auth.cache.WriteOnlyCredentialsCache( - dirname=CREDENTIALS_CACHE_DIRNAME, - filename=CREDENTIALS_CACHE_FILENAME, + dirname=CREDENTIALS_CACHE_DIRNAME, filename=CREDENTIALS_CACHE_FILENAME, ) return pydata_google_auth.cache.ReadWriteCredentialsCache( dirname=CREDENTIALS_CACHE_DIRNAME, filename=CREDENTIALS_CACHE_FILENAME diff --git a/pandas_gbq/features.py b/pandas_gbq/features.py index 5a90caa2..ef1969fd 100644 --- a/pandas_gbq/features.py +++ b/pandas_gbq/features.py @@ -28,9 +28,7 @@ def bigquery_installed_version(self): self._bigquery_installed_version = pkg_resources.parse_version( google.cloud.bigquery.__version__ ) - bigquery_minimum_version = pkg_resources.parse_version( - BIGQUERY_MINIMUM_VERSION - ) + bigquery_minimum_version = pkg_resources.parse_version(BIGQUERY_MINIMUM_VERSION) if self._bigquery_installed_version < bigquery_minimum_version: raise ImportError( @@ -67,9 +65,7 @@ def bigquery_has_from_dataframe_with_csv(self): bigquery_from_dataframe_version = pkg_resources.parse_version( BIGQUERY_FROM_DATAFRAME_CSV_VERSION ) - return ( - self.bigquery_installed_version >= bigquery_from_dataframe_version - ) + return self.bigquery_installed_version >= bigquery_from_dataframe_version @property def pandas_installed_version(self): @@ -79,9 +75,7 @@ def pandas_installed_version(self): if self._pandas_installed_version is not None: return self._pandas_installed_version - self._pandas_installed_version = pkg_resources.parse_version( - pandas.__version__ - ) + self._pandas_installed_version = pkg_resources.parse_version(pandas.__version__) return self._pandas_installed_version @property diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index e7f8b0ae..856c1285 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -309,9 +309,7 @@ def __init__( self.project_id = default_project if self.project_id is None: - raise ValueError( - "Could not determine project ID and one was not supplied." - ) + raise ValueError("Could not determine project ID and one was not supplied.") # Cache the credentials if they haven't been set yet. if context.credentials is None: @@ -372,9 +370,7 @@ def get_client(self): client_info=client_info, ) - return bigquery.Client( - project=self.project_id, credentials=self.credentials - ) + return bigquery.Client(project=self.project_id, credentials=self.credentials) @staticmethod def process_http_error(ex): @@ -383,9 +379,7 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query( - self, query, max_results=None, progress_bar_type=None, **kwargs - ): + def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -423,9 +417,7 @@ def run_query( logger.debug("Query running...") except (RefreshError, ValueError): if self.private_key: - raise AccessDenied( - "The service account credentials are not valid" - ) + raise AccessDenied("The service account credentials are not valid") else: raise AccessDenied( "The credentials have been revoked or expired, " @@ -440,9 +432,9 @@ def run_query( while query_reply.state != "DONE": self.log_elapsed_seconds(" Elapsed", "s. 
Waiting...") - timeout_ms = job_config.get("jobTimeoutMs") or job_config[ - "query" - ].get("timeoutMs") + timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get( + "timeoutMs" + ) timeout_ms = int(timeout_ms) if timeout_ms else None if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000: raise QueryTimeout("Query timeout: {} ms".format(timeout_ms)) @@ -467,8 +459,7 @@ def run_query( bytes_billed = query_reply.total_bytes_billed or 0 logger.debug( "Query done.\nProcessed: {} Billed: {}".format( - self.sizeof_fmt(bytes_processed), - self.sizeof_fmt(bytes_billed), + self.sizeof_fmt(bytes_processed), self.sizeof_fmt(bytes_billed), ) ) logger.debug( @@ -486,11 +477,7 @@ def run_query( ) def _download_results( - self, - query_job, - max_results=None, - progress_bar_type=None, - user_dtypes=None, + self, query_job, max_results=None, progress_bar_type=None, user_dtypes=None, ): # No results are desired, so don't bother downloading anything. if max_results == 0: @@ -519,17 +506,13 @@ def _download_results( to_dataframe_kwargs = {} if FEATURES.bigquery_has_bqstorage: - to_dataframe_kwargs[ - "create_bqstorage_client" - ] = create_bqstorage_client + to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: query_job.result() # Get the table schema, so that we can list rows. destination = self.client.get_table(query_job.destination) - rows_iter = self.client.list_rows( - destination, max_results=max_results - ) + rows_iter = self.client.list_rows(destination, max_results=max_results) schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) @@ -584,9 +567,7 @@ def load_data( self.process_http_error(ex) def delete_and_recreate_table(self, dataset_id, table_id, table_schema): - table = _Table( - self.project_id, dataset_id, credentials=self.credentials - ) + table = _Table(self.project_id, dataset_id, credentials=self.credentials) table.delete(table_id) table.create(table_id, table_schema) @@ -644,9 +625,7 @@ def _cast_empty_df_dtypes(schema_fields, df): ``object``. """ if not df.empty: - raise ValueError( - "DataFrame must be empty in order to cast non-nullsafe dtypes" - ) + raise ValueError("DataFrame must be empty in order to cast non-nullsafe dtypes") dtype_map = {"BOOLEAN": bool, "INTEGER": np.int64} @@ -867,9 +846,7 @@ def read_gbq( final_df.set_index(index_col, inplace=True) else: raise InvalidIndexColumn( - 'Index column "{0}" does not exist in DataFrame.'.format( - index_col - ) + 'Index column "{0}" does not exist in DataFrame.'.format(index_col) ) # Change the order of columns in the DataFrame based on provided list @@ -877,9 +854,7 @@ def read_gbq( if sorted(col_order) == sorted(final_df.columns): final_df = final_df[col_order] else: - raise InvalidColumnOrder( - "Column order does not match this DataFrame." - ) + raise InvalidColumnOrder("Column order does not match this DataFrame.") connector.log_elapsed_seconds( "Total time taken", @@ -1070,13 +1045,9 @@ def to_gbq( "'append' or 'replace' data." 
) elif if_exists == "replace": - connector.delete_and_recreate_table( - dataset_id, table_id, table_schema - ) + connector.delete_and_recreate_table(dataset_id, table_id, table_schema) elif if_exists == "append": - if not pandas_gbq.schema.schema_is_subset( - original_schema, table_schema - ): + if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): raise InvalidSchema( "Please verify that the structure and " "data types in the DataFrame match the " @@ -1116,8 +1087,7 @@ def generate_bq_schema(df, default_type="STRING"): """ # deprecation TimeSeries, #11121 warnings.warn( - "generate_bq_schema is deprecated and will be removed in " - "a future version", + "generate_bq_schema is deprecated and will be removed in " "a future version", FutureWarning, stacklevel=2, ) @@ -1206,17 +1176,13 @@ def create(self, table_id, schema): from google.cloud.bigquery import TableReference if self.exists(table_id): - raise TableCreationError( - "Table {0} already exists".format(table_id) - ) + raise TableCreationError("Table {0} already exists".format(table_id)) if not _Dataset(self.project_id, credentials=self.credentials).exists( self.dataset_id ): _Dataset( - self.project_id, - credentials=self.credentials, - location=self.location, + self.project_id, credentials=self.credentials, location=self.location, ).create(self.dataset_id) table_ref = TableReference( diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 4eca7c56..faa674c2 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -81,10 +81,7 @@ def load_chunks( if FEATURES.bigquery_has_from_dataframe_with_csv: client.load_table_from_dataframe( - chunk, - destination_table_ref, - job_config=job_config, - location=location, + chunk, destination_table_ref, job_config=job_config, location=location, ).result() else: try: diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index ec81045c..e2f97455 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -21,9 +21,7 @@ def to_pandas_gbq(client_schema): """Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`, return a schema in pandas-gbq API format. """ - remote_fields = [ - field_remote.to_api_repr() for field_remote in client_schema - ] + remote_fields = [field_remote.to_api_repr() for field_remote in client_schema] for field in remote_fields: field["type"] = field["type"].upper() field["mode"] = field["mode"].upper() @@ -39,9 +37,7 @@ def to_google_cloud_bigquery(pandas_gbq_schema): # Need to convert from JSON representation to format used by client library. 
schema = add_default_nullable_mode(pandas_gbq_schema) - return [ - bigquery.SchemaField.from_api_repr(field) for field in schema["fields"] - ] + return [bigquery.SchemaField.from_api_repr(field) for field in schema["fields"]] def _clean_schema_fields(fields): @@ -110,10 +106,7 @@ def generate_bq_schema(dataframe, default_type="STRING"): fields = [] for column_name, dtype in dataframe.dtypes.iteritems(): fields.append( - { - "name": column_name, - "type": type_mapping.get(dtype.kind, default_type), - } + {"name": column_name, "type": type_mapping.get(dtype.kind, default_type)} ) return {"fields": fields} diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 318a0442..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,8 +0,0 @@ -[tool.black] -line-length = 79 -exclude = ''' -versioneer.py -| _version.py -| docs -| .nox -''' \ No newline at end of file diff --git a/samples/__init__.py b/samples/__init__.py index edbca6c3..c9ab8506 100644 --- a/samples/__init__.py +++ b/samples/__init__.py @@ -1,4 +1,3 @@ # Copyright (c) 2017 pandas-gbq Authors All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. - diff --git a/samples/tests/__init__.py b/samples/tests/__init__.py index edbca6c3..c9ab8506 100644 --- a/samples/tests/__init__.py +++ b/samples/tests/__init__.py @@ -1,4 +1,3 @@ # Copyright (c) 2017 pandas-gbq Authors All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. - diff --git a/tests/__init__.py b/tests/__init__.py index edbca6c3..c9ab8506 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,3 @@ # Copyright (c) 2017 pandas-gbq Authors All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. - diff --git a/tests/system/__init__.py b/tests/system/__init__.py index edbca6c3..c9ab8506 100644 --- a/tests/system/__init__.py +++ b/tests/system/__init__.py @@ -1,4 +1,3 @@ # Copyright (c) 2017 pandas-gbq Authors All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. 
- diff --git a/tests/system/conftest.py b/tests/system/conftest.py index a40ac47b..4745da0c 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -32,9 +32,7 @@ def gbq_connector(project, credentials): def random_dataset(bigquery_client, random_dataset_id): from google.cloud import bigquery - dataset_ref = bigquery.DatasetReference( - bigquery_client.project, random_dataset_id - ) + dataset_ref = bigquery.DatasetReference(bigquery_client.project, random_dataset_id) dataset = bigquery.Dataset(dataset_ref) bigquery_client.create_dataset(dataset) return dataset @@ -44,9 +42,7 @@ def random_dataset(bigquery_client, random_dataset_id): def tokyo_dataset(bigquery_client, random_dataset_id): from google.cloud import bigquery - dataset_ref = bigquery.DatasetReference( - bigquery_client.project, random_dataset_id - ) + dataset_ref = bigquery.DatasetReference(bigquery_client.project, random_dataset_id) dataset = bigquery.Dataset(dataset_ref) dataset.location = "asia-northeast1" bigquery_client.create_dataset(dataset) diff --git a/tests/system/test_auth.py b/tests/system/test_auth.py index 5e8f5a47..34d5c8ff 100644 --- a/tests/system/test_auth.py +++ b/tests/system/test_auth.py @@ -52,9 +52,7 @@ def _check_if_can_get_correct_default_credentials(): import pandas_gbq.gbq try: - credentials, project = google.auth.default( - scopes=pandas_gbq.auth.SCOPES - ) + credentials, project = google.auth.default(scopes=pandas_gbq.auth.SCOPES) except (DefaultCredentialsError, IOError): return False @@ -68,9 +66,7 @@ def test_should_be_able_to_get_valid_credentials(project_id, private_key_path): @pytest.mark.local_auth -def test_get_credentials_bad_file_returns_user_credentials( - project_id, monkeypatch -): +def test_get_credentials_bad_file_returns_user_credentials(project_id, monkeypatch): import google.auth from google.auth.credentials import Credentials diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index bbc0da64..00bbd3d6 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -27,9 +27,7 @@ TABLE_ID = "new_test" PANDAS_VERSION = pkg_resources.parse_version(pandas.__version__) NULLABLE_INT_PANDAS_VERSION = pkg_resources.parse_version("0.24.0") -NULLABLE_INT_MESSAGE = ( - "Require pandas 0.24+ in order to use nullable integer type." -) +NULLABLE_INT_MESSAGE = "Require pandas 0.24+ in order to use nullable integer type." 
def test_imports(): @@ -43,8 +41,7 @@ def make_mixed_dataframe_v2(test_size): ints = np.random.randint(1, 10, size=(1, test_size)) strs = np.random.randint(1, 10, size=(1, test_size)).astype(str) times = [ - datetime.datetime.now(pytz.timezone("US/Arizona")) - for t in range(test_size) + datetime.datetime.now(pytz.timezone("US/Arizona")) for t in range(test_size) ] return DataFrame( { @@ -58,9 +55,7 @@ def make_mixed_dataframe_v2(test_size): ) -def get_schema( - gbq_connector: gbq.GbqConnector, dataset_id: str, table_id: str -): +def get_schema(gbq_connector: gbq.GbqConnector, dataset_id: str, table_id: str): """Retrieve the schema of the table Obtain from BigQuery the field names and field types @@ -82,17 +77,14 @@ def get_schema( bqclient = gbq_connector.client table_ref = bigquery.TableReference( - bigquery.DatasetReference(bqclient.project, dataset_id), - table_id, + bigquery.DatasetReference(bqclient.project, dataset_id), table_id, ) try: table = bqclient.get_table(table_ref) remote_schema = table.schema - remote_fields = [ - field_remote.to_api_repr() for field_remote in remote_schema - ] + remote_fields = [field_remote.to_api_repr() for field_remote in remote_schema] for field in remote_fields: field["type"] = field["type"].upper() field["mode"] = field["mode"].upper() @@ -197,11 +189,7 @@ def test_should_properly_handle_nullable_integers(self, project_id): tm.assert_frame_equal( df, DataFrame( - { - "nullable_integer": pandas.Series( - [1, pandas.NA], dtype="Int64" - ) - } + {"nullable_integer": pandas.Series([1, pandas.NA], dtype="Int64")} ), ) @@ -232,11 +220,7 @@ def test_should_properly_handle_nullable_longs(self, project_id): tm.assert_frame_equal( df, DataFrame( - { - "nullable_long": pandas.Series( - [1 << 62, pandas.NA], dtype="Int64" - ) - } + {"nullable_long": pandas.Series([1 << 62, pandas.NA], dtype="Int64")} ), ) @@ -253,10 +237,7 @@ def test_should_properly_handle_null_integers(self, project_id): dtypes={"null_integer": "Int64"}, ) tm.assert_frame_equal( - df, - DataFrame( - {"null_integer": pandas.Series([pandas.NA], dtype="Int64")} - ), + df, DataFrame({"null_integer": pandas.Series([pandas.NA], dtype="Int64")}), ) def test_should_properly_handle_valid_floats(self, project_id): @@ -295,9 +276,7 @@ def test_should_properly_handle_valid_doubles(self, project_id): credentials=self.credentials, dialect="legacy", ) - tm.assert_frame_equal( - df, DataFrame({"valid_double": [pi * 10 ** 307]}) - ) + tm.assert_frame_equal(df, DataFrame({"valid_double": [pi * 10 ** 307]})) def test_should_properly_handle_nullable_doubles(self, project_id): from math import pi @@ -329,11 +308,7 @@ def test_should_properly_handle_null_floats(self, project_id): def test_should_properly_handle_date(self, project_id): query = "SELECT DATE(2003, 1, 4) AS date_col" - df = gbq.read_gbq( - query, - project_id=project_id, - credentials=self.credentials, - ) + df = gbq.read_gbq(query, project_id=project_id, credentials=self.credentials,) expected = DataFrame( { "date_col": pandas.Series( @@ -344,12 +319,10 @@ def test_should_properly_handle_date(self, project_id): tm.assert_frame_equal(df, expected) def test_should_properly_handle_time(self, project_id): - query = "SELECT TIME_ADD(TIME(3, 14, 15), INTERVAL 926589 MICROSECOND) AS time_col" - df = gbq.read_gbq( - query, - project_id=project_id, - credentials=self.credentials, + query = ( + "SELECT TIME_ADD(TIME(3, 14, 15), INTERVAL 926589 MICROSECOND) AS time_col" ) + df = gbq.read_gbq(query, project_id=project_id, credentials=self.credentials,) expected 
= DataFrame( { "time_col": pandas.Series( @@ -368,13 +341,10 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id): dialect="legacy", ) expected = DataFrame( - {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]}, - dtype="datetime64[ns]", + {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]}, dtype="datetime64[ns]", ) if expected["unix_epoch"].dt.tz is None: - expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize( - "UTC" - ) + expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC") tm.assert_frame_equal(df, expected) def test_should_properly_handle_arbitrary_timestamp(self, project_id): @@ -390,9 +360,9 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id): dtype="datetime64[ns]", ) if expected["valid_timestamp"].dt.tz is None: - expected["valid_timestamp"] = expected[ - "valid_timestamp" - ].dt.tz_localize("UTC") + expected["valid_timestamp"] = expected["valid_timestamp"].dt.tz_localize( + "UTC" + ) tm.assert_frame_equal(df, expected) def test_should_properly_handle_datetime_unix_epoch(self, project_id): @@ -405,9 +375,7 @@ def test_should_properly_handle_datetime_unix_epoch(self, project_id): ) tm.assert_frame_equal( df, - DataFrame( - {"unix_epoch": ["1970-01-01T00:00:00"]}, dtype="datetime64[ns]" - ), + DataFrame({"unix_epoch": ["1970-01-01T00:00:00"]}, dtype="datetime64[ns]"), ) def test_should_properly_handle_arbitrary_datetime(self, project_id): @@ -419,10 +387,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id): dialect="legacy", ) tm.assert_frame_equal( - df, - DataFrame( - {"valid_timestamp": [np.datetime64("2004-09-15T05:00:00")]} - ), + df, DataFrame({"valid_timestamp": [np.datetime64("2004-09-15T05:00:00")]}), ) @pytest.mark.parametrize( @@ -435,9 +400,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id): ("FALSE", pandas.api.types.is_bool_dtype), ], ) - def test_return_correct_types( - self, project_id, expression, is_expected_dtype - ): + def test_return_correct_types(self, project_id, expression, is_expected_dtype): """ All type checks can be added to this function using additional parameters, rather than creating additional functions. 
@@ -464,9 +427,7 @@ def test_should_properly_handle_null_timestamp(self, project_id): dialect="legacy", ) expected = DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]") - expected["null_timestamp"] = expected["null_timestamp"].dt.tz_localize( - "UTC" - ) + expected["null_timestamp"] = expected["null_timestamp"].dt.tz_localize("UTC") tm.assert_frame_equal(df, expected) def test_should_properly_handle_null_datetime(self, project_id): @@ -525,9 +486,9 @@ def test_index_column(self, project_id): credentials=self.credentials, dialect="legacy", ) - correct_frame = DataFrame( - {"string_1": ["a"], "string_2": ["b"]} - ).set_index("string_1") + correct_frame = DataFrame({"string_1": ["a"], "string_2": ["b"]}).set_index( + "string_1" + ) assert result_frame.index.name == correct_frame.index.name def test_column_order(self, project_id): @@ -672,8 +633,7 @@ def test_zero_rows(self, project_id): "iso_time": pandas.Series([], dtype="datetime64[ns]"), } expected_result = DataFrame( - empty_columns, - columns=["name", "number", "is_hurricane", "iso_time"], + empty_columns, columns=["name", "number", "is_hurricane", "iso_time"], ) tm.assert_frame_equal(df, expected_result, check_index_type=False) @@ -712,8 +672,7 @@ def test_legacy_sql(self, project_id): def test_standard_sql(self, project_id): standard_sql = ( - "SELECT DISTINCT id FROM " - "`publicdata.samples.wikipedia` LIMIT 10" + "SELECT DISTINCT id FROM " "`publicdata.samples.wikipedia` LIMIT 10" ) # Test that a standard sql statement fails when using @@ -828,9 +787,7 @@ def test_configuration_without_query(self, project_id): dialect="legacy", ) - def test_configuration_raises_value_error_with_multiple_config( - self, project_id - ): + def test_configuration_raises_value_error_with_multiple_config(self, project_id): sql_statement = "SELECT 1" config = { "query": {"query": sql_statement, "useQueryCache": False}, @@ -900,8 +857,7 @@ def test_struct(self, project_id): dialect="standard", ) expected = DataFrame( - [[1, {"letter": "a", "num": 1}]], - columns=["int_field", "struct_field"], + [[1, {"letter": "a", "num": 1}]], columns=["int_field", "struct_field"], ) tm.assert_frame_equal(df, expected) @@ -914,8 +870,7 @@ def test_array(self, project_id): dialect="standard", ) tm.assert_frame_equal( - df, - DataFrame([[["a", "x", "b", "y", "c", "z"]]], columns=["letters"]), + df, DataFrame([[["a", "x", "b", "y", "c", "z"]]], columns=["letters"]), ) def test_array_length_zero(self, project_id): @@ -934,8 +889,7 @@ def test_array_length_zero(self, project_id): dialect="standard", ) expected = DataFrame( - [["a", [""], 1], ["b", [], 0]], - columns=["letter", "array_field", "len"], + [["a", [""], 1], ["b", [], 0]], columns=["letter", "array_field", "len"], ) tm.assert_frame_equal(df, expected) @@ -958,10 +912,7 @@ def test_array_agg(self, project_id): dialect="standard", ) tm.assert_frame_equal( - df, - DataFrame( - [["a", [1, 3]], ["b", [2]]], columns=["letter", "numbers"] - ), + df, DataFrame([["a", [1, 3]], ["b", [2]]], columns=["letter", "numbers"]), ) def test_array_of_floats(self, project_id): @@ -972,9 +923,7 @@ def test_array_of_floats(self, project_id): credentials=self.credentials, dialect="standard", ) - tm.assert_frame_equal( - df, DataFrame([[[1.1, 2.2, 3.3], 4]], columns=["a", "b"]) - ) + tm.assert_frame_equal(df, DataFrame([[[1.1, 2.2, 3.3], 4]], columns=["a", "b"])) def test_tokyo(self, tokyo_dataset, tokyo_table, project_id): df = gbq.read_gbq( @@ -998,9 +947,7 @@ def setup(self, project, credentials, random_dataset_id): 
self.credentials = credentials self.gbq_connector = gbq.GbqConnector(project, credentials=credentials) self.bqclient = self.gbq_connector.client - self.table = gbq._Table( - project, random_dataset_id, credentials=credentials - ) + self.table = gbq._Table(project, random_dataset_id, credentials=credentials) self.destination_table = "{}.{}".format(random_dataset_id, TABLE_ID) def test_upload_data(self, project_id): @@ -1044,10 +991,7 @@ def test_upload_empty_data(self, project_id): def test_upload_empty_data_with_schema(self, project_id): test_id = "data_with_0_rows" df = DataFrame( - { - "a": pandas.Series(dtype="int64"), - "b": pandas.Series(dtype="object"), - } + {"a": pandas.Series(dtype="int64"), "b": pandas.Series(dtype="object")} ) gbq.to_gbq( @@ -1244,9 +1188,7 @@ def test_google_upload_errors_should_raise_exception(self, project_id): def test_upload_chinese_unicode_data(self, project_id): test_id = "2" test_size = 6 - df = DataFrame( - np.random.randn(6, 4), index=range(6), columns=list("ABCD") - ) + df = DataFrame(np.random.randn(6, 4), index=range(6), columns=list("ABCD")) df["s"] = u"信用卡" gbq.to_gbq( @@ -1419,9 +1361,7 @@ def test_upload_data_with_valid_user_schema(self, project_id): self.gbq_connector, dataset, table, dict(fields=test_schema) ) - def test_upload_data_with_invalid_user_schema_raises_error( - self, project_id - ): + def test_upload_data_with_invalid_user_schema_raises_error(self, project_id): df = tm.makeMixedDataFrame() test_id = "19" test_schema = [ @@ -1440,9 +1380,7 @@ def test_upload_data_with_invalid_user_schema_raises_error( table_schema=test_schema, ) - def test_upload_data_with_missing_schema_fields_raises_error( - self, project_id - ): + def test_upload_data_with_missing_schema_fields_raises_error(self, project_id): df = tm.makeMixedDataFrame() test_id = "20" test_schema = [ @@ -1464,9 +1402,7 @@ def test_upload_data_with_timestamp(self, project_id): test_id = "21" test_size = 6 df = DataFrame( - np.random.randn(test_size, 4), - index=range(test_size), - columns=list("ABCD"), + np.random.randn(test_size, 4), index=range(test_size), columns=list("ABCD"), ) df["times"] = pandas.Series( [ @@ -1524,9 +1460,7 @@ def test_upload_data_with_different_df_and_user_schema(self, project_id): self.gbq_connector, dataset, table, dict(fields=test_schema) ) - def test_upload_data_tokyo( - self, project_id, tokyo_dataset, bigquery_client - ): + def test_upload_data_tokyo(self, project_id, tokyo_dataset, bigquery_client): from google.cloud import bigquery test_size = 10 @@ -1544,8 +1478,7 @@ def test_upload_data_tokyo( table = bigquery_client.get_table( bigquery.TableReference( - bigquery.DatasetReference(project_id, tokyo_dataset), - "to_gbq_test", + bigquery.DatasetReference(project_id, tokyo_dataset), "to_gbq_test", ) ) assert table.num_rows > 0 @@ -1573,9 +1506,7 @@ def test_upload_data_tokyo_non_existing_dataset( table = bigquery_client.get_table( bigquery.TableReference( - bigquery.DatasetReference( - project_id, non_existing_tokyo_dataset - ), + bigquery.DatasetReference(project_id, non_existing_tokyo_dataset), "to_gbq_test", ) ) @@ -1585,15 +1516,11 @@ def test_upload_data_tokyo_non_existing_dataset( # _Dataset tests -def test_create_dataset( - bigquery_client, gbq_dataset, random_dataset_id, project_id -): +def test_create_dataset(bigquery_client, gbq_dataset, random_dataset_id, project_id): from google.cloud import bigquery gbq_dataset.create(random_dataset_id) - dataset_reference = bigquery.DatasetReference( - project_id, random_dataset_id - ) + 
     dataset_reference = bigquery.DatasetReference(project_id, random_dataset_id)
     assert bigquery_client.get_dataset(dataset_reference) is not None
@@ -1684,9 +1611,7 @@ def test_verify_schema_allows_flexible_column_order(gbq_table, gbq_connector):
     }
     gbq_table.create(table_id, test_schema_1)

-    assert verify_schema(
-        gbq_connector, gbq_table.dataset_id, table_id, test_schema_2
-    )
+    assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, test_schema_2)


 def test_verify_schema_fails_different_data_type(gbq_table, gbq_connector):
@@ -1759,9 +1684,7 @@ def test_verify_schema_ignores_field_mode(gbq_table, gbq_connector):
     }
     gbq_table.create(table_id, test_schema_1)

-    assert verify_schema(
-        gbq_connector, gbq_table.dataset_id, table_id, test_schema_2
-    )
+    assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, test_schema_2)


 def test_retrieve_schema(gbq_table, gbq_connector):
@@ -1769,24 +1692,9 @@ def test_retrieve_schema(gbq_table, gbq_connector):
     table_id = "test_retrieve_schema"
     test_schema = {
         "fields": [
-            {
-                "name": "A",
-                "type": "FLOAT",
-                "mode": "NULLABLE",
-                "description": None,
-            },
-            {
-                "name": "B",
-                "type": "FLOAT",
-                "mode": "NULLABLE",
-                "description": None,
-            },
-            {
-                "name": "C",
-                "type": "STRING",
-                "mode": "NULLABLE",
-                "description": None,
-            },
+            {"name": "A", "type": "FLOAT", "mode": "NULLABLE", "description": None},
+            {"name": "B", "type": "FLOAT", "mode": "NULLABLE", "description": None},
+            {"name": "C", "type": "STRING", "mode": "NULLABLE", "description": None},
             {
                 "name": "D",
                 "type": "TIMESTAMP",
@@ -1813,24 +1721,9 @@ def test_to_gbq_does_not_override_mode(gbq_table, gbq_connector):
     table_id = "test_to_gbq_does_not_override_mode"
     table_schema = {
         "fields": [
-            {
-                "mode": "REQUIRED",
-                "name": "A",
-                "type": "FLOAT",
-                "description": "A",
-            },
-            {
-                "mode": "NULLABLE",
-                "name": "B",
-                "type": "FLOAT",
-                "description": "B",
-            },
-            {
-                "mode": "NULLABLE",
-                "name": "C",
-                "type": "STRING",
-                "description": "C",
-            },
+            {"mode": "REQUIRED", "name": "A", "type": "FLOAT", "description": "A"},
+            {"mode": "NULLABLE", "name": "B", "type": "FLOAT", "description": "B"},
+            {"mode": "NULLABLE", "name": "C", "type": "STRING", "description": "C"},
         ]
     }

@@ -1842,6 +1735,4 @@ def test_to_gbq_does_not_override_mode(gbq_table, gbq_connector):
         if_exists="append",
     )

-    assert verify_schema(
-        gbq_connector, gbq_table.dataset_id, table_id, table_schema
-    )
+    assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, table_schema)
diff --git a/tests/system/test_read_gbq_with_bqstorage.py b/tests/system/test_read_gbq_with_bqstorage.py
index 8b9c7ecc..8440948a 100644
--- a/tests/system/test_read_gbq_with_bqstorage.py
+++ b/tests/system/test_read_gbq_with_bqstorage.py
@@ -34,10 +34,7 @@ def test_empty_results(method_under_test, query_string):

     See: https://github.com/pydata/pandas-gbq/issues/299
     """
-    df = method_under_test(
-        query_string,
-        use_bqstorage_api=True,
-    )
+    df = method_under_test(query_string, use_bqstorage_api=True,)
     assert len(df.index) == 0

diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
index 59435c33..f5009421 100644
--- a/tests/system/test_to_gbq.py
+++ b/tests/system/test_to_gbq.py
@@ -19,9 +19,7 @@ def method_under_test(credentials):
     return functools.partial(pandas_gbq.to_gbq, credentials=credentials)


-def test_float_round_trip(
-    method_under_test, random_dataset_id, bigquery_client
-):
+def test_float_round_trip(method_under_test, random_dataset_id, bigquery_client):
     """Ensure that 64-bit floating point numbers are unchanged.

     See: https://github.com/pydata/pandas-gbq/issues/326
@@ -47,7 +45,5 @@ def test_float_round_trip(
     round_trip = bigquery_client.list_rows(table_id).to_dataframe()
     round_trip_floats = round_trip["float_col"].sort_values()
     pandas.testing.assert_series_equal(
-        round_trip_floats,
-        input_floats,
-        check_exact=True,
+        round_trip_floats, input_floats, check_exact=True,
     )
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
index edbca6c3..c9ab8506 100644
--- a/tests/unit/__init__.py
+++ b/tests/unit/__init__.py
@@ -1,4 +1,3 @@
 # Copyright (c) 2017 pandas-gbq Authors All rights reserved.
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
-
diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py
index ca544746..c101942e 100644
--- a/tests/unit/test_auth.py
+++ b/tests/unit/test_auth.py
@@ -51,13 +51,9 @@ def mock_default_credentials(scopes=None, request=None):
         return (None, None)

     monkeypatch.setattr(google.auth, "default", mock_default_credentials)
-    mock_user_credentials = mock.create_autospec(
-        google.auth.credentials.Credentials
-    )
+    mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)

-    mock_cache = mock.create_autospec(
-        pydata_google_auth.cache.CredentialsCache
-    )
+    mock_cache = mock.create_autospec(pydata_google_auth.cache.CredentialsCache)
     mock_cache.load.return_value = mock_user_credentials

     monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache)
@@ -71,6 +67,4 @@ def test_get_credentials_cache_w_reauth():
     import pydata_google_auth.cache

     cache = auth.get_credentials_cache(True)
-    assert isinstance(
-        cache, pydata_google_auth.cache.WriteOnlyCredentialsCache
-    )
+    assert isinstance(cache, pydata_google_auth.cache.WriteOnlyCredentialsCache)
diff --git a/tests/unit/test_features.py b/tests/unit/test_features.py
index d1d5af81..b10b0fa8 100644
--- a/tests/unit/test_features.py
+++ b/tests/unit/test_features.py
@@ -23,9 +23,7 @@ def fresh_bigquery_version(monkeypatch):
         ("2.12.0", True),
     ],
 )
-def test_bigquery_has_from_dataframe_with_csv(
-    monkeypatch, bigquery_version, expected
-):
+def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, expected):
     import google.cloud.bigquery

     monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version)
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 7476db3f..3b603412 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -17,9 +17,7 @@
 from pandas_gbq.features import FEATURES


-pytestmark = pytest.mark.filterwarnings(
-    "ignore:credentials from Google Cloud SDK"
-)
+pytestmark = pytest.mark.filterwarnings("ignore:credentials from Google Cloud SDK")


 def _make_connector(project_id="some-project", **kwargs):
@@ -29,18 +27,14 @@ def _make_connector(project_id="some-project", **kwargs):
 def mock_get_credentials_no_project(*args, **kwargs):
     import google.auth.credentials

-    mock_credentials = mock.create_autospec(
-        google.auth.credentials.Credentials
-    )
+    mock_credentials = mock.create_autospec(google.auth.credentials.Credentials)
     return mock_credentials, None


 def mock_get_credentials(*args, **kwargs):
     import google.auth.credentials

-    mock_credentials = mock.create_autospec(
-        google.auth.credentials.Credentials
-    )
+    mock_credentials = mock.create_autospec(google.auth.credentials.Credentials)
     return mock_credentials, "default-project"


@@ -48,9 +42,7 @@ def mock_get_credentials(*args, **kwargs):
 def mock_service_account_credentials():
     import google.oauth2.service_account

-    mock_credentials = mock.create_autospec(
-        google.oauth2.service_account.Credentials
-    )
+    mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials)
     return mock_credentials


@@ -58,9 +50,7 @@ def mock_service_account_credentials():
 def mock_compute_engine_credentials():
     import google.auth.compute_engine

-    mock_credentials = mock.create_autospec(
-        google.auth.compute_engine.Credentials
-    )
+    mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials)
     return mock_credentials


@@ -104,9 +94,7 @@ def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client):
     connector.get_client()

     # No client_info argument.
-    mock_bigquery_client.assert_called_with(
-        credentials=mock.ANY, project=mock.ANY
-    )
+    mock_bigquery_client.assert_called_with(credentials=mock.ANY, project=mock.ANY)


 def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client):
@@ -119,9 +107,7 @@ def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client):
     connector.get_client()

     _, kwargs = mock_bigquery_client.call_args
-    assert kwargs["client_info"].user_agent == "pandas-{}".format(
-        pandas.__version__
-    )
+    assert kwargs["client_info"].user_agent == "pandas-{}".format(pandas.__version__)


 def test_to_gbq_should_fail_if_invalid_table_name_passed():
@@ -132,18 +118,14 @@ def test_to_gbq_should_fail_if_invalid_table_name_passed():
 def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch):
     import pydata_google_auth

-    monkeypatch.setattr(
-        pydata_google_auth, "default", mock_get_credentials_no_project
-    )
+    monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project)

     with pytest.raises(ValueError, match="Could not determine project ID"):
         gbq.to_gbq(DataFrame([[1]]), "dataset.tablename")


 @pytest.mark.parametrize(["verbose"], [(True,), (False,)])
-def test_to_gbq_with_verbose_new_pandas_warns_deprecation(
-    monkeypatch, verbose
-):
+def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose):
     monkeypatch.setattr(
         type(FEATURES),
         "pandas_has_deprecated_verbose",
@@ -168,9 +150,7 @@ def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn):
         mock.PropertyMock(return_value=True),
     )
     try:
-        gbq.to_gbq(
-            DataFrame([[1]]), "dataset.tablename", project_id="my-project"
-        )
+        gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project")
     except gbq.TableCreationError:
         pass
     assert len(recwarn) == 0
@@ -206,9 +186,7 @@ def test_to_gbq_with_private_key_raises_notimplementederror():

 def test_to_gbq_doesnt_run_query(mock_bigquery_client):
     try:
-        gbq.to_gbq(
-            DataFrame([[1]]), "dataset.tablename", project_id="my-project"
-        )
+        gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project")
     except gbq.TableCreationError:
         pass

@@ -218,8 +196,8 @@ def test_to_gbq_doesnt_run_query(mock_bigquery_client):
 def test_to_gbq_w_empty_df(mock_bigquery_client):
     import google.api_core.exceptions

-    mock_bigquery_client.get_table.side_effect = (
-        google.api_core.exceptions.NotFound("my_table")
+    mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound(
+        "my_table"
     )
     gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234")
     mock_bigquery_client.create_table.assert_called_with(mock.ANY)
@@ -234,8 +212,8 @@ def test_to_gbq_w_default_project(mock_bigquery_client):
     import google.api_core.exceptions
     from google.cloud.bigquery.table import TableReference

-    mock_bigquery_client.get_table.side_effect = (
-        google.api_core.exceptions.NotFound("my_table")
+    mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound(
+        "my_table"
     )
     gbq.to_gbq(DataFrame(), "my_dataset.my_table")

@@ -254,13 +232,11 @@ def test_to_gbq_w_project_table(mock_bigquery_client):
     import google.api_core.exceptions
     from google.cloud.bigquery.table import TableReference

-    mock_bigquery_client.get_table.side_effect = (
-        google.api_core.exceptions.NotFound("my_table")
+    mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound(
+        "my_table"
     )
     gbq.to_gbq(
-        DataFrame(),
-        "project_table.my_dataset.my_table",
-        project_id="project_client",
+        DataFrame(), "project_table.my_dataset.my_table", project_id="project_client",
     )

     mock_bigquery_client.get_table.assert_called_with(
@@ -274,11 +250,11 @@ def test_to_gbq_w_project_table(mock_bigquery_client):
 def test_to_gbq_creates_dataset(mock_bigquery_client):
     import google.api_core.exceptions

-    mock_bigquery_client.get_table.side_effect = (
-        google.api_core.exceptions.NotFound("my_table")
+    mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound(
+        "my_table"
     )
-    mock_bigquery_client.get_dataset.side_effect = (
-        google.api_core.exceptions.NotFound("my_dataset")
+    mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound(
+        "my_dataset"
     )
     gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234")
     mock_bigquery_client.create_dataset.assert_called_with(mock.ANY)
@@ -287,9 +263,7 @@ def test_to_gbq_creates_dataset(mock_bigquery_client):
 def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch):
     import pydata_google_auth

-    monkeypatch.setattr(
-        pydata_google_auth, "default", mock_get_credentials_no_project
-    )
+    monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project)

     with pytest.raises(ValueError, match="Could not determine project ID"):
         gbq.read_gbq("SELECT 1", dialect="standard")
@@ -305,9 +279,7 @@ def test_read_gbq_with_inferred_project_id_from_service_account_credentials(
 ):
     mock_service_account_credentials.project_id = "service_account_project_id"
     df = gbq.read_gbq(
-        "SELECT 1",
-        dialect="standard",
-        credentials=mock_service_account_credentials,
+        "SELECT 1", dialect="standard", credentials=mock_service_account_credentials,
     )
     assert df is not None
     mock_bigquery_client.query.assert_called_once_with(
@@ -323,9 +295,7 @@ def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials(
 ):
     with pytest.raises(ValueError, match="Could not determine project ID"):
         gbq.read_gbq(
-            "SELECT 1",
-            dialect="standard",
-            credentials=mock_compute_engine_credentials,
+            "SELECT 1", dialect="standard", credentials=mock_compute_engine_credentials,
         )


@@ -341,9 +311,7 @@ def test_read_gbq_with_max_results_ten(monkeypatch, mock_bigquery_client):


 @pytest.mark.parametrize(["verbose"], [(True,), (False,)])
-def test_read_gbq_with_verbose_new_pandas_warns_deprecation(
-    monkeypatch, verbose
-):
+def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose):
     monkeypatch.setattr(
         type(FEATURES),
         "pandas_has_deprecated_verbose",
@@ -370,8 +338,7 @@ def test_read_gbq_with_old_bq_raises_importerror(monkeypatch):
     monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)
     with pytest.raises(ImportError, match="google-cloud-bigquery"):
         gbq.read_gbq(
-            "SELECT 1",
-            project_id="my-project",
+            "SELECT 1", project_id="my-project",
         )


@@ -382,10 +349,7 @@ def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn):
         mock.PropertyMock(return_value=False),
     )
     gbq.read_gbq(
-        "SELECT 1",
-        project_id="my-project",
-        dialect="standard",
-        verbose=True,
+        "SELECT 1", project_id="my-project", dialect="standard", verbose=True,
     )
     assert len(recwarn) == 0

@@ -411,9 +375,7 @@ def test_read_gbq_with_configuration_duplicate_query_raises_error():
     with pytest.raises(
         ValueError, match="Query statement can't be specified inside config"
     ):
-        gbq.read_gbq(
-            "SELECT 1", configuration={"query": {"query": "SELECT 2"}}
-        )
+        gbq.read_gbq("SELECT 1", configuration={"query": {"query": "SELECT 2"}})


 def test_generate_bq_schema_deprecated():
@@ -469,9 +431,7 @@ def test_load_does_not_modify_schema_arg(mock_bigquery_client):
     assert original_schema == original_schema_cp


-def test_read_gbq_passes_dtypes(
-    mock_bigquery_client, mock_service_account_credentials
-):
+def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials):
     mock_service_account_credentials.project_id = "service_account_project_id"
     df = gbq.read_gbq(
         "SELECT 1 AS int_col",
@@ -504,15 +464,11 @@ def test_read_gbq_use_bqstorage_api(

     mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100)
     mock_list_rows.to_dataframe.assert_called_once_with(
-        create_bqstorage_client=True,
-        dtypes=mock.ANY,
-        progress_bar_type=mock.ANY,
+        create_bqstorage_client=True, dtypes=mock.ANY, progress_bar_type=mock.ANY,
     )


-def test_read_gbq_calls_tqdm(
-    mock_bigquery_client, mock_service_account_credentials
-):
+def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials):
     mock_service_account_credentials.project_id = "service_account_project_id"
     df = gbq.read_gbq(
         "SELECT 1",
diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py
index 353b8bd1..d00495a6 100644
--- a/tests/unit/test_load.py
+++ b/tests/unit/test_load.py
@@ -56,13 +56,9 @@ def test_encode_chunk_with_floats():
         StringIO(input_csv), header=None, float_precision="round_trip"
     )
     csv_buffer = load.encode_chunk(input_df)
-    round_trip = pandas.read_csv(
-        csv_buffer, header=None, float_precision="round_trip"
-    )
+    round_trip = pandas.read_csv(csv_buffer, header=None, float_precision="round_trip")
     pandas.testing.assert_frame_equal(
-        round_trip,
-        input_df,
-        check_exact=True,
+        round_trip, input_df, check_exact=True,
     )


@@ -95,9 +91,7 @@ def test_encode_chunks_with_chunksize_none():
     assert len(chunk.index) == 6


-@pytest.mark.parametrize(
-    ["bigquery_has_from_dataframe_with_csv"], [(True,), (False,)]
-)
+@pytest.mark.parametrize(["bigquery_has_from_dataframe_with_csv"], [(True,), (False,)])
 def test_load_chunks_omits_policy_tags(
     monkeypatch, mock_bigquery_client, bigquery_has_from_dataframe_with_csv
 ):
@@ -118,14 +112,10 @@ def test_load_chunks_omits_policy_tags(
         "my-project.my_dataset.my_table"
     )
     schema = {
-        "fields": [
-            {"name": "col1", "type": "INT64", "policyTags": ["tag1", "tag2"]}
-        ]
+        "fields": [{"name": "col1", "type": "INT64", "policyTags": ["tag1", "tag2"]}]
     }

-    _ = list(
-        load.load_chunks(mock_bigquery_client, df, destination, schema=schema)
-    )
+    _ = list(load.load_chunks(mock_bigquery_client, df, destination, schema=schema))

     mock_load = load_method(mock_bigquery_client)
     assert mock_load.called
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index bd04508e..743ddc26 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -24,21 +24,12 @@ def module_under_test():
             {"name": "B", "type": "FLOAT64"},
             {"name": "C", "type": "STRING"},
         ],
-        [
-            {"name": "A", "type": "FLOAT64"},
-            {"name": "B", "type": "FLOAT"},
-        ],
+        [{"name": "A", "type": "FLOAT64"}, {"name": "B", "type": "FLOAT"}],
     ),
     # Original schema from API may contain legacy SQL datatype names.
     # https://github.com/pydata/pandas-gbq/issues/322
-    (
-        [{"name": "A", "type": "INTEGER"}],
-        [{"name": "A", "type": "INT64"}],
-    ),
-    (
-        [{"name": "A", "type": "BOOL"}],
-        [{"name": "A", "type": "BOOLEAN"}],
-    ),
+    ([{"name": "A", "type": "INTEGER"}], [{"name": "A", "type": "INT64"}],),
+    ([{"name": "A", "type": "BOOL"}], [{"name": "A", "type": "BOOLEAN"}],),
     (
         # TODO: include sub-fields when struct uploads are supported.
         [{"name": "A", "type": "STRUCT"}],
@@ -65,10 +56,7 @@ def test_schema_is_subset_fails_if_not_subset(module_under_test):
         ]
     }
     tested_schema = {
-        "fields": [
-            {"name": "A", "type": "FLOAT"},
-            {"name": "C", "type": "FLOAT"},
-        ]
+        "fields": [{"name": "A", "type": "FLOAT"}, {"name": "C", "type": "FLOAT"}]
     }

     assert not module_under_test.schema_is_subset(table_schema, tested_schema)
@@ -160,8 +148,6 @@ def test_generate_bq_schema(module_under_test, dataframe, expected_schema):
         ),
     ],
 )
-def test_update_schema(
-    module_under_test, schema_old, schema_new, expected_output
-):
+def test_update_schema(module_under_test, schema_old, schema_new, expected_output):
     output = module_under_test.update_schema(schema_old, schema_new)
     assert output == expected_output
diff --git a/tests/unit/test_timestamp.py b/tests/unit/test_timestamp.py
index 6c9e3282..406643d0 100644
--- a/tests/unit/test_timestamp.py
+++ b/tests/unit/test_timestamp.py
@@ -33,9 +33,7 @@ def test_localize_df_with_empty_dataframe(module_under_test):


 def test_localize_df_with_no_timestamp_columns(module_under_test):
-    df = pandas.DataFrame(
-        {"integer_col": [1, 2, 3], "float_col": [0.1, 0.2, 0.3]}
-    )
+    df = pandas.DataFrame({"integer_col": [1, 2, 3], "float_col": [0.1, 0.2, 0.3]})
     original = df.copy()
     bq_schema = [
         {"name": "integer_col", "type": "INTEGER"},
@@ -54,11 +52,7 @@ def test_localize_df_with_timestamp_column(module_under_test):
         {
             "integer_col": [1, 2, 3],
             "timestamp_col": pandas.Series(
-                [
-                    "2011-01-01 01:02:03",
-                    "2012-02-02 04:05:06",
-                    "2013-03-03 07:08:09",
-                ],
+                ["2011-01-01 01:02:03", "2012-02-02 04:05:06", "2013-03-03 07:08:09"],
                 dtype="datetime64[ns]",
             ),
             "float_col": [0.1, 0.2, 0.3],