Skip to content

CI/WEB: Fix github quota errors by using website as cache #50811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .github/workflows/docbuild-and-upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ jobs:
- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Set up maintainers cache
uses: actions/cache@v3
with:
path: maintainers.json
key: maintainers

- name: Build website
run: python web/pandas_web.py web/pandas --target-path=web/build

Expand Down
6 changes: 4 additions & 2 deletions web/pandas/about/team.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ If you want to support pandas development, you can find information in the [dona
## Active maintainers

<div class="card-group maintainers">
{% for person in maintainers.active_with_github_info %}
{% for username in maintainers.active %}
{% set person = maintainers.github_info.get(username) %}
<div class="card">
<img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
<div class="card-body">
Expand Down Expand Up @@ -67,7 +68,8 @@ The project governance is available in the [project governance page](governance.
## Inactive maintainers

<ul>
{% for person in maintainers.inactive_with_github_info %}
{% for username in maintainers.inactive %}
{% set person = maintainers.github_info.get(username) %}
<li>
<a href="{{ person.blog or person.html_url }}">
{{ person.name or person.login }}
Expand Down
2 changes: 1 addition & 1 deletion web/pandas/config.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
main:
templates_path: _templates
base_template: "layout.html"
production_url: "https://pandas.pydata.org/"
ignore:
- _templates/layout.html
- config.yml
- try.md # the binder page will be added later
github_repo_url: pandas-dev/pandas
context_preprocessors:
- pandas_web.Preprocessors.current_year
Expand Down
118 changes: 59 additions & 59 deletions web/pandas_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,6 @@
import requests
import yaml

api_token = os.environ.get("GITHUB_TOKEN")
if api_token is not None:
GITHUB_API_HEADERS = {"Authorization": f"Bearer {api_token}"}
else:
GITHUB_API_HEADERS = {}


class Preprocessors:
"""
Expand Down Expand Up @@ -164,37 +158,39 @@ def maintainers_add_info(context):
Given the active maintainers defined in the yaml file, it fetches
the GitHub user information for them.
"""
timestamp = time.time()

cache_file = pathlib.Path("maintainers.json")
if cache_file.is_file():
with open(cache_file) as f:
context["maintainers"] = json.load(f)
# refresh cache after 1 hour
if (timestamp - context["maintainers"]["timestamp"]) < 3_600:
return context

context["maintainers"]["timestamp"] = timestamp

repeated = set(context["maintainers"]["active"]) & set(
context["maintainers"]["inactive"]
)
if repeated:
raise ValueError(f"Maintainers {repeated} are both active and inactive")

for kind in ("active", "inactive"):
context["maintainers"][f"{kind}_with_github_info"] = []
for user in context["maintainers"][kind]:
resp = requests.get(
f"https://api.github.com/users/{user}", headers=GITHUB_API_HEADERS
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might as well still try the API call with GITHUB_TOKEN

maintainers_info = {}
for user in (
context["maintainers"]["active"] + context["maintainers"]["inactive"]
):
resp = requests.get(f"https://api.github.com/users/{user}")
if resp.status_code == 403:
sys.stderr.write(
"WARN: GitHub API quota exceeded when fetching maintainers\n"
)
# if we exceed github api quota, we use the github info
# of maintainers saved with the website
resp_bkp = requests.get(
context["main"]["production_url"] + "maintainers.json"
)
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
context["maintainers"][f"{kind}_with_github_info"].append(resp.json())
resp_bkp.raise_for_status()
maintainers_info = resp_bkp.json()
break

with open(cache_file, "w") as f:
json.dump(context["maintainers"], f)
resp.raise_for_status()
maintainers_info[user] = resp.json()

context["maintainers"]["github_info"] = maintainers_info

# save the data fetched from github to use it in case we exceed
# the github api quota in the future
with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f:
json.dump(maintainers_info, f)

return context

Expand All @@ -203,15 +199,20 @@ def home_add_releases(context):
context["releases"] = []

github_repo_url = context["main"]["github_repo_url"]
resp = requests.get(
f"https://api.github.com/repos/{github_repo_url}/releases",
headers=GITHUB_API_HEADERS,
)
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases")
if resp.status_code == 403:
sys.stderr.write("WARN: GitHub API quota exceeded when fetching releases\n")
resp_bkp = requests.get(context["main"]["production_url"] + "releases.json")
resp_bkp.raise_for_status()
releases = resp_bkp.json()
else:
resp.raise_for_status()
releases = resp.json()

with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f:
json.dump(releases, f, default=datetime.datetime.isoformat)

for release in resp.json():
for release in releases:
if release["prerelease"]:
continue
published = datetime.datetime.strptime(
Expand All @@ -229,6 +230,7 @@ def home_add_releases(context):
),
}
)

return context

@staticmethod
Expand Down Expand Up @@ -273,15 +275,22 @@ def roadmap_pdeps(context):
github_repo_url = context["main"]["github_repo_url"]
resp = requests.get(
"https://api.github.com/search/issues?"
f"q=is:pr is:open label:PDEP repo:{github_repo_url}",
headers=GITHUB_API_HEADERS,
f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
)
if context["ignore_io_errors"] and resp.status_code == 403:
return context
resp.raise_for_status()
if resp.status_code == 403:
sys.stderr.write("WARN: GitHub API quota exceeded when fetching pdeps\n")
resp_bkp = requests.get(context["main"]["production_url"] + "pdeps.json")
resp_bkp.raise_for_status()
pdeps = resp_bkp.json()
else:
resp.raise_for_status()
pdeps = resp.json()

for pdep in resp.json()["items"]:
context["pdeps"]["under_discussion"].append(
with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f:
json.dump(pdeps, f)

for pdep in pdeps["items"]:
context["pdeps"]["Under discussion"].append(
{"title": pdep["title"], "url": pdep["url"]}
)

Expand Down Expand Up @@ -314,7 +323,7 @@ def get_callable(obj_as_str: str) -> object:
return obj


def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
def get_context(config_fname: str, **kwargs):
"""
Load the config yaml as the base context, and enrich it with the
information added by the context preprocessors defined in the file.
Expand All @@ -323,7 +332,6 @@ def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
context = yaml.safe_load(f)

context["source_path"] = os.path.dirname(config_fname)
context["ignore_io_errors"] = ignore_io_errors
context.update(kwargs)

preprocessors = (
Expand Down Expand Up @@ -361,7 +369,9 @@ def extend_base_template(content: str, base_template: str) -> str:


def main(
source_path: str, target_path: str, base_url: str, ignore_io_errors: bool
source_path: str,
target_path: str,
base_url: str,
) -> int:
"""
Copy every file in the source directory to the target directory.
Expand All @@ -375,7 +385,7 @@ def main(
os.makedirs(target_path, exist_ok=True)

sys.stderr.write("Generating context...\n")
context = get_context(config_fname, ignore_io_errors, base_url=base_url)
context = get_context(config_fname, base_url=base_url, target_path=target_path)
sys.stderr.write("Context generated\n")

templates_path = os.path.join(source_path, context["main"]["templates_path"])
Expand Down Expand Up @@ -419,15 +429,5 @@ def main(
parser.add_argument(
"--base-url", default="", help="base url where the website is served from"
)
parser.add_argument(
"--ignore-io-errors",
action="store_true",
help="do not fail if errors happen when fetching "
"data from http sources, and those fail "
"(mostly useful to allow GitHub quota errors "
"when running the script locally)",
)
args = parser.parse_args()
sys.exit(
main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors)
)
sys.exit(main(args.source_path, args.target_path, args.base_url))