Adding conbench-poc #1

Closed · wants to merge 1 commit
16 changes: 16 additions & 0 deletions conbench-poc/.gitignore
@@ -0,0 +1,16 @@

.DS_Store
.vscode/
Deas-MacBook-Air.local/
__pycache__/
asv_files/**
my-files/
no_results/
failing/
asv_files_ALL/
algos2_results/
local_env.yml
server_env.yml
benchmarks.json
asv_processed_files
alert_processed_files
19 changes: 19 additions & 0 deletions conbench-poc/README.md
@@ -0,0 +1,19 @@
# Conbench PoC for pandas


The **purpose** of adding conbench to the current pandas benchmark system is:
1. Improve the UI.
2. Use conbench's statistical analysis to detect regressions and improvements.
3. Add an automatic alert system for regressions and improvements.

## Files description
**client.py:** Calls the `asvbench.py` adapter and posts results to a conbench web app. <br/>
**asvbench.py:** Converts asv benchmark results to the conbench format (see the sketch below). <br/>
**alert.py:** Runs the conbench alert pipeline, generates a report and sends alerts. <br/>
**benchmark_email.py:** Handles the alert email. <br/>
**utilities.py:** Sets up environment variables and reads files. <br/>
**setup_server.txt:** Steps to install this PoC. <br/>
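
For reference, here is a minimal sketch of how the adapter is driven directly (the file paths and the `"commit"` run reason are placeholders; `client.py` reads the real values from environment variables):

```python
from pathlib import Path
from asvbench import AsvBenchmarkAdapter

# Placeholder paths: point these at a real asv result file and at the
# directory that contains asv's benchmarks.json.
adapter = AsvBenchmarkAdapter(
    command=["echo", "results.json"],
    result_file=Path("asv_files/machine/abc1234-env.json"),
    benchmarks_file_path="asv_files/",
    result_fields_override={"run_reason": "commit"},
)
adapter.run()           # converts the asv JSON into BenchmarkResult objects
adapter.post_results()  # posts them to the conbench server configured via env vars
```

Note that `benchmarks_file_path` must include the trailing separator, because the adapter concatenates it with `"benchmarks.json"`.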

## PoC structure/setup
![Setup](setup_pic.png "Setup")
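
In outline: asv runs produce JSON result files; `client.py` polls for new files and posts them through the `asvbench.py` adapter to the conbench web app; `alert.py` then compares each newly posted commit against its parent run and, when a regression is detected, sends a report by email via `benchmark_email.py`.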
90 changes: 90 additions & 0 deletions conbench-poc/alert.py
@@ -0,0 +1,90 @@
import os
from utilities import Environment, alerts_done_file, check_new_files
import benchalerts.pipeline_steps as steps
from benchalerts.integrations.github import CheckStatus
import benchmark_email
import re
import json
#from benchalerts.pipeline_steps.slack import (
# SlackErrorHandler,
#)
from benchalerts import AlertPipeline, Alerter
from benchalerts.integrations.github import GitHubRepoClient
import asvbench
from benchalerts.conbench_dataclasses import FullComparisonInfo
import pandas as pd

env = Environment()

repo = env.GITHUB_REPOSITORY

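# Note: the commented-out GitHubCheckStep, SlackMessageAboutBadCheckStep and error handlers
# below are optional integrations that are left disabled in this PoC.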
def alert_instance(commit_hash):

# Create a pipeline that runs the conbench z-score comparison for this commit
pipeline = AlertPipeline(
steps=[
steps.GetConbenchZComparisonStep(
commit_hash=commit_hash,
#baseline_run_type=steps.BaselineRunCandidates.fork_point,
#baseline_run_type=steps.BaselineRunCandidates.latest_default,
baseline_run_type=steps.BaselineRunCandidates.parent,
z_score_threshold=5.5, #If not set, defaults to 5
),
#steps.GitHubCheckStep(
# commit_hash=commit_hash,
# comparison_step_name="GetConbenchZComparisonStep",
# github_client=GitHubRepoClient(repo=repo),
# #build_url=build_url,
#),
#steps.SlackMessageAboutBadCheckStep(
# channel_id="conbench-poc",
#),

],
#error_handlers=[
# steps.GitHubCheckErrorHandler(
# commit_hash=commit_hash, repo=repo, #build_url=build_url
# )
#],
)
return pipeline

# To see the whole report, look at:
# pipeline.run_pipeline()['GetConbenchZComparisonStep'].results_with_z_regressions
def report(pipeline):
full_comparison_info = pipeline.run_pipeline()['GetConbenchZComparisonStep']
alerter = Alerter()
if alerter.github_check_status(full_comparison_info) == CheckStatus.FAILURE:

message = """Subject: Benchmarks Alert \n\n """ \
+ alerter.github_check_summary(full_comparison_info, "")
#TODO add links to message
#cleaned_message = re.sub(r'0\.0\.0\.0', '127.0.0.1', message) #local
correctserver = re.sub(r'0\.0\.0\.0', '57.128.112.95', message) #new server
cleaned_message = re.sub(r'- Commit Run.+\)|#| All benchmark runs analyzed:', '', correctserver)
#send message or cleaned_message
benchmark_email.email(cleaned_message)

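# For every asv result file that client.py has already posted (listed in ASV_PROCESSED_FILES)
# but not yet alerted on (ALERT_PROCESSED_FILES), run the alert pipeline for its commit hash
# and then record the file as processed.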
def alert() -> None:

#while True:
with open(env.ASV_PROCESSED_FILES, "r+") as f:
processed_files = f.read().split('\n')

for new_file in (set(processed_files) - set(alerts_done_file(env))):
with open(new_file, "r") as f:
benchmarks_results = json.load(f)
pipeline = alert_instance(benchmarks_results['commit_hash'])
report(pipeline)

with open(env.ALERT_PROCESSED_FILES, "a") as f:
f.write(new_file)
f.write("\n")


if __name__ == "__main__":
#commit_hash = 'acf5d7d84187b5ba53e54b2a5d91a34725814bf9' #old server
#commit_hash = 'fce520d45a304ee2659bb4156acf484cee5aea07' #new server
#commit_hash = "c8a9c2fd3bcf23a21acfa6f4cffbc4c9360b9ea6" #local

alert()
149 changes: 149 additions & 0 deletions conbench-poc/asvbench.py
@@ -0,0 +1,149 @@
import json
from pathlib import Path
from typing import Any, Dict, List
import itertools
import numpy as np
import os
from datetime import datetime


from benchadapt.adapters._adapter import BenchmarkAdapter
from benchadapt.result import BenchmarkResult

class AsvBenchmarkAdapter(BenchmarkAdapter):

def __init__(
self,
command: List[str],
result_file: Path,
benchmarks_file_path: str,
result_fields_override: Dict[str, Any] = None,
result_fields_append: Dict[str, Any] = None,
) -> None:
"""
Parameters
----------
command : List[str]
A list of strings defining a shell command to run benchmarks
result_file : Path
Path to the asv result file to transform
benchmarks_file_path : str
Path to the directory containing asv's benchmarks.json (including the trailing separator)
result_fields_override : Dict[str, Any]
A dict of values to override on each instance of `BenchmarkResult`. Useful
for specifying metadata only available at runtime, e.g. build info. Applied
before ``result_fields_append``.
result_fields_append : Dict[str, Any]
A dict of default values to be appended to `BenchmarkResult` values after
instantiation. Useful for appending extra tags or other metadata in addition
to that gathered elsewhere. Only applicable for dict attributes. For each
element, will override any keys that already exist, i.e. it does not append
recursively.
"""
self.result_file = result_file
self.benchmarks_file_path=benchmarks_file_path
super().__init__(
command=command,
result_fields_override=result_fields_override,
result_fields_append=result_fields_append,
)

def _transform_results(self) -> List[BenchmarkResult]:
"""Transform asv results into a list of BenchmarkResults instances"""
parsed_benchmarks = []

with open(self.result_file, "r") as f:
benchmarks_results = json.load(f)

benchmarks_file = self.benchmarks_file_path + "benchmarks.json"
with open(benchmarks_file) as f:
benchmarks_info = json.load(f)

parsed_benchmarks = self._parse_results(benchmarks_results, benchmarks_info)

return parsed_benchmarks

def _parse_results(self, benchmarks_results, benchmarks_info):
# From the asv documentation, "result_columns" is a list of column names for the results dictionary:
# ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b",
# "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"]
# In this first version of the adapter we are using only the "result" column.
# TODO: use the "samples" column instead.
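# Illustrative shape of one "results" entry (benchmark name and values are made up):
#   benchmarks_results["results"]["algos.SomeBenchmark.time_method"] ==
#       [[0.0012, 0.0034],        # "result": one value per parameter combination
#        [["True", "False"]],     # "params": one list of values per parameter
#        ...]                     # remaining result_columns entries
# benchmarks_info (loaded from benchmarks.json) supplies "param_names" and "unit" per benchmark.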
try:
result_columns = benchmarks_results["result_columns"]
except KeyError:
raise Exception("Incorrect file format: 'result_columns' key missing")
parsed_benchmarks = []

for name in benchmarks_results["results"]:
#Bug with this benchmark: series_methods.ToFrame.time_to_frame
if name == "series_methods.ToFrame.time_to_frame":
continue
#print(name)
try:
result_dict = dict(zip(result_columns,
benchmarks_results["results"][name]))
for param_values, data in zip(
itertools.product(*result_dict["params"]),
result_dict['result']
):
if np.isnan(data):
#print('failing ', name)
continue
param_dic = dict(zip(benchmarks_info[name]["param_names"],
param_values))
tags = {}
tags["name"] = name
tags.update(param_dic)
#asv units are seconds or bytes, conbench uses "s" or "B"
units = {"seconds": "s",
"bytes": "B"}
params = benchmarks_results["params"]
parsed_benchmark = BenchmarkResult(
#batch_id=str(self.result_file), #CORRECT THIS
stats={
# asv returns one value, which is the average over the iterations,
# but it can be configured to return the value of each iteration.
# In that case the variable "data" will be a list, which would need to be handled here.
"data": [data],
"unit": units[benchmarks_info[name]['unit']],
# "iterations" below is for conbench: 1 because we only provide a single value.
# If asv were configured to report every iteration (in "data" above),
# "iterations" should match the number of values.
"iterations": 1,
},
tags=tags,
context={"benchmark_language": "Python",
"env_name": benchmarks_results["env_name"],
"python": benchmarks_results["python"],
"requirements": benchmarks_results["requirements"],
},
github={"repository": os.environ["REPOSITORY"],
"commit":benchmarks_results["commit_hash"],
},
info={"date": str(datetime.fromtimestamp(benchmarks_results["date"]/1e3)),
},
machine_info={
"name": params["machine"],
"os_name": params["os"],
"os_version":params["os"],
"architecture_name": params["arch"],
"kernel_name": "x",
"memory_bytes": 0,
"cpu_model_name": params["cpu"],
"cpu_core_count": params["num_cpu"],
"cpu_thread_count": 0,
"cpu_l1d_cache_bytes": 0,
"cpu_l1i_cache_bytes": 0,
"cpu_l2_cache_bytes": 0,
"cpu_l3_cache_bytes": 0,
"cpu_frequency_max_hz": 0,
"gpu_count": 0,
"gpu_product_names": [],
}
)
parsed_benchmarks.append(parsed_benchmark)
except Exception:
# Skip benchmarks whose results cannot be parsed into the expected format.
continue

return parsed_benchmarks

31 changes: 31 additions & 0 deletions conbench-poc/benchmark_email.py
@@ -0,0 +1,31 @@

import smtplib, ssl
import os
from dotenv import load_dotenv
import socket

if socket.gethostname().startswith('Deas'):
load_dotenv(dotenv_path="./local_env.yml")
else:
load_dotenv(dotenv_path="./server_env.yml")
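
# The env files above are expected to define GMAIL_PASSWORD; local_env.yml and server_env.yml
# are git-ignored and not included in this PR.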

def email(message):

port = 465 # For SSL
sender_email = "[email protected]"
receiver_email = ["[email protected]"]
gmail_password=os.getenv("GMAIL_PASSWORD")

# Create a secure SSL context
context = ssl.create_default_context()
with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
server.login(sender_email, gmail_password)
print(message)
server.sendmail(sender_email, receiver_email, message)

if __name__=="__main__":
message = """\
Subject: Hello

Message sent from conbenchalert."""
email(message)
36 changes: 36 additions & 0 deletions conbench-poc/client.py
@@ -0,0 +1,36 @@
from asvbench import AsvBenchmarkAdapter
from pathlib import Path
import os
import time
import alert
from utilities import Environment, check_new_files

env = Environment()

def adapter_instance(file_to_read) -> None:
adapter = AsvBenchmarkAdapter(
command=["echo", str(file_to_read)],
result_file=Path(file_to_read),
result_fields_override={
"run_reason": env.CONBENCH_RUN_REASON,
},
benchmarks_file_path=env.BENCHMARKS_FILE_PATH,
)
adapter.run()
adapter.post_results()


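# Poll for new asv result files (via utilities.check_new_files) and post every file not yet
# listed in ASV_PROCESSED_FILES, recording each one after it has been posted.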
def post_data() -> None:

while True:
all_files, processed_files = check_new_files(env)
for new_file in (set(all_files) - set(processed_files)):
adapter_instance(new_file)
with open(env.ASV_PROCESSED_FILES, "a") as f:
f.write(new_file)
f.write("\n")
time.sleep(30) #adjust this on server

if __name__=="__main__":
post_data()

Binary file added conbench-poc/setup_pic.png