diff --git a/conbench-poc/.gitignore b/conbench-poc/.gitignore
new file mode 100644
index 0000000..8b3e30d
--- /dev/null
+++ b/conbench-poc/.gitignore
@@ -0,0 +1,16 @@
+
+.DS_Store
+.vscode/
+Deas-MacBook-Air.local/
+__pycache__/
+asv_files/**
+my-files/
+no_results/
+failing/
+asv_files_ALL/
+algos2_results/
+local_env.yml
+server_env.yml
+benchmarks.json
+asv_processed_files
+alert_processed_files
diff --git a/conbench-poc/README.md b/conbench-poc/README.md
new file mode 100644
index 0000000..bc4e805
--- /dev/null
+++ b/conbench-poc/README.md
@@ -0,0 +1,19 @@
+# Conbench PoC for pandas
+
+
+The **purpose** of adding conbench to the current pandas benchmark system
+is:
+1. To improve the UI.
+2. To use conbench's statistical analysis and its detection of regressions/improvements.
+3. To add an automatic alert system for regressions and improvements.
+
+## File descriptions
+**client.py:** Calls the adapter asvbench.py and posts results to a conbench web app.
+**asvbench.py:** Converts asv benchmark results to the conbench format.
+**alert.py:** Runs the conbench alert pipeline, generates a report, and sends alerts.
+**benchmark_email.py:** Builds and sends the alert emails.
+**utilities.py:** Sets up environment variables and reads files.
+**setup_server.txt:** Steps to install this PoC.
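+
+For reference, a minimal sketch of how these pieces chain together (the file
+name below is illustrative, not from a real run):
+
+```python
+from pathlib import Path
+from asvbench import AsvBenchmarkAdapter
+
+# Hypothetical asv result file; real ones live under PANDAS_ASV_RESULTS_PATH.
+adapter = AsvBenchmarkAdapter(
+    command=["echo", "results.json"],
+    result_file=Path("results.json"),
+    benchmarks_file_path="./",  # directory holding asv's benchmarks.json
+)
+adapter.run()           # converts the asv results into BenchmarkResult objects
+adapter.post_results()  # posts them to the conbench web app
+```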
+
+## PoC structure/setup
+![Setup](setup_pic.png "Setup")
\ No newline at end of file
diff --git a/conbench-poc/alert.py b/conbench-poc/alert.py
new file mode 100644
index 0000000..91c6de7
--- /dev/null
+++ b/conbench-poc/alert.py
@@ -0,0 +1,90 @@
+from utilities import Environment, alerts_done_file
+import benchalerts.pipeline_steps as steps
+from benchalerts.integrations.github import CheckStatus
+import benchmark_email
+import re
+import json
+#from benchalerts.pipeline_steps.slack import (
+#    SlackErrorHandler,
+#)
+from benchalerts import AlertPipeline, Alerter
+# GitHubRepoClient is only needed by the commented-out GitHubCheckStep below
+from benchalerts.integrations.github import GitHubRepoClient
+
+env = Environment()
+
+repo = env.GITHUB_REPOSITORY
+
+def alert_instance(commit_hash):
+
+    # Create a pipeline to update a GitHub Check
+    pipeline = AlertPipeline(
+        steps=[
+            steps.GetConbenchZComparisonStep(
+                commit_hash=commit_hash,
+                #baseline_run_type=steps.BaselineRunCandidates.fork_point,
+                #baseline_run_type=steps.BaselineRunCandidates.latest_default,
+                baseline_run_type=steps.BaselineRunCandidates.parent,
+                z_score_threshold=5.5,  # if not set, defaults to 5
+            ),
+            #steps.GitHubCheckStep(
+            #    commit_hash=commit_hash,
+            #    comparison_step_name="GetConbenchZComparisonStep",
+            #    github_client=GitHubRepoClient(repo=repo),
+            #    #build_url=build_url,
+            #),
+            #steps.SlackMessageAboutBadCheckStep(
+            #    channel_id="conbench-poc",
+            #),
+        ],
+        #error_handlers=[
+        #    steps.GitHubCheckErrorHandler(
+        #        commit_hash=commit_hash, repo=repo, #build_url=build_url
+        #    )
+        #],
+    )
+    return pipeline
+
+# To see the whole report, look at:
+# pipeline.run_pipeline()['GetConbenchZComparisonStep'].results_with_z_regressions
+def report(pipeline):
+    full_comparison_info = pipeline.run_pipeline()['GetConbenchZComparisonStep']
+    alerter = Alerter()
+    if alerter.github_check_status(full_comparison_info) == CheckStatus.FAILURE:
+
+        message = """Subject: Benchmarks Alert \n\n """ \
+                  + alerter.github_check_summary(full_comparison_info, "")
+        #TODO add links to message
+        #server_message = re.sub(r'0\.0\.0\.0', '127.0.0.1', message)  # local
+        server_message = re.sub(r'0\.0\.0\.0', '57.128.112.95', message)  # new server
+        cleaned_message = re.sub(r'- Commit Run.+\)|#| All benchmark runs analyzed:', '', server_message)
+        # send either message or cleaned_message
+        benchmark_email.email(cleaned_message)
+
+def alert() -> None:
+
+    #while True:
+    with open(env.ASV_PROCESSED_FILES, "r") as f:
+        processed_files = f.read().split('\n')
+
+    for new_file in (set(processed_files) - set(alerts_done_file(env))):
+        with open(new_file, "r") as f:
+            benchmarks_results = json.load(f)
+        pipeline = alert_instance(benchmarks_results['commit_hash'])
+        report(pipeline)
+
+        with open(env.ALERT_PROCESSED_FILES, "a") as f:
+            f.write(new_file)
+            f.write("\n")
+
+
+if __name__ == "__main__":
+    #commit_hash = 'acf5d7d84187b5ba53e54b2a5d91a34725814bf9' #old server
+    #commit_hash = 'fce520d45a304ee2659bb4156acf484cee5aea07' #new server
+    #commit_hash = "c8a9c2fd3bcf23a21acfa6f4cffbc4c9360b9ea6" #local
+
+    alert()
\ No newline at end of file
diff --git a/conbench-poc/asvbench.py b/conbench-poc/asvbench.py
new file mode 100644
index 0000000..b564535
--- /dev/null
+++ b/conbench-poc/asvbench.py
@@ -0,0 +1,149 @@
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+import itertools
+import numpy as np
+import os
+from datetime import datetime
+
+
+from 
benchadapt.adapters._adapter import BenchmarkAdapter
+from benchadapt.result import BenchmarkResult
+
+class AsvBenchmarkAdapter(BenchmarkAdapter):
+
+    def __init__(
+        self,
+        command: List[str],
+        result_file: Path,
+        benchmarks_file_path: str,
+        result_fields_override: Dict[str, Any] = None,
+        result_fields_append: Dict[str, Any] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        command : List[str]
+            A list of strings defining a shell command to run benchmarks
+        result_file : Path
+            Path to the asv result file to transform and post
+        benchmarks_file_path : str
+            Path to the directory containing the benchmarks.json file generated by asv
+        result_fields_override : Dict[str, Any]
+            A dict of values to override on each instance of `BenchmarkResult`. Useful
+            for specifying metadata only available at runtime, e.g. build info. Applied
+            before ``result_fields_append``.
+        result_fields_append : Dict[str, Any]
+            A dict of default values to be appended to `BenchmarkResult` values after
+            instantiation. Useful for appending extra tags or other metadata in addition
+            to that gathered elsewhere. Only applicable for dict attributes. For each
+            element, will override any keys that already exist, i.e. it does not append
+            recursively.
+        """
+        self.result_file = result_file
+        self.benchmarks_file_path = benchmarks_file_path
+        super().__init__(
+            command=command,
+            result_fields_override=result_fields_override,
+            result_fields_append=result_fields_append,
+        )
+
+    def _transform_results(self) -> List[BenchmarkResult]:
+        """Transform asv results into a list of BenchmarkResult instances"""
+        parsed_benchmarks = []
+
+        with open(self.result_file, "r") as f:
+            benchmarks_results = json.load(f)
+
+        benchmarks_file = self.benchmarks_file_path + "benchmarks.json"
+        with open(benchmarks_file) as f:
+            benchmarks_info = json.load(f)
+
+        parsed_benchmarks = self._parse_results(benchmarks_results, benchmarks_info)
+
+        return parsed_benchmarks
+
+    def _parse_results(self, benchmarks_results, benchmarks_info):
+        # From the asv documentation, "result_columns" is a list of column names
+        # for the results dictionary:
+        # ["result", "params", "version", "started_at", "duration", "stats_ci_99_a",
+        #  "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat",
+        #  "samples", "profile"]
+        # In this first version of the adapter we are using only the "result" column.
+        # TODO: use the "samples" column instead.
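+        # Illustration (values invented, not from a real run): asv stores, for each
+        # benchmark name, a list aligned with result_columns, e.g.
+        #   result_columns = ["result", "params", "version", ...]
+        #   results["algos.Sort.time_sort"] = [[0.003, 0.005], [["asc", "desc"]], ...]
+        # dict(zip(result_columns, ...)) below rebuilds a per-benchmark mapping, and
+        # itertools.product(*result_dict["params"]) pairs every parameter
+        # combination with its entry in result_dict["result"].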
+        try:
+            result_columns = benchmarks_results["result_columns"]
+        except KeyError as e:
+            raise Exception("Incorrect file format") from e
+        parsed_benchmarks = []
+
+        for name in benchmarks_results["results"]:
+            # Bug with this benchmark: series_methods.ToFrame.time_to_frame
+            if name == "series_methods.ToFrame.time_to_frame":
+                continue
+            #print(name)
+            try:
+                result_dict = dict(zip(result_columns,
+                                       benchmarks_results["results"][name]))
+                for param_values, data in zip(
+                    itertools.product(*result_dict["params"]),
+                    result_dict['result']
+                ):
+                    if np.isnan(data):
+                        #print('failing ', name)
+                        continue
+                    param_dic = dict(zip(benchmarks_info[name]["param_names"],
+                                         param_values))
+                    tags = {}
+                    tags["name"] = name
+                    tags.update(param_dic)
+                    # asv units are seconds or bytes, conbench uses "s" or "B"
+                    units = {"seconds": "s",
+                             "bytes": "B"}
+                    params = benchmarks_results["params"]
+                    parsed_benchmark = BenchmarkResult(
+                        #batch_id=str(self.result_file), #CORRECT THIS
+                        stats={
+                            # asv returns one value, which is the average of the iterations,
+                            # but it can be configured to return the value of each iteration;
+                            # in that case the variable "data" will be a list, and this
+                            # needs to be addressed below
+                            "data": [data],
+                            "unit": units[benchmarks_info[name]['unit']],
+                            # "iterations" below is for conbench: 1 if we only provide one value;
+                            # if asv returns the value of each iteration (in "data" above),
+                            # iterations should match the number of values
+                            "iterations": 1,
+                        },
+                        tags=tags,
+                        context={"benchmark_language": "Python",
+                                 "env_name": benchmarks_results["env_name"],
+                                 "python": benchmarks_results["python"],
+                                 "requirements": benchmarks_results["requirements"],
+                                 },
+                        github={"repository": os.environ["REPOSITORY"],
+                                "commit": benchmarks_results["commit_hash"],
+                                },
+                        info={"date": str(datetime.fromtimestamp(benchmarks_results["date"]/1e3)),
+                              },
+                        machine_info={
+                            "name": params["machine"],
+                            "os_name": params["os"],
+                            "os_version": params["os"],
+                            "architecture_name": params["arch"],
+                            "kernel_name": "x",
+                            "memory_bytes": 0,
+                            "cpu_model_name": params["cpu"],
+                            "cpu_core_count": params["num_cpu"],
+                            "cpu_thread_count": 0,
+                            "cpu_l1d_cache_bytes": 0,
+                            "cpu_l1i_cache_bytes": 0,
+                            "cpu_l2_cache_bytes": 0,
+                            "cpu_l3_cache_bytes": 0,
+                            "cpu_frequency_max_hz": 0,
+                            "gpu_count": 0,
+                            "gpu_product_names": [],
+                        }
+                    )
+                    parsed_benchmarks.append(parsed_benchmark)
+            except Exception:
+                # skip benchmark entries that cannot be parsed (e.g. missing fields)
+                continue
+
+        return parsed_benchmarks
+
diff --git a/conbench-poc/benchmark_email.py b/conbench-poc/benchmark_email.py
new file mode 100644
index 0000000..6fc8916
--- /dev/null
+++ b/conbench-poc/benchmark_email.py
@@ -0,0 +1,31 @@
+
+import smtplib, ssl
+import os
+from dotenv import load_dotenv
+import socket
+
+if socket.gethostname().startswith('Deas'):
+    load_dotenv(dotenv_path="./local_env.yml")
+else:
+    load_dotenv(dotenv_path="./server_env.yml")
+
+def email(message):
+
+    port = 465  # for SSL
+    sender_email = "conbenchalert@gmail.com"
+    receiver_email = ["deamarialeon@gmail.com"]
+    gmail_password = os.getenv("GMAIL_PASSWORD")
+
+    # Create a secure SSL context
+    context = ssl.create_default_context()
+    with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
+        server.login(sender_email, gmail_password)
+        print(message)
+        server.sendmail(sender_email, receiver_email, message)
+
+if __name__ == "__main__":
+    message = """\
+    Subject: Hello
+
+    Message sent from conbenchalert."""
+    email(message)
\ No newline at end of file
diff --git a/conbench-poc/client.py b/conbench-poc/client.py
new file mode
100644
index 0000000..bd817cc
--- /dev/null
+++ b/conbench-poc/client.py
@@ -0,0 +1,36 @@
+from asvbench import AsvBenchmarkAdapter
+from pathlib import Path
+import time
+from utilities import Environment, check_new_files
+
+env = Environment()
+
+def adapter_instance(file_to_read) -> None:
+    adapter = AsvBenchmarkAdapter(
+        command=["echo", str(file_to_read)],
+        result_file=Path(file_to_read),
+        result_fields_override={
+            "run_reason": env.CONBENCH_RUN_REASON,
+        },
+        benchmarks_file_path=env.BENCHMARKS_FILE_PATH,
+    )
+    adapter.run()
+    adapter.post_results()
+
+
+def post_data() -> None:
+
+    while True:
+        all_files, processed_files = check_new_files(env)
+        for new_file in (set(all_files) - set(processed_files)):
+            adapter_instance(new_file)
+            with open(env.ASV_PROCESSED_FILES, "a") as f:
+                f.write(new_file)
+                f.write("\n")
+        time.sleep(30)  # adjust this interval on the server
+
+if __name__ == "__main__":
+    post_data()
+
diff --git a/conbench-poc/setup_pic.png b/conbench-poc/setup_pic.png
new file mode 100644
index 0000000..ece77a8
Binary files /dev/null and b/conbench-poc/setup_pic.png differ
diff --git a/conbench-poc/setup_server.txt b/conbench-poc/setup_server.txt
new file mode 100644
index 0000000..f054417
--- /dev/null
+++ b/conbench-poc/setup_server.txt
@@ -0,0 +1,166 @@
+Installing on the server
+
+Connect to the server with: ssh ubuntu@57.128.112.95
+
+sudo apt update
+sudo apt upgrade
+
+sudo apt install make
+
+Install the Docker engine (installed here on Ubuntu Lunar 23.04):
+https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository
+
+Docker was installed with "sudo" as user "ubuntu", so running Docker would
+normally require sudo as well. Conbench, however, needs to be installed as
+user "bench", and "bench" does not have sudo rights.
+
+So user "bench" needs to be added to the docker group; that way it does not
+need "sudo":
+
+Do this as user ubuntu:
+
+To see a list of groups:
+cat /etc/group
+
+To add user "bench" to the docker group:
+sudo adduser bench docker
+
+Change to user "bench":
+sudo su - bench
+
+From now on, do everything as user "bench":
+mkdir conbench
+cd conbench
+
+Install mamba following: https://github.com/conda-forge/miniforge
+
+curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+bash Miniforge3-$(uname)-$(uname -m).sh
+
+(hit Escape or q, then answer yes)
+
+Exit the bench user and log in again with sudo su - bench (so mamba is activated).
+
+Run this so the base environment is not activated every time you log in:
+
+conda config --set auto_activate_base false
+
+Create the environment:
+
+mamba create -n conbench-env
+mamba activate conbench-env
+mamba install python
+python -m pip install 'benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python'
+python -m pip install 'benchalerts@git+https://github.com/conbench/conbench.git@main#subdirectory=benchalerts'
+python -m pip install 'benchclients@git+https://github.com/conbench/conbench.git@main#subdirectory=benchclients/python'
+python -m pip install 'benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect'
+
+mamba install numpy python-dotenv pandas
+
+The environment is in:
+/home/bench/miniforge3/conbench-env/
+
+Clone my conbench fork and use the "server" branch:
+git clone https://github.com/DeaMariaLeon/conbench.git
+
+Or start from the upstream conbench repository and edit Makefile, docker-compose.yml and
conbench/api/index.py yourself, as follows.
+
+Edit the Makefile so the app listens on port 5000 on the server (already done in the "server" branch):
+
+cd conbench/conbench (where the conbench clone is):
+
+Go to line 13 and change
+export DCOMP_CONBENCH_HOST_PORT=127.0.0.1:5000
+
+to:
+export DCOMP_CONBENCH_HOST_PORT=0.0.0.0:5000
+
+Edit docker-compose.yml:
+line 15 - APPLICATION_NAME: "pandas-conbench-PoC"
+line 42 - REGISTRATION_KEY: "innocent-registration-key"  CHANGE THIS
+
+At the end of the file, in order to use volumes:
+
+db:
+  image: library/postgres:15.2-alpine
+  volumes:
+    - db-data:/var/lib/postgresql/data
+  environment:
+    POSTGRES_DB: "postgres"
+    POSTGRES_USER: "postgres"
+    POSTGRES_PASSWORD: "postgres"
+  healthcheck:
+    test: [ "CMD-SHELL", "pg_isready -U postgres" ]
+    interval: 2s
+    timeout: 5s
+    retries: 5
+  ports:
+    - "127.0.0.1::5432"
+volumes:
+  db-data:
+
+Edit conbench/api/index.py line 209 so the database can't be cleaned up:
+#empty_db_tables()
+
+Clone my repo conbench_toy:
+
+git clone https://github.com/DeaMariaLeon/conbench.git
+
+Add machine.json and benchmarks.json, with their paths, to this file (so
+client.py does not try to post them):
+asv_processed_files_server
+
+Provide the environment variables:
+touch server_env.yml
+
+Edit server_env.yml and set these variables:
+
+CONBENCH_URL=http://0.0.0.0:5000
+CONBENCH_EMAIL="set-your-email"
+CONBENCH_PASSWORD="set-your-password"  (the password chosen when signing up)
+CONBENCH_RUN_REASON=commit
+PANDAS_ASV_RESULTS_PATH= Path to the asv .json result files
+BENCHMARKS_FILE_PATH= Path to the benchmarks.json file generated by asv
+ASV_PROCESSED_FILES= Path and name of the file that stores file names already posted to the web app
+ALERT_PROCESSED_FILES= Path and name of the file that stores file names already analyzed by alert.py
+REPOSITORY=git@github.com:pandas-dev/pandas
+GITHUB_REPOSITORY=DeaMariaLeon/algos2  # temporarily used for alerts
+SLACK_TOKEN=
+GMAIL_PASSWORD= Password of the email account that sends alerts
+GITHUB_APP_ID= For the alerts app
+GITHUB_APP_PRIVATE_KEY= For the alerts app
+
+Set the GITHUB_API_TOKEN:
+Run export GITHUB_API_TOKEN="{token}" in your current shell.
+
+Run the following inside conbench/conbench (the root of the conbench clone).
+
+Use nohup so the app keeps running after you log out:
+
+nohup <command> >/dev/null 2>&1 &   (runs in the background and does not create nohup.out)
+
+nohup make run-app 2>&1 &   (immune to hangups; output goes to nohup.out)
+
+Check nohup.out for the logs; look for HTTP 200 responses, at which point the conbench server should be running.
+
+At that point, open the conbench web app and create an account, using the values
+from server_env.yml. Once you have registered an account, log in.
+
+Useful commands:
+
+make teardown-app   (removes the container)
+make run-app        (runs the app; you need to be in ~/conbench/conbench-clone/, and it hangs up on logout unless started with nohup)
+
+Under the subdirectory conbench/conbench_toy, make sure your environment is activated.
+Run client.py with nohup:
+
+nohup python3 client.py 2>&1 &
+
+client.py then posts all the asv result files to the conbench server.
+
+To run the alerts:
+nohup python3 alert.py 2>&1 &
+
+Make sure you have two files: alert_processed_files_server and asv_processed_files_server.
+The web app, client.py and alert.py run separately.
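+
+As a quick sanity check that server_env.yml is being picked up, something like
+the following can be run from the PoC directory (just a sketch, not part of the
+setup itself):
+
+python3 -c "from utilities import Environment; print(Environment().PANDAS_ASV_RESULTS_PATH)"
+
+If it prints your asv results path, client.py and alert.py will read the same values.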
diff --git a/conbench-poc/utilities.py b/conbench-poc/utilities.py
new file mode 100644
index 0000000..7f2e484
--- /dev/null
+++ b/conbench-poc/utilities.py
@@ -0,0 +1,37 @@
+import socket
+from dotenv import load_dotenv
+import os
+from pathlib import Path
+
+class Environment:
+
+    def __init__(self):
+        if socket.gethostname().startswith('Deas'):
+            load_dotenv(dotenv_path='./local_env.yml')
+        else:
+            load_dotenv(dotenv_path='./server_env.yml')
+
+        self.PANDAS_ASV_RESULTS_PATH = os.getenv("PANDAS_ASV_RESULTS_PATH")
+        self.BENCHMARKS_FILE_PATH = os.getenv("BENCHMARKS_FILE_PATH")
+        self.GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
+        self.CONBENCH_RUN_REASON = os.getenv("CONBENCH_RUN_REASON")
+        self.ASV_PROCESSED_FILES = os.getenv("ASV_PROCESSED_FILES")
+        self.ALERT_PROCESSED_FILES = os.getenv("ALERT_PROCESSED_FILES")
+
+def check_new_files(env):
+    """Return all asv result files and the ones already posted to conbench."""
+    benchmarks_path = Path(env.PANDAS_ASV_RESULTS_PATH)
+    all_files = [str(file) for file in benchmarks_path.glob('*.json')]
+    with open(env.ASV_PROCESSED_FILES, "r") as f:
+        processed_files = f.read().split('\n')
+
+    return all_files, processed_files
+
+def alerts_done_file(env):
+    """Return the files that have already been analyzed by the alert pipeline."""
+    _, processed_files = check_new_files(env)
+    with open(env.ALERT_PROCESSED_FILES, "r") as f:
+        alert_sent_files = f.read().split('\n')
+
+    return alert_sent_files
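
For completeness, a minimal sketch of how client.py and alert.py consume these helpers (this mirrors the set arithmetic they already perform; no new API is introduced):

```python
from utilities import Environment, check_new_files, alerts_done_file

env = Environment()

# Result files asv has produced vs. files already posted to conbench:
all_files, processed_files = check_new_files(env)
files_to_post = set(all_files) - set(processed_files)

# Files posted to conbench but not yet analyzed by the alert pipeline:
files_to_alert = set(processed_files) - set(alerts_done_file(env))
```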