diff --git a/conbench-poc/.gitignore b/conbench-poc/.gitignore
new file mode 100644
index 0000000..8b3e30d
--- /dev/null
+++ b/conbench-poc/.gitignore
@@ -0,0 +1,16 @@
+
+.DS_Store
+.vscode/
+Deas-MacBook-Air.local/
+__pycache__/
+asv_files/**
+my-files/
+no_results/
+failing/
+asv_files_ALL/
+algos2_results/
+local_env.yml
+server_env.yml
+benchmarks.json
+asv_processed_files
+alert_processed_files
diff --git a/conbench-poc/README.md b/conbench-poc/README.md
new file mode 100644
index 0000000..bc4e805
--- /dev/null
+++ b/conbench-poc/README.md
@@ -0,0 +1,19 @@
+# Conbench PoC for pandas
+
+
+The **purpose** of adding conbench to the current pandas benchmark system
+is:
+1. To improve the UI.
+2. To use conbench's statistical analysis and regression/improvement detection.
+3. To add an automatic alert system for regressions and improvements.
+
+## Files description
+- **client.py:** Calls the adapter (asvbench.py) and posts the results to a conbench web app.
+- **asvbench.py:** Converts asv's benchmark results to the conbench format.
+- **alert.py:** Runs the conbench alert pipeline, generates a report and sends alerts.
+- **benchmark_email.py:** Handles the alert emails.
+- **utilities.py:** Sets up environment variables and reads files.
+- **setup_server.txt:** Steps to install this PoC.
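+
+A minimal sketch of running the PoC, once the environment variables described
+in setup_server.txt are set (on the server both scripts are started with
+`nohup`, see setup_server.txt):
+
+```bash
+python3 client.py   # watches for new asv result files and posts them to conbench
+python3 alert.py    # runs the alert pipeline on posted results and emails a report
+```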
+
+## PoC structure/setup
+
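+![PoC setup](setup_pic.png)
+
+The conbench web app, client.py and alert.py run as separate processes. See
+setup_server.txt for the full installation steps.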
\ No newline at end of file
diff --git a/conbench-poc/alert.py b/conbench-poc/alert.py
new file mode 100644
index 0000000..91c6de7
--- /dev/null
+++ b/conbench-poc/alert.py
@@ -0,0 +1,90 @@
+from utilities import Environment, alerts_done_file
+import benchalerts.pipeline_steps as steps
+import benchmark_email
+import re
+import json
+#from benchalerts.pipeline_steps.slack import (
+#    SlackErrorHandler,
+#)
+from benchalerts import AlertPipeline, Alerter
+from benchalerts.integrations.github import CheckStatus, GitHubRepoClient
+
+env = Environment()
+
+repo = env.GITHUB_REPOSITORY
+
+def alert_instance(commit_hash):
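+    """Build an AlertPipeline comparing a commit's benchmarks against its parent run."""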
+
+ # Create a pipeline to update a GitHub Check
+ pipeline = AlertPipeline(
+ steps=[
+ steps.GetConbenchZComparisonStep(
+ commit_hash=commit_hash,
+ #baseline_run_type=steps.BaselineRunCandidates.fork_point,
+ #baseline_run_type=steps.BaselineRunCandidates.latest_default,
+ baseline_run_type=steps.BaselineRunCandidates.parent,
+ z_score_threshold=5.5, #If not set, defaults to 5
+ ),
+ #steps.GitHubCheckStep(
+ # commit_hash=commit_hash,
+ # comparison_step_name="GetConbenchZComparisonStep",
+ # github_client=GitHubRepoClient(repo=repo),
+ # #build_url=build_url,
+ #),
+ #steps.SlackMessageAboutBadCheckStep(
+ # channel_id="conbench-poc",
+ #),
+
+ ],
+ #error_handlers=[
+ # steps.GitHubCheckErrorHandler(
+ # commit_hash=commit_hash, repo=repo, #build_url=build_url
+ # )
+ #],
+ )
+ return pipeline
+
+# To see the whole report, look at:
+# pipeline.run_pipeline()['GetConbenchZComparisonStep'].results_with_z_regressions
+def report(pipeline):
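+    """Run the pipeline and email a cleaned-up report if the GitHub check failed."""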
+ full_comparison_info = pipeline.run_pipeline()['GetConbenchZComparisonStep']
+ alerter = Alerter()
+ if alerter.github_check_status(full_comparison_info) == CheckStatus.FAILURE:
+
+ message = """Subject: Benchmarks Alert \n\n """ \
+ + alerter.github_check_summary(full_comparison_info, "")
+ #TODO add links to message
+ #cleaned_message = re.sub(r'0\.0\.0\.0', '127.0.0.1', message) #local
+ correctserver = re.sub(r'0\.0\.0\.0', '57.128.112.95', message) #new server
+ cleaned_message = re.sub(r'- Commit Run.+\)|#| All benchmark runs analyzed:', '', correctserver)
+ #send message or cleaned_message
+ benchmark_email.email(cleaned_message)
+
+def alert() -> None:
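+    """Run the alert pipeline once for every posted results file not yet analyzed."""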
+
+    with open(env.ASV_PROCESSED_FILES, "r") as f:
+        processed_files = f.read().split('\n')
+
+ for new_file in (set(processed_files) - set(alerts_done_file(env))):
+ with open(new_file, "r") as f:
+ benchmarks_results = json.load(f)
+ pipeline = alert_instance(benchmarks_results['commit_hash'])
+ report(pipeline)
+
+ with open(env.ALERT_PROCESSED_FILES, "a") as f:
+ f.write(new_file)
+ f.write("\n")
+
+
+if __name__ == "__main__":
+ #commit_hash = 'acf5d7d84187b5ba53e54b2a5d91a34725814bf9' #old server
+ #commit_hash = 'fce520d45a304ee2659bb4156acf484cee5aea07' #new server
+ #commit_hash = "c8a9c2fd3bcf23a21acfa6f4cffbc4c9360b9ea6" #local
+
+ alert()
\ No newline at end of file
diff --git a/conbench-poc/asvbench.py b/conbench-poc/asvbench.py
new file mode 100644
index 0000000..b564535
--- /dev/null
+++ b/conbench-poc/asvbench.py
@@ -0,0 +1,149 @@
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+import itertools
+import numpy as np
+import os
+from datetime import datetime
+
+
+from benchadapt.adapters._adapter import BenchmarkAdapter
+from benchadapt.result import BenchmarkResult
+
+class AsvBenchmarkAdapter(BenchmarkAdapter):
+
+ def __init__(
+ self,
+ command: List[str],
+ result_file: Path,
+ benchmarks_file_path: str,
+ result_fields_override: Dict[str, Any] = None,
+ result_fields_append: Dict[str, Any] = None,
+ ) -> None:
+ """
+ Parameters
+ ----------
+ command : List[str]
+ A list of strings defining a shell command to run benchmarks
+        result_file : Path
+            Path to the asv results file to transform
+        benchmarks_file_path : str
+            Path to the directory containing the benchmarks.json file generated by asv
+ result_fields_override : Dict[str, Any]
+ A dict of values to override on each instance of `BenchmarkResult`. Useful
+ for specifying metadata only available at runtime, e.g. build info. Applied
+            before ``result_fields_append``.
+        result_fields_append : Dict[str, Any]
+ A dict of default values to be appended to `BenchmarkResult` values after
+ instantiation. Useful for appending extra tags or other metadata in addition
+ to that gathered elsewhere. Only applicable for dict attributes. For each
+ element, will override any keys that already exist, i.e. it does not append
+ recursively.
+ """
+ self.result_file = result_file
+        self.benchmarks_file_path = benchmarks_file_path
+ super().__init__(
+ command=command,
+ result_fields_override=result_fields_override,
+ result_fields_append=result_fields_append,
+ )
+
+ def _transform_results(self) -> List[BenchmarkResult]:
+ """Transform asv results into a list of BenchmarkResults instances"""
+
+ with open(self.result_file, "r") as f:
+ benchmarks_results = json.load(f)
+
+        benchmarks_file = Path(self.benchmarks_file_path) / "benchmarks.json"
+ with open(benchmarks_file) as f:
+ benchmarks_info = json.load(f)
+
+ parsed_benchmarks = self._parse_results(benchmarks_results, benchmarks_info)
+
+ return parsed_benchmarks
+
+ def _parse_results(self, benchmarks_results, benchmarks_info):
+        # From the asv documentation, "result_columns" is a list of column names
+        # for the results dictionary:
+        # ["result", "params", "version", "started_at", "duration", "stats_ci_99_a",
+        #  "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat",
+        #  "samples", "profile"]
+        # In this first version of the adapter we only use the "result" column.
+        # TODO: use the "samples" column instead.
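+        # Each entry in "results" is a row in that column order, e.g. (hypothetical
+        # values):
+        #   "algos.Hashing.time_frame": [[0.0012], [["param1_value"]], "v1", ...]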
+        try:
+            result_columns = benchmarks_results["result_columns"]
+        except KeyError:
+            raise Exception("Incorrect file format")
+        parsed_benchmarks = []
+        # asv units are seconds or bytes; conbench uses "s" or "B"
+        units = {"seconds": "s", "bytes": "B"}
+        params = benchmarks_results["params"]
+
+        for name in benchmarks_results["results"]:
+            # Bug with this benchmark: series_methods.ToFrame.time_to_frame
+            if name == "series_methods.ToFrame.time_to_frame":
+                continue
+ try:
+ result_dict = dict(zip(result_columns,
+ benchmarks_results["results"][name]))
+ for param_values, data in zip(
+ itertools.product(*result_dict["params"]),
+ result_dict['result']
+ ):
+                if np.isnan(data):
+                    continue
+ param_dic = dict(zip(benchmarks_info[name]["param_names"],
+ param_values))
+ tags = {}
+ tags["name"] = name
+ tags.update(param_dic)
+ parsed_benchmark = BenchmarkResult(
+ #batch_id=str(self.result_file), #CORRECT THIS
+ stats={
+                        # asv returns one value, which is the average of the iterations,
+                        # but it can be configured to return the value of each iteration.
+                        # In that case "data" will be a list, and this needs to be
+                        # handled below.
+                        "data": [data],
+                        "unit": units[benchmarks_info[name]['unit']],
+                        # "iterations" below is for conbench: 1 because we only provide
+                        # one value. If asv returns the value of each iteration (in
+                        # "data" above), iterations should match the number of values.
+                        "iterations": 1,
+ },
+ tags=tags,
+ context={"benchmark_language": "Python",
+ "env_name": benchmarks_results["env_name"],
+ "python": benchmarks_results["python"],
+ "requirements": benchmarks_results["requirements"],
+ },
+ github={"repository": os.environ["REPOSITORY"],
+ "commit":benchmarks_results["commit_hash"],
+ },
+ info={"date": str(datetime.fromtimestamp(benchmarks_results["date"]/1e3)),
+ },
+ machine_info={
+ "name": params["machine"],
+ "os_name": params["os"],
+ "os_version":params["os"],
+ "architecture_name": params["arch"],
+ "kernel_name": "x",
+ "memory_bytes": 0,
+ "cpu_model_name": params["cpu"],
+ "cpu_core_count": params["num_cpu"],
+ "cpu_thread_count": 0,
+ "cpu_l1d_cache_bytes": 0,
+ "cpu_l1i_cache_bytes": 0,
+ "cpu_l2_cache_bytes": 0,
+ "cpu_l3_cache_bytes": 0,
+ "cpu_frequency_max_hz": 0,
+ "gpu_count": 0,
+ "gpu_product_names": [],
+ }
+ )
+ parsed_benchmarks.append(parsed_benchmark)
+            except Exception:
+                # Skip results that do not match the expected format
+                continue
+
+ return parsed_benchmarks
+
diff --git a/conbench-poc/benchmark_email.py b/conbench-poc/benchmark_email.py
new file mode 100644
index 0000000..6fc8916
--- /dev/null
+++ b/conbench-poc/benchmark_email.py
@@ -0,0 +1,31 @@
+
+import smtplib, ssl
+import os
+from dotenv import load_dotenv
+import socket
+
+if socket.gethostname().startswith('Deas'):
+ load_dotenv(dotenv_path="./local_env.yml")
+else:
+ load_dotenv(dotenv_path="./server_env.yml")
+
+def email(message):
+
+    port = 465  # For SSL
+    sender_email = "conbenchalert@gmail.com"
+    receiver_email = ["deamarialeon@gmail.com"]
+    gmail_password = os.getenv("GMAIL_PASSWORD")
+
+    # Create a secure SSL context
+    context = ssl.create_default_context()
+    with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
+        server.login(sender_email, gmail_password)
+        server.sendmail(sender_email, receiver_email, message)
+
+if __name__=="__main__":
+ message = """\
+ Subject: Hello
+
+ Message sent from conbenchalert."""
+ email(message)
\ No newline at end of file
diff --git a/conbench-poc/client.py b/conbench-poc/client.py
new file mode 100644
index 0000000..bd817cc
--- /dev/null
+++ b/conbench-poc/client.py
@@ -0,0 +1,36 @@
+from asvbench import AsvBenchmarkAdapter
+from pathlib import Path
+import time
+from utilities import Environment, check_new_files
+
+env = Environment()
+
+def adapter_instance(file_to_read) -> None:
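+    """Transform one asv results file and post it to the conbench web app."""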
+ adapter = AsvBenchmarkAdapter(
+ command=["echo", str(file_to_read)],
+ result_file=Path(file_to_read),
+ result_fields_override={
+ "run_reason": env.CONBENCH_RUN_REASON,
+ },
+ benchmarks_file_path=env.BENCHMARKS_FILE_PATH,
+ )
+    adapter.run()           # runs `command` (here a no-op echo) and transforms the results file
+    adapter.post_results()  # posts the transformed results to the conbench web app
+
+
+def post_data() -> None:
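+    """Poll for new asv result files and post each one exactly once."""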
+
+ while True:
+ all_files, processed_files = check_new_files(env)
+ for new_file in (set(all_files) - set(processed_files)):
+ adapter_instance(new_file)
+ with open(env.ASV_PROCESSED_FILES, "a") as f:
+ f.write(new_file)
+ f.write("\n")
+        time.sleep(30)  # adjust this interval on the server
+
+if __name__=="__main__":
+ post_data()
+
diff --git a/conbench-poc/setup_pic.png b/conbench-poc/setup_pic.png
new file mode 100644
index 0000000..ece77a8
Binary files /dev/null and b/conbench-poc/setup_pic.png differ
diff --git a/conbench-poc/setup_server.txt b/conbench-poc/setup_server.txt
new file mode 100644
index 0000000..f054417
--- /dev/null
+++ b/conbench-poc/setup_server.txt
@@ -0,0 +1,166 @@
+Installing on the server
+
+Connect to the server with: ssh ubuntu@57.128.112.95
+
+sudo apt update
+sudo apt upgrade
+
+sudo apt install make
+
+Install the Docker engine (these steps were done on Ubuntu Lunar 23.04):
+https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository
+
+Docker was installed with "sudo", so by default it must also be run with
+"sudo". The installation was done as user "ubuntu", but conbench needs to be
+installed as user "bench", and that user does not have "sudo" rights.
+
+So user "bench" has to be added to the "docker" group; this way it can run
+Docker without "sudo":
+
+Do this as user ubuntu:
+
+To see a list of groups:
+cat /etc/group
+
+To add “bench” user to group docker:
+sudo adduser bench docker
+
+Change user to “bench”
+sudo su - bench
+
+From now on, do everything as user “bench”:
+mkdir conbench
+cd conbench
+
+Install mamba with: https://github.com/conda-forge/miniforge
+
+curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+bash Miniforge3-$(uname)-$(uname -m).sh
+
+(hit escape or q and then yes)
+
+Exit the bench user and log in again with sudo su - bench (so mamba is activated)
+
+Enter this so the base environment is not activated every time you log in:
+
+conda config --set auto_activate_base false
+
+Create environment:
+
+mamba create -n conbench-env
+mamba activate conbench-env
+mamba install python
+python -m pip install 'benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python'
+python -m pip install 'benchalerts@git+https://github.com/conbench/conbench.git@main#subdirectory=benchalerts'
+python -m pip install 'benchclients@git+https://github.com/conbench/conbench.git@main#subdirectory=benchclients/python'
+python -m pip install 'benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect'
+
+mamba install numpy python-dotenv pandas
+
+The environment is in:
+/home/bench/miniforge3/conbench-env/
+
+Clone my conbench fork, branch "server":
+git clone https://github.com/DeaMariaLeon/conbench.git
+
+Or
+
+start from the upstream conbench repository and edit Makefile,
+docker-compose.yml and conbench/api/index.py as described below.
+
+Edit the Makefile so the app uses port 5000 on the server (branch "server"
+already has this change):
+
+cd conbench/conbench (where the conbench clone is):
+
+Go to line 13 and change
+export DCOMP_CONBENCH_HOST_PORT=127.0.0.1:5000
+
+to:
+export DCOMP_CONBENCH_HOST_PORT=0.0.0.0:5000
+
+Edit docker-compose.yml:
+line 15 - APPLICATION_NAME: "pandas-conbench-PoC"
+line 42 - REGISTRATION_KEY: "innocent-registration-key" (CHANGE THIS)
+
+At the end of the file, add a named volume so the database data persists:
+
+db:
+ image: library/postgres:15.2-alpine
+ volumes:
+ - db-data:/var/lib/postgresql/data
+ environment:
+ POSTGRES_DB: "postgres"
+ POSTGRES_USER: "postgres"
+ POSTGRES_PASSWORD: "postgres"
+ healthcheck:
+ test: [ "CMD-SHELL", "pg_isready -U postgres" ]
+ interval: 2s
+ timeout: 5s
+ retries: 5
+ ports:
+ - "127.0.0.1::5432"
+volumes:
+ db-data:
+
+Edit conbench/api/index.py line 209 so the database can’t be cleaned up:
+#empty_db_tables()
+
+Clone my repo conbench_toy:
+
+git clone https://github.com/DeaMariaLeon/conbench.git
+
+Add machine.json and benchmarks.json, with their paths, to this file:
+asv_processed_files_server
+
+Provide the environment variables:
+touch server_env.yml
+
+Edit server_env.yml and set these variables:
+
+CONBENCH_URL=http://0.0.0.0:5000
+CONBENCH_EMAIL="set-your-email"
+CONBENCH_PASSWORD="set-your-password" (the credentials used to sign up below)
+CONBENCH_RUN_REASON=commit
+PANDAS_ASV_RESULTS_PATH= Path to the asv .json result files
+BENCHMARKS_FILE_PATH= Path to the benchmarks.json file generated by asv
+ASV_PROCESSED_FILES= Path and name of the file that stores the names of files already posted to the web app
+ALERT_PROCESSED_FILES= Path and name of the file that stores the names of files already analyzed by alert.py
+REPOSITORY=git@github.com:pandas-dev/pandas
+GITHUB_REPOSITORY=DeaMariaLeon/algos2 (temporary, used for alerts)
+SLACK_TOKEN=
+GMAIL_PASSWORD= Password of the email account that sends the alerts
+GITHUB_APP_ID= For the alerts app
+GITHUB_APP_PRIVATE_KEY= For the alerts app
+
+Set the GITHUB_API_TOKEN:
+Run export GITHUB_API_TOKEN="{token}" in your current shell.
+
+Run the following inside conbench/conbench (the root of the conbench clone).
+Use nohup so the app keeps running after you log out:
+
+nohup make run-app 2>&1 &
+
+(The general form nohup command >/dev/null 2>&1 & runs the command in the
+background; redirecting to /dev/null also avoids creating nohup.out.)
+
+Check nohup.out for the logs and look for responses with status 200; when they
+appear, the conbench server should be running.
+
+At that point, open the conbench web app and create an account, using the
+values from server_env.yml. Once you have registered an account, log in.
+
+Useful commands:
+
+make teardown-app (removes the containers)
+make run-app (runs the app; you need to be in ~/conbench/conbench-clone/ and
+it hangs up the shell unless started with nohup)
+
+Under the subdirectory conbench/conbench_toy, make sure your environment is
+activated, then run client.py with nohup:
+
+nohup python3 client.py 2>&1 &
+
+This sends all the asv results files to the conbench server.
+
+To run the alerts:
+nohup python3 alert.py 2>&1 &
+
+Make sure you have two files: alert_processed_files_server and
+asv_processed_files_server. The web app, client.py and alert.py run
+separately.
diff --git a/conbench-poc/utilities.py b/conbench-poc/utilities.py
new file mode 100644
index 0000000..7f2e484
--- /dev/null
+++ b/conbench-poc/utilities.py
@@ -0,0 +1,37 @@
+import socket
+from dotenv import load_dotenv
+import os
+from pathlib import Path
+
+class Environment:
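+    """Reads the environment variables from the local or the server dotenv file."""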
+
+ def __init__(self):
+        if socket.gethostname().startswith('Deas'):
+            load_dotenv(dotenv_path='./local_env.yml')
+        else:
+            load_dotenv(dotenv_path='./server_env.yml')
+
+ self.PANDAS_ASV_RESULTS_PATH = os.getenv("PANDAS_ASV_RESULTS_PATH")
+ self.BENCHMARKS_FILE_PATH = os.getenv("BENCHMARKS_FILE_PATH")
+ self.GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY")
+ self.CONBENCH_RUN_REASON = os.getenv("CONBENCH_RUN_REASON")
+ self.ASV_PROCESSED_FILES = os.getenv("ASV_PROCESSED_FILES")
+ self.ALERT_PROCESSED_FILES = os.getenv("ALERT_PROCESSED_FILES")
+
+def check_new_files(env):
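+    """Return all asv result files and the ones already posted to conbench."""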
+
+ benchmarks_path = Path(env.PANDAS_ASV_RESULTS_PATH)
+ all_files = [str(file) for file in benchmarks_path.glob('*.json')]
+ with open(env.ASV_PROCESSED_FILES, "r+") as f:
+ processed_files = f.read().split('\n')
+
+ return all_files, processed_files
+
+def alerts_done_file(env):
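+    """Return the results files for which alerts have already been processed."""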
+
+    with open(env.ALERT_PROCESSED_FILES, "r") as f:
+        alert_sent_files = f.read().split('\n')
+
+ return alert_sent_files