diff --git a/.github/workflows/fireci.yml b/.github/workflows/fireci.yml index 7b5f7109da0..8228fa10728 100644 --- a/.github/workflows/fireci.yml +++ b/.github/workflows/fireci.yml @@ -18,8 +18,10 @@ jobs: - uses: actions/checkout@v3.0.2 - uses: actions/setup-python@v2 with: - python-version: '3.9' + python-version: '3.8' - run: | pip install -e "ci/fireci[test]" - run: | pytest ci/fireci + - run: | + mypy --config-file ci/fireci/setup.cfg ci/fireci/ diff --git a/.gitignore b/.gitignore index 300f5bb4b2f..da3e77d46fe 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ firebase-crashlytics-ndk/.externalNativeBuild/ firebase-crashlytics-ndk/.cxx/ smoke-test-logs/ smoke-tests/build-debug-headGit-smoke-test -smoke-tests/firehorn.log \ No newline at end of file +smoke-tests/firehorn.log +macrobenchmark-output.json diff --git a/ci/fireci/fireci/internal.py b/ci/fireci/fireci/internal.py index c76123e3228..0950d770fc2 100644 --- a/ci/fireci/fireci/internal.py +++ b/ci/fireci/fireci/internal.py @@ -13,7 +13,6 @@ # limitations under the License. import click -import contextlib import functools import glob import itertools @@ -21,6 +20,7 @@ import os import shutil +from contextlib import contextmanager, nullcontext _logger = logging.getLogger('fireci') @@ -30,7 +30,7 @@ def _ensure_dir(directory): os.makedirs(directory) -@contextlib.contextmanager +@contextmanager def _artifact_handler(target_directory, artifact_patterns): _logger.debug( 'Artifacts will be searched for in directories matching {} patterns and placed in {}' @@ -45,7 +45,7 @@ def _artifact_handler(target_directory, artifact_patterns): target_name = os.path.join(target_directory, "_".join(path.split('/'))) _logger.debug('Copying artifact {} to {}'.format(path, target_name)) if os.path.isdir(path): - shutil.copytree(path, target_name) + shutil.copytree(path, target_name, dirs_exist_ok=True) else: shutil.copyfile(path, target_name) @@ -68,8 +68,8 @@ class _CommonOptions: '--artifact-patterns', default=('**/build/test-results', '**/build/reports'), help= - 'Shell-style artifact patterns that are copied into `artifact-target-dir`.'\ - 'Can be specified multiple times.', + 'Shell-style artifact patterns that are copied into `artifact-target-dir`. ' + 'Can be specified multiple times.', multiple=True, type=str, ) @@ -83,30 +83,34 @@ def main(options, **kwargs): setattr(options, k, v) -def ci_command(name=None): +def ci_command(name=None, cls=click.Command, group=main): """Decorator to use for CI commands. The differences from the standard @click.command are: * Allows configuration of artifacts that are uploaded for later viewing in CI. - * Registers the command automatically + * Registers the command automatically. - :param name: Optional name of the task. Defaults to the function name that is decorated with - this decorator. + :param name: Optional name of the task. Defaults to the function name that is decorated with this decorator. + :param cls: Specifies whether the func is a command or a command group. Defaults to `click.Command`. + :param group: Specifies the group the command belongs to. Defaults to the `main` command group. 
""" def ci_command(f): actual_name = f.__name__ if name is None else name - @main.command(name=actual_name, help=f.__doc__) + @click.command(name=actual_name, cls=cls, help=f.__doc__) @_pass_options @click.pass_context def new_func(ctx, options, *args, **kwargs): with _artifact_handler( options.artifact_target_dir, - options.artifact_patterns): + options.artifact_patterns, + ) if cls is click.Command else nullcontext(): return ctx.invoke(f, *args, **kwargs) + group.add_command(new_func) + return functools.update_wrapper(new_func, f) return ci_command diff --git a/ci/fireci/fireci/plugins.py b/ci/fireci/fireci/plugins.py index 66aebd30f8c..715c8de0884 100644 --- a/ci/fireci/fireci/plugins.py +++ b/ci/fireci/fireci/plugins.py @@ -27,7 +27,7 @@ def discover(): Note: plugins *must* define the `firebaseplugins` package as a namespace package. See: https://packaging.python.org/guides/packaging-namespace-packages/ """ - modules = pkgutil.iter_modules(fireciplugins.__path__, - fireciplugins.__name__ + ".") + modules = pkgutil.walk_packages(fireciplugins.__path__, + fireciplugins.__name__ + ".") for _, name, _ in modules: importlib.import_module(name) diff --git a/ci/fireci/fireciplugins/macrobenchmark.py b/ci/fireci/fireciplugins/macrobenchmark.py deleted file mode 100644 index a0fc2f81a39..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -import glob -import json -import logging -import os -import random -import re -import shutil -import sys -import tempfile -import uuid - -import click -import numpy -import pystache -import yaml -from google.cloud import storage - -from fireci import ci_command -from fireci import ci_utils -from fireci import uploader -from fireci.dir_utils import chdir - -_logger = logging.getLogger('fireci.macrobenchmark') - - -@click.option( - '--build-only/--no-build-only', - default=False, - help='Whether to only build tracing test apps or to also run them on FTL afterwards' -) -@ci_command() -def macrobenchmark(build_only): - """Measures app startup times for Firebase SDKs.""" - asyncio.run(_launch_macrobenchmark_test(build_only)) - - -async def _launch_macrobenchmark_test(build_only): - _logger.info('Starting macrobenchmark test...') - - artifact_versions = await _assemble_all_artifacts() - _logger.info(f'Artifact versions: {artifact_versions}') - - test_dir = await _prepare_test_directory() - _logger.info(f'Directory for test apps: {test_dir}') - - config = await _process_config_yaml() - _logger.info(f'Processed yaml configurations: {config}') - - tests = [MacrobenchmarkTest(app, artifact_versions, os.getcwd(), test_dir) for app in config['test-apps']] - - _logger.info(f'Building {len(tests)} macrobenchmark test apps...') - # TODO(yifany): investigate why it is much slower with asyncio.gather - # - on corp workstations (9 min) than M1 macbook pro (3 min) - # - with gradle 7.5.1 (9 min) than gradle 6.9.2 (5 min) - # await asyncio.gather(*[x.build() for x in tests]) - for test in tests: - await test.build() - - if not build_only: - _logger.info(f'Submitting {len(tests)} tests to Firebase Test Lab...') - results = await asyncio.gather(*[x.test() for x in tests], return_exceptions=True) - await _post_processing(results) - - _logger.info('Macrobenchmark test finished.') - - -async def _assemble_all_artifacts(): - await (await asyncio.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')).wait() - - with open('build/m2repository/changed-artifacts.json') as json_file: - artifacts = json.load(json_file) - return dict(_artifact_key_version(x) for x in artifacts['headGit']) - - -def _artifact_key_version(artifact): - group_id, artifact_id, version = artifact.split(':') - return f'{group_id}:{artifact_id}', version - - -async def _process_config_yaml(): - with open('health-metrics/benchmark/config.yaml') as yaml_file: - config = yaml.safe_load(yaml_file) - for app in config['test-apps']: - app['plugins'] = app.get('plugins', []) - app['traces'] = app.get('traces', []) - app['plugins'].extend(config['common-plugins']) - app['traces'].extend(config['common-traces']) - return config - - -async def _prepare_test_directory(): - test_dir = tempfile.mkdtemp(prefix='benchmark-test-') - - # Required for creating gradle wrapper, as the dir is not defined in the root settings.gradle - open(os.path.join(test_dir, 'settings.gradle'), 'w').close() - - command = ['./gradlew', 'wrapper', '--gradle-version', '7.5.1', '--project-dir', test_dir] - await (await asyncio.create_subprocess_exec(*command)).wait() - - return test_dir - - -async def _post_processing(results): - _logger.info(f'Macrobenchmark results: {results}') - - if os.getenv('CI') is None: - _logger.info('Running locally. 
Results upload skipped.') - return - - # Upload successful measurements to the metric service - measurements = [] - for result in results: - if not isinstance(result, Exception): - measurements.extend(result) - - log = ci_utils.ci_log_link() - test_report = {'benchmarks': measurements, 'log': log} - - metrics_service_url = 'https://api.firebase-sdk-health-metrics.com' - access_token = ci_utils.gcloud_identity_token() - uploader.post_report(test_report, metrics_service_url, access_token, 'macrobenchmark') - - # Raise exceptions for failed measurements - if any(map(lambda x: isinstance(x, Exception), results)): - _logger.error(f'Exceptions: {[x for x in results if isinstance(x, Exception)]}') - raise click.ClickException('Macrobenchmark test failed with above errors.') - - -class MacrobenchmarkTest: - """Builds the test based on configurations and runs the test on FTL.""" - def __init__( - self, - test_app_config, - artifact_versions, - repo_root_dir, - test_dir, - logger=_logger - ): - self.test_app_config = test_app_config - self.artifact_versions = artifact_versions - self.repo_root_dir = repo_root_dir - self.test_dir = test_dir - self.logger = MacrobenchmarkLoggerAdapter(logger, test_app_config['sdk']) - self.test_app_dir = os.path.join(test_dir, test_app_config['name']) - self.test_results_bucket = 'fireescape-benchmark-results' - self.test_results_dir = str(uuid.uuid4()) - self.gcs_client = storage.Client() - - async def build(self): - """Creates test app project and assembles app and test apks.""" - await self._create_benchmark_projects() - await self._assemble_benchmark_apks() - - async def test(self): - """Runs benchmark tests on FTL and fetches FTL results from GCS.""" - await self._execute_benchmark_tests() - return await self._aggregate_benchmark_results() - - async def _create_benchmark_projects(self): - app_name = self.test_app_config['name'] - self.logger.info(f'Creating test app "{app_name}"...') - - self.logger.info(f'Copying project template files into "{self.test_app_dir}"...') - template_dir = os.path.join(self.repo_root_dir, 'health-metrics/benchmark/template') - shutil.copytree(template_dir, self.test_app_dir) - - self.logger.info(f'Copying gradle wrapper binary into "{self.test_app_dir}"...') - shutil.copy(os.path.join(self.test_dir, 'gradlew'), self.test_app_dir) - shutil.copy(os.path.join(self.test_dir, 'gradlew.bat'), self.test_app_dir) - shutil.copytree(os.path.join(self.test_dir, 'gradle'), os.path.join(self.test_app_dir, 'gradle')) - - with chdir(self.test_app_dir): - mustache_context = await self._prepare_mustache_context() - renderer = pystache.Renderer() - mustaches = glob.glob('**/*.mustache', recursive=True) - for mustache in mustaches: - self.logger.info(f'Processing template file: {mustache}') - result = renderer.render_path(mustache, mustache_context) - original_name = mustache.removesuffix('.mustache') - with open(original_name, 'w') as file: - file.write(result) - - async def _assemble_benchmark_apks(self): - with chdir(self.test_app_dir): - await self._exec_subprocess('./gradlew', ['assemble']) - - async def _execute_benchmark_tests(self): - app_apk_path = glob.glob(f'{self.test_app_dir}/**/app-benchmark.apk', recursive=True)[0] - test_apk_path = glob.glob(f'{self.test_app_dir}/**/macrobenchmark-benchmark.apk', recursive=True)[0] - - self.logger.info(f'App apk: {app_apk_path}') - self.logger.info(f'Test apk: {test_apk_path}') - - ftl_environment_variables = [ - 'clearPackageData=true', - 'additionalTestOutputDir=/sdcard/Download', - 
'no-isolated-storage=true', - ] - executable = 'gcloud' - args = ['firebase', 'test', 'android', 'run'] - args += ['--type', 'instrumentation'] - args += ['--app', app_apk_path] - args += ['--test', test_apk_path] - args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait'] - args += ['--directories-to-pull', '/sdcard/Download'] - args += ['--results-bucket', f'gs://{self.test_results_bucket}'] - args += ['--results-dir', self.test_results_dir] - args += ['--environment-variables', ','.join(ftl_environment_variables)] - args += ['--timeout', '30m'] - args += ['--project', 'fireescape-c4819'] - - await self._exec_subprocess(executable, args) - - async def _prepare_mustache_context(self): - mustache_context = { - 'm2repository': os.path.join(self.repo_root_dir, 'build/m2repository'), - 'plugins': self.test_app_config.get('plugins', []), - 'traces': self.test_app_config.get('traces', []), - 'dependencies': [], - } - - if 'dependencies' in self.test_app_config: - for dep in self.test_app_config['dependencies']: - if '@' in dep: - key, version = dep.split('@', 1) - dependency = {'key': key, 'version': version} - else: - dependency = {'key': dep, 'version': self.artifact_versions[dep]} - mustache_context['dependencies'].append(dependency) - - return mustache_context - - async def _aggregate_benchmark_results(self): - results = [] - blobs = self.gcs_client.list_blobs(self.test_results_bucket, prefix=self.test_results_dir) - files = [x for x in blobs if re.search(r'sdcard/Download/[^/]*\.json', x.name)] - for file in files: - device = re.search(r'([^/]*)/artifacts/', file.name).group(1) - benchmarks = json.loads(file.download_as_bytes())['benchmarks'] - for benchmark in benchmarks: - method = benchmark['name'] - clazz = benchmark['className'].split('.')[-1] - runs = benchmark['metrics']['timeToInitialDisplayMs']['runs'] - results.append({ - 'sdk': self.test_app_config['sdk'], - 'device': device, - 'name': f'{clazz}.{method}', - 'min': min(runs), - 'max': max(runs), - 'p50': numpy.percentile(runs, 50), - 'p90': numpy.percentile(runs, 90), - 'p99': numpy.percentile(runs, 99), - 'unit': 'ms', - }) - self.logger.info(f'Benchmark results: {results}') - return results - - async def _exec_subprocess(self, executable, args): - command = " ".join([executable, *args]) - self.logger.info(f'Executing command: "{command}"...') - - proc = await asyncio.subprocess.create_subprocess_exec( - executable, - *args, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE - ) - await asyncio.gather( - self._stream_output(executable, proc.stdout), - self._stream_output(executable, proc.stderr) - ) - - await proc.communicate() - if proc.returncode == 0: - self.logger.info(f'"{command}" finished.') - else: - message = f'"{command}" exited with return code {proc.returncode}.' 
- self.logger.error(message) - raise click.ClickException(message) - - async def _stream_output(self, executable, stream: asyncio.StreamReader): - async for line in stream: - self.logger.info(f'[{executable}] {line.decode("utf-8").strip()}') - - -class MacrobenchmarkLoggerAdapter(logging.LoggerAdapter): - """Decorates log messages for a sdk to make them more distinguishable.""" - - reset_code = '\x1b[m' - - @staticmethod - def random_color_code(): - code = random.randint(16, 231) # https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit - return f'\x1b[38;5;{code}m' - - def __init__(self, logger, sdk_name, color_code=None): - super().__init__(logger, {}) - self.sdk_name = sdk_name - self.color_code = self.random_color_code() if color_code is None else color_code - - def process(self, msg, kwargs): - colored = f'{self.color_code}[{self.sdk_name}]{self.reset_code} {msg}' - uncolored = f'[{self.sdk_name}] {msg}' - return colored if sys.stderr.isatty() else uncolored, kwargs diff --git a/ci/fireci/fireciplugins/macrobenchmark/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/__init__.py new file mode 100644 index 00000000000..6d6d1266c32 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py new file mode 100644 index 00000000000..6d6d1266c32 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py new file mode 100644 index 00000000000..5b75e3f2678 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py @@ -0,0 +1,79 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import pandas as pd +import seaborn as sns + +from pathlib import Path + +logger = logging.getLogger('fireci.macrobenchmark') +sns.set() + + +def calculate_statistic(trace: str, device: str, data: pd.DataFrame, output_dir: Path = None): + logger.info(f'Calculating statistics for trace "{trace}" on device "{device}" ...') + + # Calculate percentiles per each run_id + quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] + percentiles = data.groupby('run_id').quantile(quantiles, numeric_only=True) + percentiles.index.set_names('percentile', level=1, inplace=True) + percentiles = percentiles.reset_index(['run_id', 'percentile']) + percentiles = percentiles.pivot(index='run_id', columns='percentile', values='duration') + + def mapper(quantile: float) -> str: return f'p{int(quantile * 100)}' + + percentiles.rename(mapper=mapper, axis='columns', inplace=True) + + # Calculate dispersions of each percentile over all runs + mean = percentiles.mean() + std = percentiles.std() # standard deviation + cv = std / mean # coefficient of variation (relative standard deviation) + mad = (percentiles - percentiles.mean()).abs().mean() # mean absolute deviation + rmad = mad / mean # relative mean absolute deviation (mad / mean) + dispersions = pd.DataFrame([pd.Series(cv, name='cv'), pd.Series(rmad, name='rmad')]) + + # Optionally save percentiles and dispersions to file + if output_dir: + percentiles.to_json(output_dir.joinpath('percentiles.json'), orient='index') + dispersions.to_json(output_dir.joinpath('dispersions.json'), orient='index') + logger.info(f'Percentiles and dispersions saved in: {output_dir}') + + return percentiles, dispersions + + +def calculate_statistic_diff( + trace: str, + device: str, + control: pd.DataFrame, + experimental: pd.DataFrame, + output_dir: Path = None, +): + logger.info(f'Calculating statistic diff for trace "{trace}" on device "{device}" ...') + + ctl_percentiles, _ = calculate_statistic(trace, device, control) + exp_percentiles, _ = calculate_statistic(trace, device, experimental) + + ctl_mean = ctl_percentiles.mean() + exp_mean = exp_percentiles.mean() + + delta = exp_mean - ctl_mean + percentage = delta / ctl_mean + + # Optionally save statistics to file + if output_dir: + delta.to_json(output_dir.joinpath('delta.json')) + percentage.to_json(output_dir.joinpath('percentage.json')) + logger.info(f'Percentiles diff saved in: {output_dir}') diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py new file mode 100644 index 00000000000..86b8ec4ca76 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py @@ -0,0 +1,104 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
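
To make the statistics in `aggregator.py` above concrete, here is a toy run on synthetic numbers. The durations below are invented; only the pandas recipe mirrors `calculate_statistic`:

```python
# Toy sanity check: per-run percentiles first, then the spread of each
# percentile across runs (cv = std / mean, rmad = mean abs. deviation / mean).
import numpy as np
import pandas as pd

rng = np.random.default_rng(seed=0)
data = pd.DataFrame({
    'run_id': ['run-0'] * 100 + ['run-1'] * 100,
    'duration': np.concatenate([
        rng.normal(180, 10, 100),   # invented run-0 startup times (ms)
        rng.normal(185, 12, 100),   # invented run-1 startup times (ms)
    ]),
})

# One row per run_id, one column per percentile (p10 ... p90).
percentiles = (
    data.groupby('run_id')['duration']
        .quantile([0.1, 0.25, 0.5, 0.75, 0.9])
        .unstack()
        .rename(columns=lambda q: f'p{int(q * 100)}')
)

# Dispersion of each percentile over all runs, as in calculate_statistic above.
mean = percentiles.mean()
cv = percentiles.std() / mean                     # coefficient of variation
rmad = (percentiles - mean).abs().mean() / mean   # relative mean absolute deviation

print(percentiles.round(1))
print(pd.DataFrame({'cv': cv, 'rmad': rmad}).round(4))
```
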
+ +import logging +import tempfile +import pandas as pd + +from .aggregator import calculate_statistic, calculate_statistic_diff +from .plotter import plot_graph, plot_diff_graph +from .utils import collect_data_points, DataPoint +from click import progressbar +from pathlib import Path +from typing import List + + +logger = logging.getLogger('fireci.macrobenchmark') + + +def start( + diff_mode: bool, + ftl_results_dir: List[str], + local_reports_dir: Path, + ctl_ftl_results_dir: List[str], + ctl_local_reports_dir: Path, + exp_ftl_results_dir: List[str], + exp_local_reports_dir: Path, + output_dir: Path +): + logger.info('Starting to analyze macrobenchmark test results ...') + + if not output_dir: + output_dir = Path(tempfile.mkdtemp(prefix='macrobenchmark-analysis-')) + logger.info(f'Created temporary dir "{output_dir}" to save analysis results') + + if not diff_mode: + data_points = collect_data_points(ftl_results_dir, local_reports_dir) + _process(data_points, output_dir) + else: + logger.info('Running in diff mode ...') + ctl_data_points = collect_data_points(ctl_ftl_results_dir, ctl_local_reports_dir) + exp_data_points = collect_data_points(exp_ftl_results_dir, exp_local_reports_dir) + _diff(ctl_data_points, exp_data_points, output_dir) + + logger.info(f'Completed analysis and saved output in: {output_dir}') + + +def _process(data_points: List[DataPoint], output_dir: Path) -> None: + data = pd.DataFrame(data_points) + traces = sorted(data['trace'].unique()) + devices = sorted(data['device'].unique()) + + trace_device_combinations = [(trace, device) for trace in traces for device in devices] + + with progressbar(trace_device_combinations) as combinations: + for trace, device in combinations: + combination_dir = output_dir.joinpath(trace, device) + combination_dir.mkdir(parents=True, exist_ok=True) + subset = _filter_subset(data, trace, device) + calculate_statistic(trace, device, subset, combination_dir) + plot_graph(trace, device, subset, combination_dir) + + +def _diff( + ctl_data_points: List[DataPoint], + exp_data_points: List[DataPoint], + output_dir: Path +) -> None: + ctl_data = pd.DataFrame(ctl_data_points) + exp_data = pd.DataFrame(exp_data_points) + all_data = pd.concat([ctl_data, exp_data]) + + traces = sorted(all_data['trace'].unique()) + devices = sorted(all_data['device'].unique()) + + trace_device_combinations = [(trace, device) for trace in traces for device in devices] + + with progressbar(trace_device_combinations) as combinations: + for trace, device in combinations: + combination_dir = output_dir.joinpath(trace, device) + combination_dir.mkdir(parents=True, exist_ok=True) + + ctl_subset = _filter_subset(ctl_data, trace, device) + exp_subset = _filter_subset(exp_data, trace, device) + + calculate_statistic_diff(trace, device, ctl_subset, exp_subset, combination_dir) + plot_diff_graph(trace, device, ctl_subset, exp_subset, combination_dir) + + +def _filter_subset(data: pd.DataFrame, trace: str, device: str) -> pd.DataFrame: + return data.loc[ + (data['trace'] == trace) & (data['device'] == device), + ['duration', 'run_id'] + ] diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py new file mode 100644 index 00000000000..ac73f80815c --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py @@ -0,0 +1,70 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +import pandas as pd +import seaborn as sns + +from pathlib import Path + + +logger = logging.getLogger('fireci.macrobenchmark') +sns.set() + + +def plot_graph(trace: str, device: str, data: pd.DataFrame, output_dir: Path): + logger.info(f'Plotting graphs for trace "{trace}" on device "{device}" ...') + + unique_run_ids = len(data['run_id'].unique()) + col_wrap = int(np.ceil(np.sqrt(unique_run_ids))) + + histograms = sns.displot(data=data, x='duration', kde=True, col="run_id", col_wrap=col_wrap) + histograms.set_axis_labels(x_var=f'{trace} (ms)') + histograms.set_titles(f'{device} ({{col_var}} = {{col_name}})') + histograms.savefig(output_dir.joinpath('histograms.svg')) + + distributions = sns.displot( + data=data, x='duration', kde=True, height=8, + hue='run_id', palette='muted', multiple='dodge' + ) + distributions.set_axis_labels(x_var=f'{trace} (ms)').set(title=device) + distributions.savefig(output_dir.joinpath('distributions.svg')) + + logger.info(f'Graphs saved in: {output_dir}') + + +def plot_diff_graph( + trace: str, + device: str, + control: pd.DataFrame, + experimental: pd.DataFrame, + output_dir: Path +): + logger.info(f'Plotting distribution diff graph for trace "{trace}" on device "{device}" ...') + + control_run_ids = control['run_id'] + experimental_run_ids = experimental['run_id'] + all_data = pd.concat([control, experimental]) + + palette = {**{x: 'b' for x in control_run_ids}, **{x: 'r' for x in experimental_run_ids}} + + distribution_diff = sns.displot( + data=all_data, x='duration', kde=True, height=8, + hue='run_id', palette=palette, multiple='dodge' + ) + distribution_diff.set_axis_labels(x_var=f'{trace} (ms)').set(title=device) + distribution_diff.savefig(output_dir.joinpath('distribution_diff.svg')) + + logger.info(f'Graph saved in: {output_dir}') diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py new file mode 100644 index 00000000000..131bb909a83 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py @@ -0,0 +1,82 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import logging +import re +import tempfile + +from click import ClickException +from google.cloud import storage +from pathlib import Path +from typing import List, TypedDict + + +logger = logging.getLogger('fireci.macrobenchmark') +DataPoint = TypedDict('DataPoint', {'duration': float, 'device': str, 'trace': str, 'run_id': str}) + + +def collect_data_points(ftl_results_dir: List[str], local_reports_dir: Path) -> List[DataPoint]: + if not ftl_results_dir and not local_reports_dir: + raise ClickException('Neither ftl-results-dir or local-reports-dir is provided.') + elif ftl_results_dir and not local_reports_dir: + temp_dir = _download(ftl_results_dir) + return _extract_raw_data(temp_dir) + elif not ftl_results_dir and local_reports_dir: + return _extract_raw_data(local_reports_dir) + else: + raise ClickException('Should specify either ftl-results-dir or local-reports-dir, not both.') + + +def _download(ftl_results_dirs: List[str]) -> Path: + ftl_results_bucket = 'fireescape-benchmark-results' + gcs = storage.Client() + + temp_dir = tempfile.mkdtemp(prefix='ftl-results-') + for ftl_results_dir in ftl_results_dirs: + blobs = gcs.list_blobs(ftl_results_bucket, prefix=ftl_results_dir) + files = [f for f in blobs if f.name.endswith('.json')] + for file in files: + device = re.search(r'([^/]*)/artifacts/', file.name).group(1) + report_dir = Path(temp_dir).joinpath(ftl_results_dir, device) + report_dir.mkdir(parents=True, exist_ok=True) + filename = file.name.split('/')[-1] + file.download_to_filename(report_dir.joinpath(filename)) + logger.info(f'Downloaded "{file.name}" to "{report_dir}"') + + return Path(temp_dir) + + +def _extract_raw_data(test_reports_dir: Path) -> List[DataPoint]: + data_points: List[DataPoint] = [] + reports = sorted(list(test_reports_dir.rglob("*-benchmarkData.json"))) + for report in reports: + logger.info(f'Processing "{report}" ...') + + run_id = str(report.relative_to(test_reports_dir)).split('/')[0] + with open(report) as file: + obj = json.load(file) + build_context = obj['context']['build'] + device = f'{build_context["device"]}-{build_context["version"]["sdk"]}' + for metric in obj['benchmarks'][0]['metrics'].keys(): + measurements = obj['benchmarks'][0]['metrics'][metric]['runs'] + trace = metric[:-2] # TODO(yifany): .removesuffix('Ms') w/ python 3.9+ + data_points.extend([{ + 'duration': measurement, + 'device': device, + 'trace': trace, + 'run_id': run_id + } for measurement in measurements]) + logger.info(f'Extracted {len(data_points)} data points from reports in "{test_reports_dir}"') + return data_points diff --git a/ci/fireci/fireciplugins/macrobenchmark/commands.py b/ci/fireci/fireciplugins/macrobenchmark/commands.py new file mode 100644 index 00000000000..d431a61b18c --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/commands.py @@ -0,0 +1,128 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
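
For reference, the minimal slice of a `*-benchmarkData.json` report that `_extract_raw_data` above relies on. The keys mirror what the code reads; the values here are invented and real Macrobenchmark output contains many more fields:

```python
# Illustrative only: how the fields of a benchmark report map onto DataPoint
# records (duration, device, trace, run_id).
report = {
    "context": {"build": {"device": "oriole", "version": {"sdk": 32}}},
    "benchmarks": [{
        "name": "startup",
        "className": "com.google.firebase.macrobenchmark.StartupBenchmark",
        "metrics": {
            "timeToInitialDisplayMs": {"runs": [182.4, 179.9, 190.3]},
        },
    }],
}

build = report["context"]["build"]
device = f'{build["device"]}-{build["version"]["sdk"]}'   # -> "oriole-32"

data_points = []
for metric, value in report["benchmarks"][0]["metrics"].items():
    trace = metric[:-2]                                    # strip the "Ms" suffix
    data_points += [{"duration": d, "device": device, "trace": trace, "run_id": "run-0"}
                    for d in value["runs"]]

print(data_points)
```
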
+ +import asyncio +import click + +from .analyze import analyzer +from .run import runner +from fireci import ci_command +from pathlib import Path +from typing import List + + +@ci_command(cls=click.Group) +def macrobenchmark(): + """Macrobenchmark testing command group.""" + pass + + +@click.option( + '--build-only', + is_flag=True, + default=False, + show_default=True, + help='Build the test projects without running the test.' +) +@click.option( + '--local/--remote', + required=True, + help='Run the test on local devices or Firebase Test Lab.' +) +@click.option( + '--repeat', + default=1, + show_default=True, + help='Number of times to repeat the test (for obtaining more data points).' +) +@click.option( + '--output', + type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), + default='macrobenchmark-output.json', + show_default=True, + help='The file for saving macrobenchmark test output if running on Firebase Test Lab.' +) +@ci_command(group=macrobenchmark) +def run(build_only: bool, local: bool, repeat: int, output: Path): + """Run macrobenchmark test.""" + asyncio.run(runner.start(build_only, local, repeat, output)) + + +@click.option( + '--diff-mode', + is_flag=True, + default=False, + help='Compare two sets of macrobenchmark result.' +) +@click.option( + '--ftl-results-dir', + multiple=True, + help='Firebase Test Lab results directory name. Can be specified multiple times.' +) +@click.option( + '--local-reports-dir', + type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), + help='Path to the directory of local test reports.' +) +@click.option( + '--ctl-ftl-results-dir', + multiple=True, + help='FTL results dir of the control group, if running in diff mode. ' + 'Can be specified multiple times.' +) +@click.option( + '--ctl-local-reports-dir', + type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), + help='Path to the local test reports of the control group, if running in diff mode.' +) +@click.option( + '--exp-ftl-results-dir', + multiple=True, + help='FTL results dir of the experimental group, if running in diff mode. ' + 'Can be specified multiple times.' +) +@click.option( + '--exp-local-reports-dir', + type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), + help='Path to the local test reports of the experimental group, if running in diff mode.' +) +@click.option( + '--output-dir', + type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), + help='The directory for saving macrobenchmark analysis result.' +) +@ci_command(group=macrobenchmark) +def analyze( + diff_mode: bool, + ftl_results_dir: List[str], + local_reports_dir: Path, + ctl_ftl_results_dir: List[str], + ctl_local_reports_dir: Path, + exp_ftl_results_dir: List[str], + exp_local_reports_dir: Path, + output_dir: Path +): + """Analyze macrobenchmark result.""" + analyzer.start( + diff_mode, + ftl_results_dir, + local_reports_dir, + ctl_ftl_results_dir, + ctl_local_reports_dir, + exp_ftl_results_dir, + exp_local_reports_dir, + output_dir, + ) + +# TODO(yifany): support of command chaining diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py new file mode 100644 index 00000000000..6d6d1266c32 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py b/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py new file mode 100644 index 00000000000..177f5a1a3ba --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py @@ -0,0 +1,51 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import sys + +from logging import Logger, LoggerAdapter +from typing import Union + + +RESET_CODE = '\x1b[m' + + +class LogDecorator(LoggerAdapter): + """Decorates log messages with colors in console output.""" + + def __init__(self, logger: Union[Logger, LoggerAdapter], key: str): + super().__init__(logger, {}) + self.key = key + self.color_code = self._random_color_code() + + def process(self, msg, kwargs): + colored, uncolored = self._produce_prefix() + result = f'{colored if sys.stderr.isatty() else uncolored} {msg}' + return result, kwargs + + @staticmethod + def _random_color_code(): + code = random.randint(16, 231) # https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit + return f'\x1b[38;5;{code}m' + + def _produce_prefix(self): + if hasattr(super(), '_produce_prefix'): + colored_super, uncolored_super = getattr(super(), '_produce_prefix')() + colored = f'{colored_super} {self.color_code}[{self.key}]{RESET_CODE}' + uncolored = f'{uncolored_super} [{self.key}]' + else: + colored = f'{self.color_code}[{self.key}]{RESET_CODE}' + uncolored = f'[{self.key}]' + return colored, uncolored diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/runner.py b/ci/fireci/fireciplugins/macrobenchmark/run/runner.py new file mode 100644 index 00000000000..af233b8e758 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/runner.py @@ -0,0 +1,101 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
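
A small usage sketch for `LogDecorator` above, assuming fireci is installed (`pip install -e "ci/fireci"`). The run-level prefix is applied first and the project-level prefix is prepended as the message passes through the wrapped adapter:

```python
# Sketch only: nest two LogDecorator adapters the way the test runner code in
# this patch does, and observe the combined prefix on stderr.
import logging
import sys

from fireciplugins.macrobenchmark.run.log_decorator import LogDecorator

logging.basicConfig(level=logging.INFO, stream=sys.stderr, format='%(message)s')
base = logging.getLogger('fireci.macrobenchmark')

project_logger = LogDecorator(base, 'all-included')  # one per test project
run_logger = LogDecorator(project_logger, 'run-0')   # one per test run

run_logger.info('Assembling benchmark apks ...')
# Prints roughly: [all-included] [run-0] Assembling benchmark apks ...
# with each bracketed key in a randomly chosen ANSI color when stderr is a TTY.
```
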
+ +import asyncio +import click +import json +import logging +import tempfile +import yaml + +from .test_project_builder import TestProjectBuilder +from .utils import execute +from pathlib import Path +from typing import Dict + + +logger = logging.getLogger('fireci.macrobenchmark') + + +async def start(build_only: bool, local: bool, repeat: int, output: Path): + logger.info('Starting macrobenchmark test ...') + + config = _process_config_yaml() + product_versions = _assemble_all_products() + test_dir = _prepare_test_directory() + template_project_dir = Path('health-metrics/benchmark/template') + + test_projects = [ + TestProjectBuilder( + test_config, + test_dir, + template_project_dir, + product_versions, + ).build() for test_config in config['test-apps']] + + if not build_only: + if local: + for test_project in test_projects: + test_project.run_local(repeat) + else: + remote_runs = [test_project.run_remote(repeat) for test_project in test_projects] + results = await asyncio.gather(*remote_runs, return_exceptions=True) + test_outputs = [x for x in results if not isinstance(x, Exception)] + exceptions = [x for x in results if isinstance(x, Exception)] + + with open(output, 'w') as file: + json.dump(test_outputs, file) + logger.info(f'Output of remote testing saved to: {output}') + + if exceptions: + logger.error(f'Exceptions occurred: {exceptions}') + for test_output in test_outputs: + if test_output['exceptions']: + logger.error(f'Exceptions occurred: {test_output["exceptions"]}') + + if exceptions or any(test_output['exceptions'] for test_output in test_outputs): + raise click.ClickException('Macrobenchmark test failed with above exceptions') + + logger.info(f'Completed macrobenchmark test successfully') + + +def _assemble_all_products() -> Dict[str, str]: + execute('./gradlew', 'assembleAllForSmokeTests', logger=logger) + + product_versions: Dict[str, str] = {} + with open('build/m2repository/changed-artifacts.json') as json_file: + artifacts = json.load(json_file) + for artifact in artifacts['headGit']: + group_id, artifact_id, version = artifact.split(':') + product_versions[f'{group_id}:{artifact_id}'] = version + + logger.info(f'Product versions: {product_versions}') + return product_versions + + +def _process_config_yaml(): + with open('health-metrics/benchmark/config.yaml') as yaml_file: + config = yaml.safe_load(yaml_file) + for app in config['test-apps']: + app['plugins'] = app.get('plugins', []) + app['traces'] = app.get('traces', []) + app['plugins'].extend(config['common-plugins']) + app['traces'].extend(config['common-traces']) + return config + + +def _prepare_test_directory() -> Path: + test_dir = tempfile.mkdtemp(prefix='benchmark-test-') + logger.info(f'Temporary test directory created at: {test_dir}') + return Path(test_dir) diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py b/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py new file mode 100644 index 00000000000..9a7bb22befd --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py @@ -0,0 +1,107 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
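
The remote path in `runner.py` above collects per-run failures instead of aborting the whole batch. A self-contained sketch of that `asyncio.gather(return_exceptions=True)` pattern (`fake_remote_run` is a stand-in, not fireci code):

```python
# Sketch only: launch all remote runs concurrently, let individual failures
# surface as Exception objects, then partition successes from failures.
import asyncio


async def fake_remote_run(run_id: int) -> str:
    await asyncio.sleep(0)                      # stand-in for the FTL invocation
    if run_id == 1:
        raise RuntimeError(f'run-{run_id} failed on FTL')
    return f'2022-11-04_11:18:34_run-{run_id}'  # would be the FTL results dir


async def main() -> None:
    results = await asyncio.gather(*[fake_remote_run(i) for i in range(3)],
                                   return_exceptions=True)
    successes = [r for r in results if not isinstance(r, Exception)]
    failures = [r for r in results if isinstance(r, Exception)]
    print('successes:', successes)
    print('failures :', [str(e) for e in failures])


asyncio.run(main())
```
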
+# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import glob +import re +import shutil + +from .log_decorator import LogDecorator +from .utils import execute, execute_async, generate_test_run_id +from fireci.dir_utils import chdir +from logging import getLogger, Logger, LoggerAdapter +from pathlib import Path +from typing import List, TypedDict, Union + +logger = getLogger('fireci.macrobenchmark') + + +class RemoteTestOutput(TypedDict, total=False): + project: str + successful_runs: List[str] + exceptions: List[str] # Using str due to Exception being not JSON serializable + + +class TestProject: + def __init__(self, name: str, project_dir: Path, custom_logger: Union[Logger, LoggerAdapter]): + self.name = name + self.test_project_dir = project_dir + self.logger = custom_logger + + def run_local(self, repeat: int): + self.logger.info(f'Running test locally for {repeat} times ...') + local_reports_dir = self.test_project_dir.joinpath('_reports') + + with chdir(self.test_project_dir): + for index in range(repeat): + run_id = generate_test_run_id() + run_logger = LogDecorator(self.logger, f'run-{index}') + run_logger.info(f'Run-{index}: {run_id}') + execute('./gradlew', ':macrobenchmark:connectedCheck', logger=run_logger) + + reports = self.test_project_dir.rglob('build/**/*-benchmarkData.json') + run_dir = local_reports_dir.joinpath(run_id) + for report in reports: + device = re.search(r'benchmark/connected/([^/]*)/', str(report)).group(1) + device_dir = run_dir.joinpath(device) + device_dir.mkdir(parents=True, exist_ok=True) + shutil.copy(report, device_dir) + run_logger.debug(f'Copied report file "{report}" to "{device_dir}"') + + self.logger.info(f'Finished all {repeat} runs, local reports dir: "{local_reports_dir}"') + + async def run_remote(self, repeat: int) -> RemoteTestOutput: + self.logger.info(f'Running test remotely for {repeat} times ...') + + with chdir(self.test_project_dir): + await execute_async('./gradlew', 'assemble', logger=self.logger) + app_apk_path = glob.glob('**/app-benchmark.apk', recursive=True)[0] + test_apk_path = glob.glob('**/macrobenchmark-benchmark.apk', recursive=True)[0] + self.logger.info(f'App apk: "{app_apk_path}", Test apk: "{test_apk_path}"') + + async def run(index: int, run_id: str) -> str: + run_logger = LogDecorator(self.logger, f'run-{index}') + run_logger.info(f'Run-{index}: {run_id}') + ftl_environment_variables = [ + 'clearPackageData=true', + 'additionalTestOutputDir=/sdcard/Download', + 'no-isolated-storage=true', + ] + executable = 'gcloud' + args = ['firebase', 'test', 'android', 'run'] + args += ['--type', 'instrumentation'] + args += ['--app', app_apk_path] + args += ['--test', test_apk_path] + args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait'] + args += ['--directories-to-pull', '/sdcard/Download'] + args += ['--results-bucket', 'fireescape-benchmark-results'] + args += ['--results-dir', run_id] + args += ['--environment-variables', ','.join(ftl_environment_variables)] + args += ['--timeout', '30m'] + args += ['--project', 'fireescape-c4819'] + await execute_async(executable, *args, logger=run_logger) + return run_id + + runs = [run(i, generate_test_run_id()) for i in range(repeat)] + results = await asyncio.gather(*runs, return_exceptions=True) + successes = [x for x in results if not isinstance(x, Exception)] + exceptions = [x for x in results if isinstance(x, Exception)] + + self.logger.info(f'Finished all {repeat} runs, successes: 
{successes}, failures: {exceptions}') + + return RemoteTestOutput( + project=self.name, + successful_runs=successes, + exceptions=[str(e) for e in exceptions] + ) diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py b/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py new file mode 100644 index 00000000000..6e6dd6d2a14 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py @@ -0,0 +1,89 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import pystache +import shutil + +from .log_decorator import LogDecorator +from .test_project import TestProject +from .utils import execute +from pathlib import Path +from typing import Any, Dict + + +logger = logging.getLogger('fireci.macrobenchmark') + + +class TestProjectBuilder: + def __init__( + self, + test_config: Any, + test_dir: Path, + template_project_dir: Path, + product_versions: Dict[str, str] + ): + self.test_config = test_config + self.template_project_dir = template_project_dir + self.product_versions = product_versions + + self.name = test_config['name'] + self.logger = LogDecorator(logger, self.name) + self.project_dir = test_dir.joinpath(self.name) + + def build(self) -> TestProject: + self.logger.info(f'Creating test project "{self.name}" ...') + + self._copy_template_project() + self._flesh_out_mustache_template_files() + self._download_gradle_wrapper() + + self.logger.info(f'Test project "{self.name}" created at "{self.project_dir}"') + return TestProject(self.name, self.project_dir, self.logger) + + def _copy_template_project(self): + shutil.copytree(self.template_project_dir, self.project_dir) + self.logger.debug(f'Copied project template files into "{self.project_dir}"') + + def _download_gradle_wrapper(self): + args = ['wrapper', '--gradle-version', '7.5.1', '--project-dir', str(self.project_dir)] + execute('./gradlew', *args, logger=self.logger) + self.logger.debug(f'Created gradle wrapper in "{self.project_dir}"') + + def _flesh_out_mustache_template_files(self): + mustache_context = { + 'm2repository': os.path.abspath('build/m2repository'), + 'plugins': self.test_config.get('plugins', []), + 'traces': self.test_config.get('traces', []), + 'dependencies': [], + } + + if 'dependencies' in self.test_config: + for dep in self.test_config['dependencies']: + if '@' in dep: + key, version = dep.split('@', 1) + dependency = {'key': key, 'version': version} + else: + dependency = {'key': dep, 'version': self.product_versions[dep]} + mustache_context['dependencies'].append(dependency) + + renderer = pystache.Renderer() + mustaches = self.project_dir.rglob('**/*.mustache') + for mustache in mustaches: + self.logger.debug(f'Processing template file: {mustache}') + result = renderer.render_path(mustache, mustache_context) + original_name = str(mustache)[:-9] # TODO(yifany): .removesuffix('.mustache') w/ python 3.9+ + with open(original_name, 'w') as file: + file.write(result) diff --git 
a/ci/fireci/fireciplugins/macrobenchmark/run/utils.py b/ci/fireci/fireciplugins/macrobenchmark/run/utils.py new file mode 100644 index 00000000000..32e90193438 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark/run/utils.py @@ -0,0 +1,65 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import string +import random + +from asyncio import create_subprocess_exec +from asyncio.subprocess import PIPE as ASYNC_PIPE, STDOUT as ASYNC_STDOUT +from logging import Logger, LoggerAdapter +from subprocess import Popen, PIPE, STDOUT +from typing import Union + + +def generate_test_run_id() -> str: + now = datetime.datetime.now() + date = now.date() + time = now.time() + name = ''.join(random.choices(string.ascii_letters, k=4)) + return f'{date}_{time}_{name}' + + +def execute(program: str, *args: str, logger: Union[Logger, LoggerAdapter]) -> None: + command = " ".join([program, *args]) + logger.info(f'Executing subprocess: "{command}" ...') + + popen = Popen([program, *args], stdout=PIPE, stderr=STDOUT) + for line in popen.stdout: + logger.info(f'[{program}] {line.decode("utf-8").strip()}') + popen.communicate() + + if popen.returncode == 0: + logger.info(f'"{command}" succeeded') + else: + message = f'"{command}" failed with return code {popen.returncode}' + logger.error(message) + raise RuntimeError(message) + + +async def execute_async(program: str, *args: str, logger: Union[Logger, LoggerAdapter]) -> None: + command = " ".join([program, *args]) + logger.info(f'Executing subprocess: "{command}" ...') + + process = await create_subprocess_exec(program, *args, stdout=ASYNC_PIPE, stderr=ASYNC_STDOUT) + async for line in process.stdout: + logger.info(f'[{program}] {line.decode("utf-8").strip()}') + await process.communicate() + + if process.returncode == 0: + logger.info(f'"{command}" succeeded') + else: + message = f'"{command}" failed with return code {process.returncode}' + logger.error(message) + raise RuntimeError(message) diff --git a/ci/fireci/setup.cfg b/ci/fireci/setup.cfg index 4bc55ca8ea5..1237d1a6af9 100644 --- a/ci/fireci/setup.cfg +++ b/ci/fireci/setup.cfg @@ -5,12 +5,15 @@ version = 0.1 [options] install_requires = protobuf==3.19 - click==7.0 - google-cloud-storage==1.44.0 + click==8.1.3 + google-cloud-storage==2.5.0 + mypy==0.991 numpy==1.23.1 + pandas==1.5.1 PyGithub==1.55 pystache==0.6.0 requests==2.23.0 + seaborn==0.12.1 PyYAML==6.0.0 [options.extras_require] @@ -20,3 +23,18 @@ test = [options.entry_points] console_scripts = fireci = fireci.main:cli + +[mypy] +strict_optional = False +[mypy-google.cloud] +ignore_missing_imports = True +[mypy-pandas] +ignore_missing_imports = True +[mypy-pystache] +ignore_missing_imports = True +[mypy-requests] +ignore_missing_imports = True +[mypy-seaborn] +ignore_missing_imports = True +[mypy-yaml] +ignore_missing_imports = True diff --git a/health-metrics/benchmark/README.md b/health-metrics/benchmark/README.md index 5cffaf1532a..3cd25e9617b 100644 --- a/health-metrics/benchmark/README.md +++ 
b/health-metrics/benchmark/README.md @@ -10,7 +10,7 @@ building a macrobenchmark test app for each of the Firebase Android SDKs. If not all of them are required, comment out irrelevant ones for faster build and test time. -## Run benchmark tests +## Run macrobenchmark tests ### Prerequisite @@ -35,16 +35,27 @@ and test time. [doc](https://cloud.google.com/docs/authentication) for full guidance on authentication. -### Run benchmark tests locally +### Run tests locally -1. Build all test apps by running below command in the root - directory `firebase-android-sdk`: +1. [Connect an Android device to the computer](https://d.android.com/studio/run/device) + +1. Run below command in the repository root directory `firebase-android-sdk`: ```shell - fireci macrobenchmark --build-only + fireci macrobenchmark run --local ``` -1. [Connect an Android device to the computer](https://d.android.com/studio/run/device) + **Note**: specify `--repeat ` to run the test multiple times. Run + `fireci macrobenchmark run --help` to see more details. + +Alternatively, developers can also create test apps with `fireci`, and run the +test from either CLI or Android Studio: + +1. Run below command to build all test apps: + + ```shell + fireci macrobenchmark run --build-only + ``` 1. Locate the temporary test apps directory from the log, for example: @@ -89,23 +100,90 @@ and test time. Alternatively, same set of result files are produced at the same output location as invoking tests from CLI, which can be used for inspection. -### Run benchmark tests on Firebase Test Lab +### Run tests on Firebase Test Lab -Build and run all tests on FTL by running below command in the root -directory `firebase-android-sdk`: +Run below command to build and run all tests on FTL: +```shell +fireci macrobenchmark run --remote ``` -fireci macrobenchmark -``` -Alternatively, it is possible to build all test apps via steps described in -[Running benchmark tests locally](#running-benchmark-tests-locally) -and manually -[run tests on FTL with `gcloud` CLI ](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests). +**Note**: `--repeat ` is also supported to submit the test to FTL for +`` times. All tests on FTL will run in parallel. + +Alternatively, developers can still build test apps locally, and manually +[run tests on FTL with `gcloud` CLI](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests). Aggregated benchmark results are displayed in the log. The log also contains links to FTL result pages and result files on Google Cloud Storage. +## Analyze macrobenchmark results + +Besides results from `*-benchmarkData.json` as descriped above, `fireci` +supports more in depth analysis, such as: + +- calculating percentiles and visualizing distributions for one test run +- comparing two sets of results (with stats and graphs) from two different runs + +To see more details, run + +```shell +fireci macrobenchmark analyze --help +``` + +### Example usage + +1. Analyzing local test results + + ```shell + fireci macrobenchmark analyze --local-reports-dir + ``` + + `` is the directory containing the `*-benchmarkData.json` from + the local test runs. + + **Note**: If the test is started: + + - with `fireci macrobenchmark run --local`, `fireci` copies all benchmark + json files into a dir, which can be supplied here. + - manually (CLI or Android Studio), `` shall be the directory + that contains `*-benchmarkData.json` in the gradle build directory. + +1. 
Analyzing remote test results + + ```shell + fireci macrobenchmark analyze --ftl-results-dir --ftl-results-dir ... + ``` + + ``, `` are Firebase Test Lab results directory names, such as + `2022-11-04_11:18:34.039437_OqZn`. + +1. Comparing two sets of result from two different FTL runs + + ```shell + fireci macrobenchmark analyze \ + --diff-mode \ + --ctl-ftl-results-dir \ + --ctl-ftl-results-dir \ + ... + --exp-ftl-results-dir \ + --exp-ftl-results-dir \ + ... + ``` + + `ctl` and `exp` are short for "control group" and "experimental group". + +1. Comparing a local test run against a FTL run + + ```shell + fireci macrobenchmark analyze \ + --diff-mode \ + --ctl-ftl-results-dir \ + --ctl-ftl-results-dir \ + ... + --exp-local-reports-dir + ``` + ## Toolchains - Gradle 7.5.1 diff --git a/health-metrics/benchmark/config.yaml b/health-metrics/benchmark/config.yaml index 8852965302e..6a8bc2a0a27 100644 --- a/health-metrics/benchmark/config.yaml +++ b/health-metrics/benchmark/config.yaml @@ -21,52 +21,41 @@ common-plugins: [com.google.gms.google-services] common-traces: [Firebase, ComponentDiscovery, Runtime] test-apps: - - sdk: firebase-config - name: config - dependencies: [com.google.firebase:firebase-config-ktx] - - sdk: firebase-common - name: common - dependencies: [com.google.firebase:firebase-common] - - sdk: firebase-crashlytics - name: crash - dependencies: [com.google.firebase:firebase-crashlytics-ktx] - plugins: [com.google.firebase.crashlytics] - - sdk: firebase-database - name: database - dependencies: [com.google.firebase:firebase-database-ktx] - - sdk: firebase-dynamic-links - name: fdl - dependencies: [com.google.firebase:firebase-dynamic-links-ktx] - - sdk: firebase-firestore - name: firestore - dependencies: [com.google.firebase:firebase-firestore-ktx] - - sdk: firebase-functions - name: functions - dependencies: [com.google.firebase:firebase-functions-ktx] - # TODO(yifany): disable temporarily due to errors of duplicate class and gradle crash - # - sdk: firebase-inappmessaging-display - # name: fiam - # dependencies: - # - com.google.firebase:firebase-analytics-ktx@18.0.3 - # - com.google.firebase:firebase-inappmessaging-ktx - # - com.google.firebase:firebase-inappmessaging-display-ktx - - sdk: firebase-messaging - name: message - dependencies: [com.google.firebase:firebase-messaging-ktx] - - sdk: firebase-perf - name: perf - dependencies: [com.google.firebase:firebase-perf-ktx] - plugins: [com.google.firebase.firebase-perf] - - sdk: firebase-storage - name: stroage - dependencies: [com.google.firebase:firebase-storage-ktx] - - -# TODO(yifany): google3 sdks, customizing FTL devices -# auth -# analytics -# combined -# - crashlytics + analytics -# - crashlytics + fireperf -# - auth + firestore -# - ... + - sdk: N.A. 
+ name: all-included + dependencies: + - com.google.firebase:firebase-abt + - com.google.firebase:firebase-appcheck + - com.google.firebase:firebase-appdistribution + - com.google.firebase:firebase-crashlytics + - com.google.firebase:firebase-database + - com.google.firebase:firebase-dynamic-links + - com.google.firebase:firebase-firestore + - com.google.firebase:firebase-functions + - com.google.firebase:firebase-inappmessaging + - com.google.firebase:firebase-inappmessaging-display + - com.google.firebase:firebase-messaging + - com.google.firebase:firebase-ml-modeldownloader + - com.google.firebase:firebase-perf + - com.google.firebase:firebase-storage + plugins: + - com.google.firebase.crashlytics + - com.google.firebase.firebase-perf + traces: + - fire-abt + - fire-app-check + - fire-appdistribution + - fire-cls + - fire-dl + - fire-fcm + - fire-fiam + - fire-fiamd + - fire-fn + - fire-fst + - fire-gcs + - fire-installations + - firebase-ml-modeldownloader + - fire-perf + - fire-rc + - fire-rtdb + - fire-transport diff --git a/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache b/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache index 4fa5af3546e..82dd0ecbf3b 100644 --- a/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache +++ b/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache @@ -39,7 +39,7 @@ class StartupBenchmark { TraceSectionMetric("{{.}}"), {{/traces}} ), - iterations = 5, + iterations = 100, startupMode = StartupMode.COLD ) { pressHome()
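
Closing note on the command wiring (plugin discovery, `ci_command` groups, `commands.py`): a quick in-process smoke test with click's `CliRunner` confirms the new group and its subcommands are registered, without needing a gradle build or an FTL project. A sketch, assuming `pip install -e "ci/fireci"` has been run:

```python
# Sketch only (not part of the patch): --help short-circuits before any
# callback runs, so no device, gradle build, or GCP credentials are required.
from click.testing import CliRunner

from fireciplugins.macrobenchmark.commands import macrobenchmark

runner = CliRunner()
for args in (['--help'], ['run', '--help'], ['analyze', '--help']):
    result = runner.invoke(macrobenchmark, args)
    assert result.exit_code == 0, result.output

print(runner.invoke(macrobenchmark, ['analyze', '--help']).output)
```
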