From f8c4dcaeba50cbff9d0595e618a45d4094db1073 Mon Sep 17 00:00:00 2001
From: Yifan Yang
Date: Fri, 11 Nov 2022 11:04:16 -0800
Subject: [PATCH] Revert "Add support of macrobenchmark result analysis in fireci (#4285)"

This reverts commit 00d8c72e28f9daae0d0064dc13de0c4ecd194b39.
---
 .github/workflows/copyright-check.yml | 4 +-
 .github/workflows/fireci.yml | 4 +-
 .gitignore | 3 +-
 ci/fireci/fireci/internal.py | 26 +-
 ci/fireci/fireci/plugins.py | 4 +-
 ci/fireci/fireciplugins/macrobenchmark.py | 319 ++++++++++++++++++
 .../fireciplugins/macrobenchmark/__init__.py | 13 -
 .../macrobenchmark/analyze/__init__.py | 13 -
 .../macrobenchmark/analyze/aggregator.py | 79 -----
 .../macrobenchmark/analyze/analyzer.py | 103 ------
 .../macrobenchmark/analyze/plotter.py | 70 ----
 .../macrobenchmark/analyze/utils.py | 82 -----
 .../fireciplugins/macrobenchmark/commands.py | 127 -------
 .../macrobenchmark/run/__init__.py | 13 -
 .../macrobenchmark/run/log_decorator.py | 50 ---
 .../macrobenchmark/run/runner.py | 101 ------
 .../macrobenchmark/run/test_project.py | 108 ------
 .../run/test_project_builder.py | 88 -----
 .../fireciplugins/macrobenchmark/run/utils.py | 64 ----
 ci/fireci/setup.cfg | 6 +-
 health-metrics/benchmark/README.md | 108 +-----
 health-metrics/benchmark/config.yaml | 87 ++---
 .../macrobenchmark/BenchmarkTest.kt.mustache | 2 +-
 23 files changed, 404 insertions(+), 1070 deletions(-)
 create mode 100644 ci/fireci/fireciplugins/macrobenchmark.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/__init__.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/commands.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/__init__.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/runner.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/test_project.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py
 delete mode 100644 ci/fireci/fireciplugins/macrobenchmark/run/utils.py

diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml
index a8b81c304a0..eccfd470a56 100644
--- a/.github/workflows/copyright-check.yml
+++ b/.github/workflows/copyright-check.yml
@@ -11,9 +11,9 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v3.0.2
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v2
         with:
-          python-version: '3.10'
+          python-version: '3.9'
       - run: |
           pip install -e "ci/fireci"
       - run: |
diff --git a/.github/workflows/fireci.yml b/.github/workflows/fireci.yml
index 8be0418d43b..7b5f7109da0 100644
--- a/.github/workflows/fireci.yml
+++ b/.github/workflows/fireci.yml
@@ -16,9 +16,9 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v3.0.2
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v2
         with:
-          python-version: '3.10'
+          python-version: '3.9'
       - run: |
           pip install -e "ci/fireci[test]"
       - run: |
diff --git a/.gitignore b/.gitignore
index da3e77d46fe..300f5bb4b2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,4 @@ firebase-crashlytics-ndk/.externalNativeBuild/
firebase-crashlytics-ndk/.cxx/ smoke-test-logs/ smoke-tests/build-debug-headGit-smoke-test -smoke-tests/firehorn.log -macrobenchmark-output.json +smoke-tests/firehorn.log \ No newline at end of file diff --git a/ci/fireci/fireci/internal.py b/ci/fireci/fireci/internal.py index 0950d770fc2..c76123e3228 100644 --- a/ci/fireci/fireci/internal.py +++ b/ci/fireci/fireci/internal.py @@ -13,6 +13,7 @@ # limitations under the License. import click +import contextlib import functools import glob import itertools @@ -20,7 +21,6 @@ import os import shutil -from contextlib import contextmanager, nullcontext _logger = logging.getLogger('fireci') @@ -30,7 +30,7 @@ def _ensure_dir(directory): os.makedirs(directory) -@contextmanager +@contextlib.contextmanager def _artifact_handler(target_directory, artifact_patterns): _logger.debug( 'Artifacts will be searched for in directories matching {} patterns and placed in {}' @@ -45,7 +45,7 @@ def _artifact_handler(target_directory, artifact_patterns): target_name = os.path.join(target_directory, "_".join(path.split('/'))) _logger.debug('Copying artifact {} to {}'.format(path, target_name)) if os.path.isdir(path): - shutil.copytree(path, target_name, dirs_exist_ok=True) + shutil.copytree(path, target_name) else: shutil.copyfile(path, target_name) @@ -68,8 +68,8 @@ class _CommonOptions: '--artifact-patterns', default=('**/build/test-results', '**/build/reports'), help= - 'Shell-style artifact patterns that are copied into `artifact-target-dir`. ' - 'Can be specified multiple times.', + 'Shell-style artifact patterns that are copied into `artifact-target-dir`.'\ + 'Can be specified multiple times.', multiple=True, type=str, ) @@ -83,34 +83,30 @@ def main(options, **kwargs): setattr(options, k, v) -def ci_command(name=None, cls=click.Command, group=main): +def ci_command(name=None): """Decorator to use for CI commands. The differences from the standard @click.command are: * Allows configuration of artifacts that are uploaded for later viewing in CI. - * Registers the command automatically. + * Registers the command automatically - :param name: Optional name of the task. Defaults to the function name that is decorated with this decorator. - :param cls: Specifies whether the func is a command or a command group. Defaults to `click.Command`. - :param group: Specifies the group the command belongs to. Defaults to the `main` command group. + :param name: Optional name of the task. Defaults to the function name that is decorated with + this decorator. """ def ci_command(f): actual_name = f.__name__ if name is None else name - @click.command(name=actual_name, cls=cls, help=f.__doc__) + @main.command(name=actual_name, help=f.__doc__) @_pass_options @click.pass_context def new_func(ctx, options, *args, **kwargs): with _artifact_handler( options.artifact_target_dir, - options.artifact_patterns, - ) if cls is click.Command else nullcontext(): + options.artifact_patterns): return ctx.invoke(f, *args, **kwargs) - group.add_command(new_func) - return functools.update_wrapper(new_func, f) return ci_command diff --git a/ci/fireci/fireci/plugins.py b/ci/fireci/fireci/plugins.py index 715c8de0884..66aebd30f8c 100644 --- a/ci/fireci/fireci/plugins.py +++ b/ci/fireci/fireci/plugins.py @@ -27,7 +27,7 @@ def discover(): Note: plugins *must* define the `firebaseplugins` package as a namespace package. 
See: https://packaging.python.org/guides/packaging-namespace-packages/ """ - modules = pkgutil.walk_packages(fireciplugins.__path__, - fireciplugins.__name__ + ".") + modules = pkgutil.iter_modules(fireciplugins.__path__, + fireciplugins.__name__ + ".") for _, name, _ in modules: importlib.import_module(name) diff --git a/ci/fireci/fireciplugins/macrobenchmark.py b/ci/fireci/fireciplugins/macrobenchmark.py new file mode 100644 index 00000000000..a0fc2f81a39 --- /dev/null +++ b/ci/fireci/fireciplugins/macrobenchmark.py @@ -0,0 +1,319 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import glob +import json +import logging +import os +import random +import re +import shutil +import sys +import tempfile +import uuid + +import click +import numpy +import pystache +import yaml +from google.cloud import storage + +from fireci import ci_command +from fireci import ci_utils +from fireci import uploader +from fireci.dir_utils import chdir + +_logger = logging.getLogger('fireci.macrobenchmark') + + +@click.option( + '--build-only/--no-build-only', + default=False, + help='Whether to only build tracing test apps or to also run them on FTL afterwards' +) +@ci_command() +def macrobenchmark(build_only): + """Measures app startup times for Firebase SDKs.""" + asyncio.run(_launch_macrobenchmark_test(build_only)) + + +async def _launch_macrobenchmark_test(build_only): + _logger.info('Starting macrobenchmark test...') + + artifact_versions = await _assemble_all_artifacts() + _logger.info(f'Artifact versions: {artifact_versions}') + + test_dir = await _prepare_test_directory() + _logger.info(f'Directory for test apps: {test_dir}') + + config = await _process_config_yaml() + _logger.info(f'Processed yaml configurations: {config}') + + tests = [MacrobenchmarkTest(app, artifact_versions, os.getcwd(), test_dir) for app in config['test-apps']] + + _logger.info(f'Building {len(tests)} macrobenchmark test apps...') + # TODO(yifany): investigate why it is much slower with asyncio.gather + # - on corp workstations (9 min) than M1 macbook pro (3 min) + # - with gradle 7.5.1 (9 min) than gradle 6.9.2 (5 min) + # await asyncio.gather(*[x.build() for x in tests]) + for test in tests: + await test.build() + + if not build_only: + _logger.info(f'Submitting {len(tests)} tests to Firebase Test Lab...') + results = await asyncio.gather(*[x.test() for x in tests], return_exceptions=True) + await _post_processing(results) + + _logger.info('Macrobenchmark test finished.') + + +async def _assemble_all_artifacts(): + await (await asyncio.create_subprocess_exec('./gradlew', 'assembleAllForSmokeTests')).wait() + + with open('build/m2repository/changed-artifacts.json') as json_file: + artifacts = json.load(json_file) + return dict(_artifact_key_version(x) for x in artifacts['headGit']) + + +def _artifact_key_version(artifact): + group_id, artifact_id, version = artifact.split(':') + return f'{group_id}:{artifact_id}', version + + +async def _process_config_yaml(): + with 
open('health-metrics/benchmark/config.yaml') as yaml_file: + config = yaml.safe_load(yaml_file) + for app in config['test-apps']: + app['plugins'] = app.get('plugins', []) + app['traces'] = app.get('traces', []) + app['plugins'].extend(config['common-plugins']) + app['traces'].extend(config['common-traces']) + return config + + +async def _prepare_test_directory(): + test_dir = tempfile.mkdtemp(prefix='benchmark-test-') + + # Required for creating gradle wrapper, as the dir is not defined in the root settings.gradle + open(os.path.join(test_dir, 'settings.gradle'), 'w').close() + + command = ['./gradlew', 'wrapper', '--gradle-version', '7.5.1', '--project-dir', test_dir] + await (await asyncio.create_subprocess_exec(*command)).wait() + + return test_dir + + +async def _post_processing(results): + _logger.info(f'Macrobenchmark results: {results}') + + if os.getenv('CI') is None: + _logger.info('Running locally. Results upload skipped.') + return + + # Upload successful measurements to the metric service + measurements = [] + for result in results: + if not isinstance(result, Exception): + measurements.extend(result) + + log = ci_utils.ci_log_link() + test_report = {'benchmarks': measurements, 'log': log} + + metrics_service_url = 'https://api.firebase-sdk-health-metrics.com' + access_token = ci_utils.gcloud_identity_token() + uploader.post_report(test_report, metrics_service_url, access_token, 'macrobenchmark') + + # Raise exceptions for failed measurements + if any(map(lambda x: isinstance(x, Exception), results)): + _logger.error(f'Exceptions: {[x for x in results if isinstance(x, Exception)]}') + raise click.ClickException('Macrobenchmark test failed with above errors.') + + +class MacrobenchmarkTest: + """Builds the test based on configurations and runs the test on FTL.""" + def __init__( + self, + test_app_config, + artifact_versions, + repo_root_dir, + test_dir, + logger=_logger + ): + self.test_app_config = test_app_config + self.artifact_versions = artifact_versions + self.repo_root_dir = repo_root_dir + self.test_dir = test_dir + self.logger = MacrobenchmarkLoggerAdapter(logger, test_app_config['sdk']) + self.test_app_dir = os.path.join(test_dir, test_app_config['name']) + self.test_results_bucket = 'fireescape-benchmark-results' + self.test_results_dir = str(uuid.uuid4()) + self.gcs_client = storage.Client() + + async def build(self): + """Creates test app project and assembles app and test apks.""" + await self._create_benchmark_projects() + await self._assemble_benchmark_apks() + + async def test(self): + """Runs benchmark tests on FTL and fetches FTL results from GCS.""" + await self._execute_benchmark_tests() + return await self._aggregate_benchmark_results() + + async def _create_benchmark_projects(self): + app_name = self.test_app_config['name'] + self.logger.info(f'Creating test app "{app_name}"...') + + self.logger.info(f'Copying project template files into "{self.test_app_dir}"...') + template_dir = os.path.join(self.repo_root_dir, 'health-metrics/benchmark/template') + shutil.copytree(template_dir, self.test_app_dir) + + self.logger.info(f'Copying gradle wrapper binary into "{self.test_app_dir}"...') + shutil.copy(os.path.join(self.test_dir, 'gradlew'), self.test_app_dir) + shutil.copy(os.path.join(self.test_dir, 'gradlew.bat'), self.test_app_dir) + shutil.copytree(os.path.join(self.test_dir, 'gradle'), os.path.join(self.test_app_dir, 'gradle')) + + with chdir(self.test_app_dir): + mustache_context = await self._prepare_mustache_context() + renderer = pystache.Renderer() 
+ mustaches = glob.glob('**/*.mustache', recursive=True) + for mustache in mustaches: + self.logger.info(f'Processing template file: {mustache}') + result = renderer.render_path(mustache, mustache_context) + original_name = mustache.removesuffix('.mustache') + with open(original_name, 'w') as file: + file.write(result) + + async def _assemble_benchmark_apks(self): + with chdir(self.test_app_dir): + await self._exec_subprocess('./gradlew', ['assemble']) + + async def _execute_benchmark_tests(self): + app_apk_path = glob.glob(f'{self.test_app_dir}/**/app-benchmark.apk', recursive=True)[0] + test_apk_path = glob.glob(f'{self.test_app_dir}/**/macrobenchmark-benchmark.apk', recursive=True)[0] + + self.logger.info(f'App apk: {app_apk_path}') + self.logger.info(f'Test apk: {test_apk_path}') + + ftl_environment_variables = [ + 'clearPackageData=true', + 'additionalTestOutputDir=/sdcard/Download', + 'no-isolated-storage=true', + ] + executable = 'gcloud' + args = ['firebase', 'test', 'android', 'run'] + args += ['--type', 'instrumentation'] + args += ['--app', app_apk_path] + args += ['--test', test_apk_path] + args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait'] + args += ['--directories-to-pull', '/sdcard/Download'] + args += ['--results-bucket', f'gs://{self.test_results_bucket}'] + args += ['--results-dir', self.test_results_dir] + args += ['--environment-variables', ','.join(ftl_environment_variables)] + args += ['--timeout', '30m'] + args += ['--project', 'fireescape-c4819'] + + await self._exec_subprocess(executable, args) + + async def _prepare_mustache_context(self): + mustache_context = { + 'm2repository': os.path.join(self.repo_root_dir, 'build/m2repository'), + 'plugins': self.test_app_config.get('plugins', []), + 'traces': self.test_app_config.get('traces', []), + 'dependencies': [], + } + + if 'dependencies' in self.test_app_config: + for dep in self.test_app_config['dependencies']: + if '@' in dep: + key, version = dep.split('@', 1) + dependency = {'key': key, 'version': version} + else: + dependency = {'key': dep, 'version': self.artifact_versions[dep]} + mustache_context['dependencies'].append(dependency) + + return mustache_context + + async def _aggregate_benchmark_results(self): + results = [] + blobs = self.gcs_client.list_blobs(self.test_results_bucket, prefix=self.test_results_dir) + files = [x for x in blobs if re.search(r'sdcard/Download/[^/]*\.json', x.name)] + for file in files: + device = re.search(r'([^/]*)/artifacts/', file.name).group(1) + benchmarks = json.loads(file.download_as_bytes())['benchmarks'] + for benchmark in benchmarks: + method = benchmark['name'] + clazz = benchmark['className'].split('.')[-1] + runs = benchmark['metrics']['timeToInitialDisplayMs']['runs'] + results.append({ + 'sdk': self.test_app_config['sdk'], + 'device': device, + 'name': f'{clazz}.{method}', + 'min': min(runs), + 'max': max(runs), + 'p50': numpy.percentile(runs, 50), + 'p90': numpy.percentile(runs, 90), + 'p99': numpy.percentile(runs, 99), + 'unit': 'ms', + }) + self.logger.info(f'Benchmark results: {results}') + return results + + async def _exec_subprocess(self, executable, args): + command = " ".join([executable, *args]) + self.logger.info(f'Executing command: "{command}"...') + + proc = await asyncio.subprocess.create_subprocess_exec( + executable, + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + await asyncio.gather( + self._stream_output(executable, proc.stdout), + self._stream_output(executable, proc.stderr) + ) + + 
await proc.communicate() + if proc.returncode == 0: + self.logger.info(f'"{command}" finished.') + else: + message = f'"{command}" exited with return code {proc.returncode}.' + self.logger.error(message) + raise click.ClickException(message) + + async def _stream_output(self, executable, stream: asyncio.StreamReader): + async for line in stream: + self.logger.info(f'[{executable}] {line.decode("utf-8").strip()}') + + +class MacrobenchmarkLoggerAdapter(logging.LoggerAdapter): + """Decorates log messages for a sdk to make them more distinguishable.""" + + reset_code = '\x1b[m' + + @staticmethod + def random_color_code(): + code = random.randint(16, 231) # https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit + return f'\x1b[38;5;{code}m' + + def __init__(self, logger, sdk_name, color_code=None): + super().__init__(logger, {}) + self.sdk_name = sdk_name + self.color_code = self.random_color_code() if color_code is None else color_code + + def process(self, msg, kwargs): + colored = f'{self.color_code}[{self.sdk_name}]{self.reset_code} {msg}' + uncolored = f'[{self.sdk_name}] {msg}' + return colored if sys.stderr.isatty() else uncolored, kwargs diff --git a/ci/fireci/fireciplugins/macrobenchmark/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/__init__.py deleted file mode 100644 index 6d6d1266c32..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py deleted file mode 100644 index 6d6d1266c32..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/analyze/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py deleted file mode 100644 index 5b75e3f2678..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/analyze/aggregator.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import logging -import pandas as pd -import seaborn as sns - -from pathlib import Path - -logger = logging.getLogger('fireci.macrobenchmark') -sns.set() - - -def calculate_statistic(trace: str, device: str, data: pd.DataFrame, output_dir: Path = None): - logger.info(f'Calculating statistics for trace "{trace}" on device "{device}" ...') - - # Calculate percentiles per each run_id - quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] - percentiles = data.groupby('run_id').quantile(quantiles, numeric_only=True) - percentiles.index.set_names('percentile', level=1, inplace=True) - percentiles = percentiles.reset_index(['run_id', 'percentile']) - percentiles = percentiles.pivot(index='run_id', columns='percentile', values='duration') - - def mapper(quantile: float) -> str: return f'p{int(quantile * 100)}' - - percentiles.rename(mapper=mapper, axis='columns', inplace=True) - - # Calculate dispersions of each percentile over all runs - mean = percentiles.mean() - std = percentiles.std() # standard deviation - cv = std / mean # coefficient of variation (relative standard deviation) - mad = (percentiles - percentiles.mean()).abs().mean() # mean absolute deviation - rmad = mad / mean # relative mean absolute deviation (mad / mean) - dispersions = pd.DataFrame([pd.Series(cv, name='cv'), pd.Series(rmad, name='rmad')]) - - # Optionally save percentiles and dispersions to file - if output_dir: - percentiles.to_json(output_dir.joinpath('percentiles.json'), orient='index') - dispersions.to_json(output_dir.joinpath('dispersions.json'), orient='index') - logger.info(f'Percentiles and dispersions saved in: {output_dir}') - - return percentiles, dispersions - - -def calculate_statistic_diff( - trace: str, - device: str, - control: pd.DataFrame, - experimental: pd.DataFrame, - output_dir: Path = None, -): - logger.info(f'Calculating statistic diff for trace "{trace}" on device "{device}" ...') - - ctl_percentiles, _ = calculate_statistic(trace, device, control) - exp_percentiles, _ = calculate_statistic(trace, device, experimental) - - ctl_mean = ctl_percentiles.mean() - exp_mean = exp_percentiles.mean() - - delta = exp_mean - ctl_mean - percentage = delta / ctl_mean - - # Optionally save statistics to file - if output_dir: - delta.to_json(output_dir.joinpath('delta.json')) - percentage.to_json(output_dir.joinpath('percentage.json')) - logger.info(f'Percentiles diff saved in: {output_dir}') diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py deleted file mode 100644 index 13d081a85b1..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/analyze/analyzer.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import tempfile -import pandas as pd - -from .aggregator import calculate_statistic, calculate_statistic_diff -from .plotter import plot_graph, plot_diff_graph -from .utils import collect_data_points, DataPoint -from click import progressbar -from pathlib import Path - - -logger = logging.getLogger('fireci.macrobenchmark') - - -def start( - diff_mode: bool, - ftl_results_dir: list[str], - local_reports_dir: Path, - ctl_ftl_results_dir: list[str], - ctl_local_reports_dir: Path, - exp_ftl_results_dir: list[str], - exp_local_reports_dir: Path, - output_dir: Path -): - logger.info('Starting to analyze macrobenchmark test results ...') - - if not output_dir: - output_dir = Path(tempfile.mkdtemp(prefix='macrobenchmark-analysis-')) - logger.info(f'Created temporary dir "{output_dir}" to save analysis results') - - if not diff_mode: - data_points = collect_data_points(ftl_results_dir, local_reports_dir) - _process(data_points, output_dir) - else: - logger.info('Running in diff mode ...') - ctl_data_points = collect_data_points(ctl_ftl_results_dir, ctl_local_reports_dir) - exp_data_points = collect_data_points(exp_ftl_results_dir, exp_local_reports_dir) - _diff(ctl_data_points, exp_data_points, output_dir) - - logger.info(f'Completed analysis and saved output in: {output_dir}') - - -def _process(data_points: list[DataPoint], output_dir: Path) -> None: - data = pd.DataFrame(data_points) - traces = sorted(data['trace'].unique()) - devices = sorted(data['device'].unique()) - - trace_device_combinations = [(trace, device) for trace in traces for device in devices] - - with progressbar(trace_device_combinations) as combinations: - for trace, device in combinations: - combination_dir = output_dir.joinpath(trace, device) - combination_dir.mkdir(parents=True, exist_ok=True) - subset = _filter_subset(data, trace, device) - calculate_statistic(trace, device, subset, combination_dir) - plot_graph(trace, device, subset, combination_dir) - - -def _diff( - ctl_data_points: list[DataPoint], - exp_data_points: list[DataPoint], - output_dir: Path -) -> None: - ctl_data = pd.DataFrame(ctl_data_points) - exp_data = pd.DataFrame(exp_data_points) - all_data = pd.concat([ctl_data, exp_data]) - - traces = sorted(all_data['trace'].unique()) - devices = sorted(all_data['device'].unique()) - - trace_device_combinations = [(trace, device) for trace in traces for device in devices] - - with progressbar(trace_device_combinations) as combinations: - for trace, device in combinations: - combination_dir = output_dir.joinpath(trace, device) - combination_dir.mkdir(parents=True, exist_ok=True) - - ctl_subset = _filter_subset(ctl_data, trace, device) - exp_subset = _filter_subset(exp_data, trace, device) - - calculate_statistic_diff(trace, device, ctl_subset, exp_subset, combination_dir) - plot_diff_graph(trace, device, ctl_subset, exp_subset, combination_dir) - - -def _filter_subset(data: pd.DataFrame, trace: str, device: str) -> pd.DataFrame: - return data.loc[ - (data['trace'] == trace) & (data['device'] == device), - ['duration', 'run_id'] - ] diff --git 
a/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py deleted file mode 100644 index 75b9e53fcbe..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/analyze/plotter.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import numpy as np -import pandas as pd -import seaborn as sns - -from pathlib import Path - - -logger = logging.getLogger('fireci.macrobenchmark') -sns.set() - - -def plot_graph(trace: str, device: str, data: pd.DataFrame, output_dir: Path): - logger.info(f'Plotting graphs for trace "{trace}" on device "{device}" ...') - - unique_run_ids = len(data['run_id'].unique()) - col_wrap = int(np.ceil(np.sqrt(unique_run_ids))) - - histograms = sns.displot(data=data, x='duration', kde=True, col="run_id", col_wrap=col_wrap) - histograms.set_axis_labels(x_var=f'{trace} (ms)') - histograms.set_titles(f'{device} ({{col_var}} = {{col_name}})') - histograms.savefig(output_dir.joinpath('histograms.svg')) - - distributions = sns.displot( - data=data, x='duration', kde=True, height=8, - hue='run_id', palette='muted', multiple='dodge' - ) - distributions.set_axis_labels(x_var=f'{trace} (ms)').set(title=device) - distributions.savefig(output_dir.joinpath('distributions.svg')) - - logger.info(f'Graphs saved in: {output_dir}') - - -def plot_diff_graph( - trace: str, - device: str, - control: pd.DataFrame, - experimental: pd.DataFrame, - output_dir: Path -): - logger.info(f'Plotting distribution diff graph for trace "{trace}" on device "{device}" ...') - - control_run_ids = control['run_id'] - experimental_run_ids = experimental['run_id'] - all_data = pd.concat([control, experimental]) - - palette = {x: 'b' for x in control_run_ids} | {x: 'r' for x in experimental_run_ids} - - distribution_diff = sns.displot( - data=all_data, x='duration', kde=True, height=8, - hue='run_id', palette=palette, multiple='dodge' - ) - distribution_diff.set_axis_labels(x_var=f'{trace} (ms)').set(title=device) - distribution_diff.savefig(output_dir.joinpath('distribution_diff.svg')) - - logger.info(f'Graph saved in: {output_dir}') diff --git a/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py b/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py deleted file mode 100644 index c7730e792b1..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/analyze/utils.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import logging -import re -import tempfile - -from click import ClickException -from google.cloud import storage -from pathlib import Path -from typing import TypedDict - - -logger = logging.getLogger('fireci.macrobenchmark') -DataPoint = TypedDict('DataPoint', {'duration': float, 'device': str, 'trace': str, 'run_id': str}) - - -def collect_data_points(ftl_results_dir: list[str], local_reports_dir: Path) -> list[DataPoint]: - if not ftl_results_dir and not local_reports_dir: - raise ClickException('Neither ftl-results-dir or local-reports-dir is provided.') - elif ftl_results_dir and not local_reports_dir: - temp_dir = _download(ftl_results_dir) - return _extract_raw_data(temp_dir) - elif not ftl_results_dir and local_reports_dir: - return _extract_raw_data(local_reports_dir) - else: - raise ClickException('Should specify either ftl-results-dir or local-reports-dir, not both.') - - -def _download(ftl_results_dirs: list[str]) -> Path: - ftl_results_bucket = 'fireescape-benchmark-results' - gcs = storage.Client() - - temp_dir = tempfile.mkdtemp(prefix='ftl-results-') - for ftl_results_dir in ftl_results_dirs: - blobs = gcs.list_blobs(ftl_results_bucket, prefix=ftl_results_dir) - files = [f for f in blobs if f.name.endswith('.json')] - for file in files: - device = re.search(r'([^/]*)/artifacts/', file.name).group(1) - report_dir = Path(temp_dir).joinpath(ftl_results_dir, device) - report_dir.mkdir(parents=True, exist_ok=True) - filename = file.name.split('/')[-1] - file.download_to_filename(report_dir.joinpath(filename)) - logger.info(f'Downloaded "{file.name}" to "{report_dir}"') - - return Path(temp_dir) - - -def _extract_raw_data(test_reports_dir: Path) -> list[DataPoint]: - data_points: list[DataPoint] = [] - reports = sorted(list(test_reports_dir.rglob("*-benchmarkData.json"))) - for report in reports: - logger.info(f'Processing "{report}" ...') - - run_id = str(report.relative_to(test_reports_dir)).split('/')[0] - with open(report) as file: - obj = json.load(file) - build_context = obj['context']['build'] - device = f'{build_context["device"]}-{build_context["version"]["sdk"]}' - for metric in obj['benchmarks'][0]['metrics'].keys(): - measurements = obj['benchmarks'][0]['metrics'][metric]['runs'] - trace = metric.removesuffix('Ms') - data_points.extend([{ - 'duration': measurement, - 'device': device, - 'trace': trace, - 'run_id': run_id - } for measurement in measurements]) - logger.info(f'Extracted {len(data_points)} data points from reports in "{test_reports_dir}"') - return data_points diff --git a/ci/fireci/fireciplugins/macrobenchmark/commands.py b/ci/fireci/fireciplugins/macrobenchmark/commands.py deleted file mode 100644 index d0fba03d2cf..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/commands.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -import click - -from .analyze import analyzer -from .run import runner -from fireci import ci_command -from pathlib import Path - - -@ci_command(cls=click.Group) -def macrobenchmark(): - """Macrobenchmark testing command group.""" - pass - - -@click.option( - '--build-only', - is_flag=True, - default=False, - show_default=True, - help='Build the test projects without running the test.' -) -@click.option( - '--local/--remote', - required=True, - help='Run the test on local devices or Firebase Test Lab.' -) -@click.option( - '--repeat', - default=1, - show_default=True, - help='Number of times to repeat the test (for obtaining more data points).' -) -@click.option( - '--output', - type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), - default='macrobenchmark-output.json', - show_default=True, - help='The file for saving macrobenchmark test output if running on Firebase Test Lab.' -) -@ci_command(group=macrobenchmark) -def run(build_only: bool, local: bool, repeat: int, output: Path): - """Run macrobenchmark test.""" - asyncio.run(runner.start(build_only, local, repeat, output)) - - -@click.option( - '--diff-mode', - is_flag=True, - default=False, - help='Compare two sets of macrobenchmark result.' -) -@click.option( - '--ftl-results-dir', - multiple=True, - help='Firebase Test Lab results directory name. Can be specified multiple times.' -) -@click.option( - '--local-reports-dir', - type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), - help='Path to the directory of local test reports.' -) -@click.option( - '--ctl-ftl-results-dir', - multiple=True, - help='FTL results dir of the control group, if running in diff mode. ' - 'Can be specified multiple times.' -) -@click.option( - '--ctl-local-reports-dir', - type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), - help='Path to the local test reports of the control group, if running in diff mode.' -) -@click.option( - '--exp-ftl-results-dir', - multiple=True, - help='FTL results dir of the experimental group, if running in diff mode. ' - 'Can be specified multiple times.' -) -@click.option( - '--exp-local-reports-dir', - type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), - help='Path to the local test reports of the experimental group, if running in diff mode.' -) -@click.option( - '--output-dir', - type=click.Path(dir_okay=True, resolve_path=True, path_type=Path), - help='The directory for saving macrobenchmark analysis result.' -) -@ci_command(group=macrobenchmark) -def analyze( - diff_mode: bool, - ftl_results_dir: list[str], - local_reports_dir: Path, - ctl_ftl_results_dir: list[str], - ctl_local_reports_dir: Path, - exp_ftl_results_dir: list[str], - exp_local_reports_dir: Path, - output_dir: Path -): - """Analyze macrobenchmark result.""" - analyzer.start( - diff_mode, - ftl_results_dir, - local_reports_dir, - ctl_ftl_results_dir, - ctl_local_reports_dir, - exp_ftl_results_dir, - exp_local_reports_dir, - output_dir, - ) - -# TODO(yifany): support of command chaining diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py b/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py deleted file mode 100644 index 6d6d1266c32..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py b/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py deleted file mode 100644 index 062d6fe809e..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/log_decorator.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import sys - -from logging import Logger, LoggerAdapter - - -RESET_CODE = '\x1b[m' - - -class LogDecorator(LoggerAdapter): - """Decorates log messages with colors in console output.""" - - def __init__(self, logger: Logger | LoggerAdapter, key: str): - super().__init__(logger, {}) - self.key = key - self.color_code = self._random_color_code() - - def process(self, msg, kwargs): - colored, uncolored = self._produce_prefix() - result = f'{colored if sys.stderr.isatty() else uncolored} {msg}' - return result, kwargs - - @staticmethod - def _random_color_code(): - code = random.randint(16, 231) # https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit - return f'\x1b[38;5;{code}m' - - def _produce_prefix(self): - if hasattr(super(), '_produce_prefix'): - colored_super, uncolored_super = getattr(super(), '_produce_prefix')() - colored = f'{colored_super} {self.color_code}[{self.key}]{RESET_CODE}' - uncolored = f'{uncolored_super} [{self.key}]' - else: - colored = f'{self.color_code}[{self.key}]{RESET_CODE}' - uncolored = f'[{self.key}]' - return colored, uncolored diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/runner.py b/ci/fireci/fireciplugins/macrobenchmark/run/runner.py deleted file mode 100644 index fb51406108b..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/runner.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -import json -import logging -import tempfile - -import click -import yaml - -from .test_project_builder import TestProjectBuilder -from .utils import execute -from pathlib import Path - - -logger = logging.getLogger('fireci.macrobenchmark') - - -async def start(build_only: bool, local: bool, repeat: int, output: Path): - logger.info('Starting macrobenchmark test ...') - - config = _process_config_yaml() - product_versions = _assemble_all_products() - test_dir = _prepare_test_directory() - template_project_dir = Path('health-metrics/benchmark/template') - - test_projects = [ - TestProjectBuilder( - test_config, - test_dir, - template_project_dir, - product_versions, - ).build() for test_config in config['test-apps']] - - if not build_only: - if local: - for test_project in test_projects: - test_project.run_local(repeat) - else: - remote_runs = [test_project.run_remote(repeat) for test_project in test_projects] - results = await asyncio.gather(*remote_runs, return_exceptions=True) - test_outputs = [x for x in results if not isinstance(x, Exception)] - exceptions = [x for x in results if isinstance(x, Exception)] - - with open(output, 'w') as file: - json.dump(test_outputs, file) - logger.info(f'Output of remote testing saved to: {output}') - - if exceptions: - logger.error(f'Exceptions occurred: {exceptions}') - for test_output in test_outputs: - if test_output['exceptions']: - logger.error(f'Exceptions occurred: {test_output["exceptions"]}') - - if exceptions or any(test_output['exceptions'] for test_output in test_outputs): - raise click.ClickException('Macrobenchmark test failed with above exceptions') - - logger.info(f'Completed macrobenchmark test successfully') - - -def _assemble_all_products() -> dict[str, str]: - execute('./gradlew', 'assembleAllForSmokeTests', logger=logger) - - product_versions: dict[str, str] = {} - with open('build/m2repository/changed-artifacts.json') as json_file: - artifacts = json.load(json_file) - for artifact in artifacts['headGit']: - group_id, artifact_id, version = artifact.split(':') - product_versions[f'{group_id}:{artifact_id}'] = version - - logger.info(f'Product versions: {product_versions}') - return product_versions - - -def _process_config_yaml(): - with open('health-metrics/benchmark/config.yaml') as yaml_file: - config = yaml.safe_load(yaml_file) - for app in config['test-apps']: - app['plugins'] = app.get('plugins', []) - app['traces'] = app.get('traces', []) - app['plugins'].extend(config['common-plugins']) - app['traces'].extend(config['common-traces']) - return config - - -def _prepare_test_directory() -> Path: - test_dir = tempfile.mkdtemp(prefix='benchmark-test-') - logger.info(f'Temporary test directory created at: {test_dir}') - return Path(test_dir) diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py b/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py deleted file mode 100644 index 8947261a1df..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/test_project.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import asyncio -import glob -import re -import shutil -import random - -from .log_decorator import LogDecorator -from .utils import execute, execute_async, generate_test_run_id -from fireci.dir_utils import chdir -from logging import getLogger, Logger, LoggerAdapter -from pathlib import Path -from typing import TypedDict - -logger = getLogger('fireci.macrobenchmark') - - -class RemoteTestOutput(TypedDict, total=False): - project: str - successful_runs: list[str] - exceptions: list[str] # Using str due to Exception being not JSON serializable - - -class TestProject: - def __init__(self, name: str, project_dir: Path, custom_logger: Logger | LoggerAdapter): - self.name = name - self.test_project_dir = project_dir - self.logger = custom_logger - - def run_local(self, repeat: int): - self.logger.info(f'Running test locally for {repeat} times ...') - local_reports_dir = self.test_project_dir.joinpath('_reports') - - with chdir(self.test_project_dir): - for index in range(repeat): - run_id = generate_test_run_id() - run_logger = LogDecorator(self.logger, f'run-{index}') - run_logger.info(f'Run-{index}: {run_id}') - execute('./gradlew', ':macrobenchmark:connectedCheck', logger=run_logger) - - reports = self.test_project_dir.rglob('build/**/*-benchmarkData.json') - run_dir = local_reports_dir.joinpath(run_id) - for report in reports: - device = re.search(r'benchmark/connected/([^/]*)/', str(report)).group(1) - device_dir = run_dir.joinpath(device) - device_dir.mkdir(parents=True, exist_ok=True) - shutil.copy(report, device_dir) - run_logger.debug(f'Copied report file "{report}" to "{device_dir}"') - - self.logger.info(f'Finished all {repeat} runs, local reports dir: "{local_reports_dir}"') - - async def run_remote(self, repeat: int) -> RemoteTestOutput: - self.logger.info(f'Running test remotely for {repeat} times ...') - - with chdir(self.test_project_dir): - await execute_async('./gradlew', 'assemble', logger=self.logger) - app_apk_path = glob.glob('**/app-benchmark.apk', recursive=True)[0] - test_apk_path = glob.glob('**/macrobenchmark-benchmark.apk', recursive=True)[0] - self.logger.info(f'App apk: "{app_apk_path}", Test apk: "{test_apk_path}"') - - async def run(index: int, run_id: str) -> str: - run_logger = LogDecorator(self.logger, f'run-{index}') - run_logger.info(f'Run-{index}: {run_id}') - ftl_environment_variables = [ - 'clearPackageData=true', - 'additionalTestOutputDir=/sdcard/Download', - 'no-isolated-storage=true', - ] - executable = 'gcloud' - args = ['firebase', 'test', 'android', 'run'] - args += ['--type', 'instrumentation'] - args += ['--app', app_apk_path] - args += ['--test', test_apk_path] - args += ['--device', 'model=oriole,version=32,locale=en,orientation=portrait'] - args += ['--directories-to-pull', '/sdcard/Download'] - args += ['--results-bucket', 'fireescape-benchmark-results'] - args += ['--results-dir', run_id] - args += ['--environment-variables', ','.join(ftl_environment_variables)] - args += ['--timeout', '30m'] - args += ['--project', 'fireescape-c4819'] - await execute_async(executable, *args, logger=run_logger) - return run_id - - runs = [run(i, generate_test_run_id()) for i in range(repeat)] - results = await asyncio.gather(*runs, return_exceptions=True) - successes = [x for x in results if not isinstance(x, Exception)] - exceptions = [x for x in results if isinstance(x, Exception)] - - self.logger.info(f'Finished all {repeat} runs, successes: 
{successes}, failures: {exceptions}') - - return RemoteTestOutput( - project=self.name, - successful_runs=successes, - exceptions=[str(e) for e in exceptions] - ) diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py b/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py deleted file mode 100644 index a98bb6f56a3..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/test_project_builder.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import pystache -import shutil - -from .log_decorator import LogDecorator -from .test_project import TestProject -from .utils import execute -from pathlib import Path - - -logger = logging.getLogger('fireci.macrobenchmark') - - -class TestProjectBuilder: - def __init__( - self, - test_config: any, - test_dir: Path, - template_project_dir: Path, - product_versions: dict[str, str] - ): - self.test_config = test_config - self.template_project_dir = template_project_dir - self.product_versions = product_versions - - self.name = test_config['name'] - self.logger = LogDecorator(logger, self.name) - self.project_dir = test_dir.joinpath(self.name) - - def build(self) -> TestProject: - self.logger.info(f'Creating test project "{self.name}" ...') - - self._copy_template_project() - self._flesh_out_mustache_template_files() - self._download_gradle_wrapper() - - self.logger.info(f'Test project "{self.name}" created at "{self.project_dir}"') - return TestProject(self.name, self.project_dir, self.logger) - - def _copy_template_project(self): - shutil.copytree(self.template_project_dir, self.project_dir) - self.logger.debug(f'Copied project template files into "{self.project_dir}"') - - def _download_gradle_wrapper(self): - args = ['wrapper', '--gradle-version', '7.5.1', '--project-dir', str(self.project_dir)] - execute('./gradlew', *args, logger=self.logger) - self.logger.debug(f'Created gradle wrapper in "{self.project_dir}"') - - def _flesh_out_mustache_template_files(self): - mustache_context = { - 'm2repository': os.path.abspath('build/m2repository'), - 'plugins': self.test_config.get('plugins', []), - 'traces': self.test_config.get('traces', []), - 'dependencies': [], - } - - if 'dependencies' in self.test_config: - for dep in self.test_config['dependencies']: - if '@' in dep: - key, version = dep.split('@', 1) - dependency = {'key': key, 'version': version} - else: - dependency = {'key': dep, 'version': self.product_versions[dep]} - mustache_context['dependencies'].append(dependency) - - renderer = pystache.Renderer() - mustaches = self.project_dir.rglob('**/*.mustache') - for mustache in mustaches: - self.logger.debug(f'Processing template file: {mustache}') - result = renderer.render_path(mustache, mustache_context) - original_name = Path(str(mustache).removesuffix('.mustache')) - with open(original_name, 'w') as file: - file.write(result) diff --git a/ci/fireci/fireciplugins/macrobenchmark/run/utils.py 
b/ci/fireci/fireciplugins/macrobenchmark/run/utils.py deleted file mode 100644 index 2ca8a73ee5a..00000000000 --- a/ci/fireci/fireciplugins/macrobenchmark/run/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import string -import random - -from asyncio import create_subprocess_exec -from asyncio.subprocess import PIPE as ASYNC_PIPE, STDOUT as ASYNC_STDOUT -from logging import Logger, LoggerAdapter -from subprocess import Popen, PIPE, STDOUT - - -def generate_test_run_id() -> str: - now = datetime.datetime.now() - date = now.date() - time = now.time() - name = ''.join(random.choices(string.ascii_letters, k=4)) - return f'{date}_{time}_{name}' - - -def execute(program: str, *args: [str], logger: Logger | LoggerAdapter) -> None: - command = " ".join([program, *args]) - logger.info(f'Executing subprocess: "{command}" ...') - - popen = Popen([program, *args], stdout=PIPE, stderr=STDOUT) - for line in popen.stdout: - logger.info(f'[{program}] {line.decode("utf-8").strip()}') - popen.communicate() - - if popen.returncode == 0: - logger.info(f'"{command}" succeeded') - else: - message = f'"{command}" failed with return code {popen.returncode}' - logger.error(message) - raise RuntimeError(message) - - -async def execute_async(program: str, *args: [str], logger: Logger | LoggerAdapter) -> None: - command = " ".join([program, *args]) - logger.info(f'Executing subprocess: "{command}" ...') - - process = await create_subprocess_exec(program, *args, stdout=ASYNC_PIPE, stderr=ASYNC_STDOUT) - async for line in process.stdout: - logger.info(f'[{program}] {line.decode("utf-8").strip()}') - await process.communicate() - - if process.returncode == 0: - logger.info(f'"{command}" succeeded') - else: - message = f'"{command}" failed with return code {process.returncode}' - logger.error(message) - raise RuntimeError(message) diff --git a/ci/fireci/setup.cfg b/ci/fireci/setup.cfg index 71b002fbd8e..4bc55ca8ea5 100644 --- a/ci/fireci/setup.cfg +++ b/ci/fireci/setup.cfg @@ -5,14 +5,12 @@ version = 0.1 [options] install_requires = protobuf==3.19 - click==8.1.3 - google-cloud-storage==2.5.0 + click==7.0 + google-cloud-storage==1.44.0 numpy==1.23.1 - pandas==1.5.1 PyGithub==1.55 pystache==0.6.0 requests==2.23.0 - seaborn==0.12.1 PyYAML==6.0.0 [options.extras_require] diff --git a/health-metrics/benchmark/README.md b/health-metrics/benchmark/README.md index 3cd25e9617b..5cffaf1532a 100644 --- a/health-metrics/benchmark/README.md +++ b/health-metrics/benchmark/README.md @@ -10,7 +10,7 @@ building a macrobenchmark test app for each of the Firebase Android SDKs. If not all of them are required, comment out irrelevant ones for faster build and test time. -## Run macrobenchmark tests +## Run benchmark tests ### Prerequisite @@ -35,27 +35,16 @@ and test time. [doc](https://cloud.google.com/docs/authentication) for full guidance on authentication. -### Run tests locally +### Run benchmark tests locally -1. 
[Connect an Android device to the computer](https://d.android.com/studio/run/device) - -1. Run below command in the repository root directory `firebase-android-sdk`: +1. Build all test apps by running below command in the root + directory `firebase-android-sdk`: ```shell - fireci macrobenchmark run --local + fireci macrobenchmark --build-only ``` - **Note**: specify `--repeat ` to run the test multiple times. Run - `fireci macrobenchmark run --help` to see more details. - -Alternatively, developers can also create test apps with `fireci`, and run the -test from either CLI or Android Studio: - -1. Run below command to build all test apps: - - ```shell - fireci macrobenchmark run --build-only - ``` +1. [Connect an Android device to the computer](https://d.android.com/studio/run/device) 1. Locate the temporary test apps directory from the log, for example: @@ -100,90 +89,23 @@ test from either CLI or Android Studio: Alternatively, same set of result files are produced at the same output location as invoking tests from CLI, which can be used for inspection. -### Run tests on Firebase Test Lab +### Run benchmark tests on Firebase Test Lab -Run below command to build and run all tests on FTL: +Build and run all tests on FTL by running below command in the root +directory `firebase-android-sdk`: -```shell -fireci macrobenchmark run --remote +``` +fireci macrobenchmark ``` -**Note**: `--repeat ` is also supported to submit the test to FTL for -`` times. All tests on FTL will run in parallel. - -Alternatively, developers can still build test apps locally, and manually -[run tests on FTL with `gcloud` CLI](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests). +Alternatively, it is possible to build all test apps via steps described in +[Running benchmark tests locally](#running-benchmark-tests-locally) +and manually +[run tests on FTL with `gcloud` CLI ](https://firebase.google.com/docs/test-lab/android/command-line#running_your_instrumentation_tests). Aggregated benchmark results are displayed in the log. The log also contains links to FTL result pages and result files on Google Cloud Storage. -## Analyze macrobenchmark results - -Besides results from `*-benchmarkData.json` as descriped above, `fireci` -supports more in depth analysis, such as: - -- calculating percentiles and visualizing distributions for one test run -- comparing two sets of results (with stats and graphs) from two different runs - -To see more details, run - -```shell -fireci macrobenchmark analyze --help -``` - -### Example usage - -1. Analyzing local test results - - ```shell - fireci macrobenchmark analyze --local-reports-dir - ``` - - `` is the directory containing the `*-benchmarkData.json` from - the local test runs. - - **Note**: If the test is started: - - - with `fireci macrobenchmark run --local`, `fireci` copies all benchmark - json files into a dir, which can be supplied here. - - manually (CLI or Android Studio), `` shall be the directory - that contains `*-benchmarkData.json` in the gradle build directory. - -1. Analyzing remote test results - - ```shell - fireci macrobenchmark analyze --ftl-results-dir --ftl-results-dir ... - ``` - - ``, `` are Firebase Test Lab results directory names, such as - `2022-11-04_11:18:34.039437_OqZn`. - -1. Comparing two sets of result from two different FTL runs - - ```shell - fireci macrobenchmark analyze \ - --diff-mode \ - --ctl-ftl-results-dir \ - --ctl-ftl-results-dir \ - ... - --exp-ftl-results-dir \ - --exp-ftl-results-dir \ - ... 
- ``` - - `ctl` and `exp` are short for "control group" and "experimental group". - -1. Comparing a local test run against a FTL run - - ```shell - fireci macrobenchmark analyze \ - --diff-mode \ - --ctl-ftl-results-dir \ - --ctl-ftl-results-dir \ - ... - --exp-local-reports-dir - ``` - ## Toolchains - Gradle 7.5.1 diff --git a/health-metrics/benchmark/config.yaml b/health-metrics/benchmark/config.yaml index 6a8bc2a0a27..8852965302e 100644 --- a/health-metrics/benchmark/config.yaml +++ b/health-metrics/benchmark/config.yaml @@ -21,41 +21,52 @@ common-plugins: [com.google.gms.google-services] common-traces: [Firebase, ComponentDiscovery, Runtime] test-apps: - - sdk: N.A. - name: all-included - dependencies: - - com.google.firebase:firebase-abt - - com.google.firebase:firebase-appcheck - - com.google.firebase:firebase-appdistribution - - com.google.firebase:firebase-crashlytics - - com.google.firebase:firebase-database - - com.google.firebase:firebase-dynamic-links - - com.google.firebase:firebase-firestore - - com.google.firebase:firebase-functions - - com.google.firebase:firebase-inappmessaging - - com.google.firebase:firebase-inappmessaging-display - - com.google.firebase:firebase-messaging - - com.google.firebase:firebase-ml-modeldownloader - - com.google.firebase:firebase-perf - - com.google.firebase:firebase-storage - plugins: - - com.google.firebase.crashlytics - - com.google.firebase.firebase-perf - traces: - - fire-abt - - fire-app-check - - fire-appdistribution - - fire-cls - - fire-dl - - fire-fcm - - fire-fiam - - fire-fiamd - - fire-fn - - fire-fst - - fire-gcs - - fire-installations - - firebase-ml-modeldownloader - - fire-perf - - fire-rc - - fire-rtdb - - fire-transport + - sdk: firebase-config + name: config + dependencies: [com.google.firebase:firebase-config-ktx] + - sdk: firebase-common + name: common + dependencies: [com.google.firebase:firebase-common] + - sdk: firebase-crashlytics + name: crash + dependencies: [com.google.firebase:firebase-crashlytics-ktx] + plugins: [com.google.firebase.crashlytics] + - sdk: firebase-database + name: database + dependencies: [com.google.firebase:firebase-database-ktx] + - sdk: firebase-dynamic-links + name: fdl + dependencies: [com.google.firebase:firebase-dynamic-links-ktx] + - sdk: firebase-firestore + name: firestore + dependencies: [com.google.firebase:firebase-firestore-ktx] + - sdk: firebase-functions + name: functions + dependencies: [com.google.firebase:firebase-functions-ktx] + # TODO(yifany): disable temporarily due to errors of duplicate class and gradle crash + # - sdk: firebase-inappmessaging-display + # name: fiam + # dependencies: + # - com.google.firebase:firebase-analytics-ktx@18.0.3 + # - com.google.firebase:firebase-inappmessaging-ktx + # - com.google.firebase:firebase-inappmessaging-display-ktx + - sdk: firebase-messaging + name: message + dependencies: [com.google.firebase:firebase-messaging-ktx] + - sdk: firebase-perf + name: perf + dependencies: [com.google.firebase:firebase-perf-ktx] + plugins: [com.google.firebase.firebase-perf] + - sdk: firebase-storage + name: stroage + dependencies: [com.google.firebase:firebase-storage-ktx] + + +# TODO(yifany): google3 sdks, customizing FTL devices +# auth +# analytics +# combined +# - crashlytics + analytics +# - crashlytics + fireperf +# - auth + firestore +# - ... 
diff --git a/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache b/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache
index 82dd0ecbf3b..4fa5af3546e 100644
--- a/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache
+++ b/health-metrics/benchmark/template/macrobenchmark/src/main/java/com/google/firebase/macrobenchmark/BenchmarkTest.kt.mustache
@@ -39,7 +39,7 @@ class StartupBenchmark {
       TraceSectionMetric("{{.}}"),
       {{/traces}}
     ),
-    iterations = 100,
+    iterations = 5,
     startupMode = StartupMode.COLD
   ) {
     pressHome()