Skip to content

Commit aafad2a

Browse files
jscheffl utkarsharma2
authored and committed
Make Scarf usage reporting in major+minor versions and counters in buckets (#41900)
(cherry picked from commit 5a04519)
1 parent fa03a32 commit aafad2a

File tree

4 files changed

+62
-12
lines changed

4 files changed

+62
-12
lines changed

airflow/utils/usage_data_collection.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ def get_database_version() -> str:
8080
return "None"
8181

8282
version_info = settings.engine.dialect.server_version_info
83-
# Example: (1, 2, 3) -> "1.2.3"
84-
return ".".join(map(str, version_info)) if version_info else "None"
83+
# Example: (1, 2, 3) -> "1.2" (cut only major+minor w/o patch)
84+
return ".".join(map(str, version_info[0:2])) if version_info else "None"
8585

8686

8787
def get_database_name() -> str:
@@ -95,7 +95,8 @@ def get_executor() -> str:
9595

9696

9797
def get_python_version() -> str:
98-
return platform.python_version()
98+
# Cut only major+minor from the python version string (e.g. 3.10.12 --> 3.10)
99+
return ".".join(platform.python_version().split(".")[0:2])
99100

100101

101102
def get_plugin_counts() -> dict[str, int]:
@@ -108,3 +109,14 @@ def get_plugin_counts() -> dict[str, int]:
108109
"appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in plugin_info),
109110
"timetables": sum(len(x["timetables"]) for x in plugin_info),
110111
}
112+
113+
114+
def to_bucket(counter: int) -> str:
    """
    Convert an exact count into a coarse range label.

    Precise numbers should not be reported, so the count is mapped onto one of
    a fixed set of ranges instead.

    :param counter: The exact count to anonymise.
    :return: ``"0"`` for zero (or any non-positive value), a ``"low-high"``
        label (both ends inclusive, e.g. ``"1-5"``, ``"6-10"``) for counts up
        to 2000, and ``"2000+"`` for anything strictly above 2000.
    """
    # Treat negative input like zero; without this guard a negative count would
    # match no range and be reported as the largest bucket ("2000+").
    if counter <= 0:
        return "0"
    buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
    # Pair each boundary with its successor: (0, 5), (5, 10), ..., (1000, 2000).
    for lower, upper in zip(buckets, buckets[1:]):
        if lower < counter <= upper:
            return f"{lower + 1}-{upper}"
    # Label kept as "2000+" for compatibility, although 2000 itself falls into "1001-2000".
    return f"{buckets[-1]}+"

airflow/www/views.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@ def build_scarf_url(dags_count: int) -> str:
240240
appbuilder_views_count = plugin_counts["appbuilder_views"]
241241
appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
242242
timetables_count = plugin_counts["timetables"]
243+
dag_bucket = usage_data_collection.to_bucket(dags_count)
244+
plugins_bucket = usage_data_collection.to_bucket(plugins_count)
245+
timetable_bucket = usage_data_collection.to_bucket(timetables_count)
243246

244247
# Path Format:
245248
# /{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
@@ -248,8 +251,8 @@ def build_scarf_url(dags_count: int) -> str:
248251
scarf_url = (
249252
f"{scarf_domain}/webserver"
250253
f"/{version}/{python_version}"
251-
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dags_count}"
252-
f"/{plugins_count}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetables_count}"
254+
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
255+
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
253256
)
254257

255258
return scarf_url

tests/utils/test_usage_data_collection.py

+39-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@
2424

2525
from airflow import __version__ as airflow_version
2626
from airflow.configuration import conf
27-
from airflow.utils.usage_data_collection import get_database_version, usage_data_collection
27+
from airflow.utils.usage_data_collection import (
28+
get_database_version,
29+
get_python_version,
30+
to_bucket,
31+
usage_data_collection,
32+
)
2833

2934

3035
@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, True)])
@@ -51,7 +56,7 @@ def test_scarf_analytics(
5156
):
5257
platform_sys = platform.system()
5358
platform_machine = platform.machine()
54-
python_version = platform.python_version()
59+
python_version = get_python_version()
5560
executor = conf.get("core", "EXECUTOR")
5661
scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
5762
usage_data_collection()
@@ -74,12 +79,42 @@ def test_scarf_analytics(
7479
@pytest.mark.parametrize(
7580
"version_info, expected_version",
7681
[
77-
((1, 2, 3), "1.2.3"), # Normal version tuple
82+
((1, 2, 3), "1.2"), # Normal version tuple
7883
(None, "None"), # No version info available
7984
((1,), "1"), # Single element version tuple
80-
((1, 2, 3, "beta", 4), "1.2.3.beta.4"), # Complex version tuple with strings
85+
((1, 2, 3, "beta", 4), "1.2"), # Complex version tuple with strings
8186
],
8287
)
8388
def test_get_database_version(version_info, expected_version):
8489
with mock.patch("airflow.settings.engine.dialect.server_version_info", new=version_info):
8590
assert get_database_version() == expected_version
91+
92+
93+
@pytest.mark.parametrize(
94+
"version_info, expected_version",
95+
[
96+
("1.2.3", "1.2"), # Normal version
97+
("4", "4"), # Single element version
98+
("1.2.3.beta4", "1.2"), # Complex version tuple with strings
99+
],
100+
)
101+
def test_get_python_version(version_info, expected_version):
102+
with mock.patch("platform.python_version", return_value=version_info):
103+
assert get_python_version() == expected_version
104+
105+
106+
@pytest.mark.parametrize(
107+
"counter, expected_bucket",
108+
[
109+
(0, "0"),
110+
(1, "1-5"),
111+
(5, "1-5"),
112+
(6, "6-10"),
113+
(11, "11-20"),
114+
(20, "11-20"),
115+
(21, "21-50"),
116+
(10000, "2000+"),
117+
],
118+
)
119+
def test_to_bucket(counter, expected_bucket):
120+
assert to_bucket(counter) == expected_bucket

tests/www/views/test_views.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def test_invalid_dates(app, admin_client, url, content):
604604
@patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
605605
@patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
606606
@patch("airflow.utils.usage_data_collection.get_executor", return_value="SequentialExecutor")
607-
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8.5")
607+
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8")
608608
@patch("airflow.utils.usage_data_collection.get_plugin_counts")
609609
def test_build_scarf_url(
610610
get_plugin_counts,
@@ -626,8 +626,8 @@ def test_build_scarf_url(
626626
result = build_scarf_url(5)
627627
expected_url = (
628628
"https://apacheairflow.gateway.scarf.sh/webserver/"
629-
f"{airflow_version}/3.8.5/Linux/x86_64/postgres/12.3/SequentialExecutor/5"
630-
f"/10/15/20/25/30"
629+
f"{airflow_version}/3.8/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
630+
f"/6-10/15/20/25/21-50"
631631
)
632632
if enabled:
633633
assert result == expected_url

0 commit comments

Comments
 (0)