Skip to content

Commit 2df5491

Browse files
committed
Telemetry: delete old BuildData models
Define a task that deletes `BuildData` objects older than `RTD_TELEMETRY_DATA_RETENTION_DAYS`, which is set to 180 days for now. The task is configured to run every day at 2 AM. Related to #9328.
1 parent dc001ae commit 2df5491

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

readthedocs/settings/base.py

+8
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ def SESSION_COOKIE_SAMESITE(self):
124124
RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3
125125
RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3
126126

127+
# Number of days to keep BuildData models in the database before they are deleted
128+
RTD_TELEMETRY_DATA_RETENTION_DAYS = 30 * 6 # 180 days / 6 months
129+
127130
# Database and API hitting settings
128131
DONT_HIT_API = False
129132
DONT_HIT_DB = True
@@ -419,6 +422,11 @@ def TEMPLATES(self):
419422
'schedule': crontab(minute=0, hour=1),
420423
'options': {'queue': 'web'},
421424
},
425+
'every-day-delete-old-buildata-models': {
426+
'task': 'readthedocs.telemetry.tasks.delete_old_build_data',
427+
'schedule': crontab(minute=0, hour=2),
428+
'options': {'queue': 'web'},
429+
},
422430
'every-day-resync-sso-organization-users': {
423431
'task': 'readthedocs.oauth.tasks.sync_remote_repositories_organizations',
424432
'schedule': crontab(minute=0, hour=4),

readthedocs/telemetry/tasks.py

+18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Tasks related to telemetry."""
22

3+
from django.conf import settings
4+
from django.utils import timezone
5+
36
from readthedocs.builds.models import Build
47
from readthedocs.telemetry.models import BuildData
58
from readthedocs.worker import app
@@ -16,3 +19,18 @@ def save_build_data(build_id, data):
1619
build = Build.objects.filter(id=build_id).first()
1720
if build:
1821
BuildData.objects.collect(build, data)
22+
23+
24+
@app.task(queue="web")
def delete_old_build_data():
    """
    Delete BuildData models older than ``RTD_TELEMETRY_DATA_RETENTION_DAYS``.

    This is intended to run from a periodic task daily.

    The cutoff is the start of the current day (as reported by
    ``timezone.now()``) minus the retention period, so every run on the same
    day deletes up to the same boundary.

    NOTE: the logic of this task could be improved to keep the data we care
    about for longer (e.g. active projects) and to remove sooner the data we
    don't (e.g. builds from spam projects).

    :returns: whatever ``QuerySet.delete()`` returns for the matched rows.
    """
    # Local import: ``timedelta`` lives in the standard library; relying on
    # ``django.utils.timezone`` re-exporting it is an implementation detail.
    from datetime import timedelta

    retention_days = settings.RTD_TELEMETRY_DATA_RETENTION_DAYS

    # Truncate to midnight with ``replace()`` instead of ``.date()`` so the
    # value keeps its tzinfo. Filtering a datetime column with a bare ``date``
    # makes Django build a naive datetime and warn about it when time zone
    # support is active (assumes ``created`` is a DateTimeField -- TODO confirm).
    start_of_today = timezone.now().replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )
    cutoff = start_of_today - timedelta(days=retention_days)
    return BuildData.objects.filter(created__lt=cutoff).delete()

0 commit comments

Comments
 (0)