diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 4d4f3161c7e..3314569f01e 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -124,6 +124,9 @@ def SESSION_COOKIE_SAMESITE(self):
     RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3
     RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3
 
+    # Keep BuildData models on database during this time
+    RTD_TELEMETRY_DATA_RETENTION_DAYS = 30 * 6  # 180 days / 6 months
+
     # Database and API hitting settings
     DONT_HIT_API = False
     DONT_HIT_DB = True
@@ -419,6 +422,11 @@ def TEMPLATES(self):
             'schedule': crontab(minute=0, hour=1),
             'options': {'queue': 'web'},
         },
+        'every-day-delete-old-buildata-models': {
+            'task': 'readthedocs.telemetry.tasks.delete_old_build_data',
+            'schedule': crontab(minute=0, hour=2),
+            'options': {'queue': 'web'},
+        },
         'every-day-resync-sso-organization-users': {
             'task': 'readthedocs.oauth.tasks.sync_remote_repositories_organizations',
             'schedule': crontab(minute=0, hour=4),
diff --git a/readthedocs/telemetry/tasks.py b/readthedocs/telemetry/tasks.py
index 3e8b54447f2..bf850e9e12c 100644
--- a/readthedocs/telemetry/tasks.py
+++ b/readthedocs/telemetry/tasks.py
@@ -1,5 +1,10 @@
 """Tasks related to telemetry."""
 
+from datetime import timedelta
+
+from django.conf import settings
+from django.utils import timezone
+
 from readthedocs.builds.models import Build
 from readthedocs.telemetry.models import BuildData
 from readthedocs.worker import app
@@ -16,3 +21,25 @@ def save_build_data(build_id, data):
     build = Build.objects.filter(id=build_id).first()
     if build:
         BuildData.objects.collect(build, data)
+
+
+@app.task(queue="web")
+def delete_old_build_data():
+    """
+    Delete BuildData models older than ``RTD_TELEMETRY_DATA_RETENTION_DAYS``.
+
+    This is intended to run from a periodic task daily.
+
+    NOTE: the logic of this task could be improved to keep for longer the data
+    we care more about (e.g. active projects) and to remove sooner the data we
+    don't (e.g. builds from spam projects).
+
+    :returns: the result of ``QuerySet.delete()`` for the matched rows.
+    """
+    retention_days = settings.RTD_TELEMETRY_DATA_RETENTION_DAYS
+    # Truncate "now" to a date so the cutoff does not depend on the time of
+    # day at which the task runs.
+    # Use ``datetime.timedelta`` directly: ``django.utils.timezone`` only
+    # exposes ``timedelta`` as an incidental import, not as public API.
+    days_ago = timezone.now().date() - timedelta(days=retention_days)
+    return BuildData.objects.filter(created__lt=days_ago).delete()