Skip to content

Added UNIQUE KEYS to fluview_clinical to avoid duplicates #1127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
2 changes: 1 addition & 1 deletion src/acquisition/fluview/fluview_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def update_from_file(issue, date, filename, test_mode=False):
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
`release_date` = least(`release_date`, %s),
`issue` = least(`issue`, %s),
`num_ili` = %s,
`num_patients` = %s,
`num_providers` = %s,
Expand Down
2 changes: 1 addition & 1 deletion src/ddl/fluview.sql
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ CREATE TABLE `fluview_clinical` (
`percent_a` double DEFAULT NULL,
`percent_b` double DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `issue` (`issue`, `epiweek`, `region`),
KEY `release_date` (`release_date`),
KEY `issue` (`issue`),
KEY `epiweek` (`epiweek`),
KEY `region` (`region`),
KEY `lag` (`lag`)
Expand Down
58 changes: 58 additions & 0 deletions src/ddl/migrations/fluview_clinical_v0.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
USE epidata;

DELETE FROM
fluview_clinical
WHERE
id IN (
SELECT
dup.id
FROM
(
SELECT
*
FROM
(
SELECT
fc.id,
fc.release_date,
fc.issue,
fc.epiweek,
fc.region,
fc.lag,
fc.total_specimens,
fc.total_a,
fc.total_b,
fc.percent_positive,
fc.percent_a,
fc.percent_b,
ROW_NUMBER() OVER (
PARTITION BY fc.release_date,
fc.issue,
fc.epiweek,
fc.region,
fc.lag,
fc.total_specimens,
fc.total_a,
fc.total_b,
fc.percent_positive,
fc.percent_a,
fc.percent_b
ORDER BY
fc.release_date
) as row_num
FROM
epidata.fluview_clinical fc
ORDER BY
fc.release_date DESC
) s
WHERE
s.row_num > 1
) dup
);

DROP INDEX issue ON epidata.fluview_clinical;

ALTER TABLE
fluview_clinical
ADD
CONSTRAINT `issue` UNIQUE(`issue`, `epiweek`, `region`);