@@ -26,51 +26,9 @@ def __str__(self):
26
26
return f"{ self .source } :{ self .signal } "
27
27
28
28
29
def fetch_data(
    time_types: Optional[List[str]],
    geo_types: Optional[List[str]],
    signals: Optional[List[SourceSignal]],
):
    """Yield the cached covidcast metadata rows that pass the given filters.

    Reads a single row (``age`` in seconds and ``epidata``) from the
    ``covidcast_meta_cache`` table, decodes ``epidata`` with ``loads`` and
    yields every decoded row accepted by the filters.

    Args:
        time_types: if truthy, keep only rows whose ``"time_type"`` is listed.
        geo_types: if truthy, keep only rows whose ``"geo_type"`` is listed.
        signals: if truthy, keep only rows whose ``"data_source"`` equals a
            filter's ``source`` and whose ``"signal"`` equals that filter's
            ``signal`` (a filter signal of ``"*"`` accepts any signal).

    Yields:
        Each matching row, in the order it appears in the decoded cache.
        Nothing is yielded when the cache row is missing or empty.
    """
    # complain if the cache is more than 75 minutes old
    stale_after = 75 * 60

    cached = db.execute(
        text(
            "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
        )
    ).fetchone()

    if not (cached and cached["epidata"]):
        get_structured_logger('server_api').warning("no data in covidcast_meta cache")
        return

    cache_age = cached["age"]
    # `epidata` is known to be truthy here, so only the age needs checking
    if cache_age > stale_after:
        get_structured_logger('server_api').warning("covidcast_meta cache is stale", cache_age=cache_age)

    entries = loads(cached["epidata"])
    if not entries:
        return

    def is_wanted(entry: Dict) -> bool:
        """Tell whether one decoded cache row passes every active filter."""
        if time_types and entry.get("time_type") not in time_types:
            return False
        if geo_types and entry.get("geo_type") not in geo_types:
            return False
        if not signals:
            return True
        # match source and (signal or signal = *)
        return any(
            entry.get("data_source") == wanted.source
            and (wanted.signal == "*" or wanted.signal == entry.get("signal"))
            for wanted in signals
        )

    yield from filter(is_wanted, entries)
29
+ # empty generator that never yields
30
+ def _nonerator ():
31
+ return (x for x in [])
74
32
75
33
76
34
@bp .route ("/" , methods = ("GET" , "POST" ))
@@ -79,4 +37,60 @@ def handle():
79
37
signals = [SourceSignal (v ) for v in (extract_strings ("signals" ) or [])]
80
38
geo_types = extract_strings ("geo_types" )
81
39
82
- return create_printer (request .values .get ("format" ))(filter_fields (fetch_data (time_types , geo_types , signals )))
40
+ printer = create_printer (request .values .get ("format" ))
41
+
42
+ metadata = db .execute (
43
+ text (
44
+ "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
45
+ )
46
+ ).fetchone ()
47
+
48
+ if not metadata or "epidata" not in metadata :
49
+ # the db table `covidcast_meta_cache` has no rows
50
+ get_structured_logger ('server_api' ).warning ("no data in covidcast_meta cache" )
51
+ return printer (_nonerator ())
52
+
53
+ metadata_list = loads (metadata ["epidata" ])
54
+
55
+ if not metadata_list :
56
+ # the db table has a row, but there is no metadata about any signals in it
57
+ get_structured_logger ('server_api' ).warning ("empty entry in covidcast_meta cache" )
58
+ return printer (_nonerator ())
59
+
60
+ # the expected metadata regeneration interval in seconds, aka time between runs of
61
+ # src/acquisition/covidcast/covidcast_meta_cache_updater.py (currently 2h)
62
+ standard_age = 2 * 60 * 60
63
+ # a short period when a client can continue to use this metadata even if it's slightly stale,
64
+ # which also gives some padding if the metadata generation is running slowly,
65
+ # and which also acts as a minimum cacheable time (currently 10 mins)
66
+ age_margin = 10 * 60
67
+ # these should be updated if a stale cache will have undue impact on user activities, such as
68
+ # if we start updating the metadata table much more frequently and having up-to-the-minute
69
+ # metadata accuracy becomes important to users once more.
70
+ # TODO: get the above two values ^ from config vars?
71
+ age = metadata ["age" ]
72
+ reported_age = max (0 , min (age , standard_age ) - age_margin )
73
+
74
def cache_entry_gen():
    """Yield each cached metadata entry that passes the request filters, at most once.

    Reads ``metadata_list``, ``time_types``, ``geo_types`` and ``signals`` from
    the enclosing scope. An entry is kept when:

    * ``time_types`` is falsy or lists the entry's ``"time_type"``;
    * ``geo_types`` is falsy or lists the entry's ``"geo_type"``;
    * ``signals`` is falsy, or at least one filter has the entry's
      ``"data_source"`` as its ``source`` and either ``"*"`` or the entry's
      ``"signal"`` as its ``signal``.

    Yields:
        Matching entries in ``metadata_list`` order, each exactly once even
        when several signal filters match the same entry.
    """
    for entry in metadata_list:
        if time_types and entry.get("time_type") not in time_types:
            continue
        if geo_types and entry.get("geo_type") not in geo_types:
            continue
        # match source and (signal or signal = *); `any` stops at the first hit, so
        # overlapping filters (e.g. `src:*` plus `src:sig`) cannot emit duplicates
        if not signals or any(
            entry.get("data_source") == signal.source
            and (signal.signal == "*" or signal.signal == entry.get("signal"))
            for signal in signals
        ):
            yield entry
88
+
89
+ return printer (
90
+ filter_fields (cache_entry_gen ()),
91
+ headers = {
92
+ "Cache-Control" : f"max-age={ standard_age } , public" ,
93
+ "Age" : f"{ reported_age } " ,
94
+ # TODO?: "Expires": f"{}", # superseded by Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expires
95
+ }
96
+ )
0 commit comments