Skip to content

Commit 8774952

Browse files
authored
feat: add non-repeating config logger (#4268)
* Add non repeating logger * Update docstring * Test CI * create log method * add tests * Limit cache size
1 parent 2a98067 commit 8774952

File tree

3 files changed

+131
-27
lines changed

3 files changed

+131
-27
lines changed

src/sagemaker/config/config.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@
2828
from botocore.utils import merge_dicts
2929
from six.moves.urllib.parse import urlparse
3030
from sagemaker.config.config_schema import SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA
31-
from sagemaker.config.config_utils import get_sagemaker_config_logger
31+
from sagemaker.config.config_utils import non_repeating_log_factory, get_sagemaker_config_logger
3232

3333
logger = get_sagemaker_config_logger()
34+
log_info_function = non_repeating_log_factory(logger, "info")
3435

3536
_APP_NAME = "sagemaker"
3637
# The default name of the config file.
@@ -52,7 +53,9 @@
5253
S3_PREFIX = "s3://"
5354

5455

55-
def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource=None) -> dict:
56+
def load_sagemaker_config(
57+
additional_config_paths: List[str] = None, s3_resource=None, repeat_log=False
58+
) -> dict:
5659
"""Loads config files and merges them.
5760
5861
By default, this method first searches for config files in the default locations
@@ -99,6 +102,8 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
99102
<https://boto3.amazonaws.com/v1/documentation/api\
100103
/latest/reference/core/session.html#boto3.session.Session.resource>`__.
101104
This argument is not needed if the config files are present in the local file system.
105+
repeat_log (bool): Whether the log with the same contents should be emitted.
106+
Default to ``False``
102107
"""
103108
default_config_path = os.getenv(
104109
ENV_VARIABLE_ADMIN_CONFIG_OVERRIDE, _DEFAULT_ADMIN_CONFIG_FILE_PATH
@@ -109,6 +114,11 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
109114
config_paths += additional_config_paths
110115
config_paths = list(filter(lambda item: item is not None, config_paths))
111116
merged_config = {}
117+
118+
log_info = log_info_function
119+
if repeat_log:
120+
log_info = logger.info
121+
112122
for file_path in config_paths:
113123
config_from_file = {}
114124
if file_path.startswith(S3_PREFIX):
@@ -130,9 +140,9 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
130140
if config_from_file:
131141
validate_sagemaker_config(config_from_file)
132142
merge_dicts(merged_config, config_from_file)
133-
logger.info("Fetched defaults config from location: %s", file_path)
143+
log_info("Fetched defaults config from location: %s", file_path)
134144
else:
135-
logger.info("Not applying SDK defaults from location: %s", file_path)
145+
log_info("Not applying SDK defaults from location: %s", file_path)
136146

137147
return merged_config
138148

src/sagemaker/config/config_utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
These utils may be used inside or outside the config module.
1616
"""
1717
from __future__ import absolute_import
18+
from collections import deque
1819

1920
import logging
2021
import sys
22+
from typing import Callable
2123

2224

2325
def get_sagemaker_config_logger():
@@ -197,3 +199,33 @@ def _log_sagemaker_config_merge(
197199
else:
198200
# nothing was specified in the config and nothing is being automatically applied
199201
logger.debug("Skipped value because no value defined\n config key = %s", config_key_path)
202+
203+
204+
def non_repeating_log_factory(logger: logging.Logger, method: str, cache_size=100) -> Callable:
205+
"""Create log function that filters the repeated messages.
206+
207+
By default. It only keeps track of last 100 messages, if a repeated
208+
message arrives after the ``cache_size`` messages, it will be displayed.
209+
210+
Args:
211+
logger (logging.Logger): the logger to be used to dispatch the message.
212+
method (str): the log method, can be info, warning or debug.
213+
cache_size (int): the number of last log messages to keep in cache.
214+
Default to 100
215+
216+
Returns:
217+
(Callable): the new log method
218+
"""
219+
if method not in ["info", "warning", "debug"]:
220+
raise ValueError("Not supported logging method.")
221+
222+
_caches = deque(maxlen=cache_size)
223+
log_method = getattr(logger, method)
224+
225+
def new_log_method(msg, *args, **kwargs):
226+
key = f"{msg}:{args}"
227+
if key not in _caches:
228+
log_method(msg, *args, **kwargs)
229+
_caches.append(key)
230+
231+
return new_log_method

tests/unit/sagemaker/config/test_config.py

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
import pytest
1717
import yaml
1818
import logging
19-
from mock import Mock, MagicMock, patch
19+
from mock import Mock, MagicMock, patch, call
2020

2121
from sagemaker.config.config import (
2222
load_local_mode_config,
2323
load_sagemaker_config,
2424
logger,
25+
non_repeating_log_factory,
2526
_DEFAULT_ADMIN_CONFIG_FILE_PATH,
2627
_DEFAULT_USER_CONFIG_FILE_PATH,
2728
_DEFAULT_LOCAL_MODE_CONFIG_FILE_PATH,
@@ -45,14 +46,14 @@ def expected_merged_config(get_data_dir):
4546

4647

4748
def test_config_when_default_config_file_and_user_config_file_is_not_found():
48-
assert load_sagemaker_config() == {}
49+
assert load_sagemaker_config(repeat_log=True) == {}
4950

5051

5152
def test_config_when_overriden_default_config_file_is_not_found(get_data_dir):
5253
fake_config_file_path = os.path.join(get_data_dir, "config-not-found.yaml")
5354
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = fake_config_file_path
5455
with pytest.raises(ValueError):
55-
load_sagemaker_config()
56+
load_sagemaker_config(repeat_log=True)
5657
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
5758

5859

@@ -63,14 +64,14 @@ def test_invalid_config_file_which_has_python_code(get_data_dir):
6364
# PyYAML will throw exceptions for yaml.safe_load. SageMaker Config is using
6465
# yaml.safe_load internally
6566
with pytest.raises(ConstructorError) as exception_info:
66-
load_sagemaker_config(additional_config_paths=[invalid_config_file_path])
67+
load_sagemaker_config(additional_config_paths=[invalid_config_file_path], repeat_log=True)
6768
assert "python/object/apply:eval" in str(exception_info.value)
6869

6970

7071
def test_config_when_additional_config_file_path_is_not_found(get_data_dir):
7172
fake_config_file_path = os.path.join(get_data_dir, "config-not-found.yaml")
7273
with pytest.raises(ValueError):
73-
load_sagemaker_config(additional_config_paths=[fake_config_file_path])
74+
load_sagemaker_config(additional_config_paths=[fake_config_file_path], repeat_log=True)
7475

7576

7677
def test_config_factory_when_override_user_config_file_is_not_found(get_data_dir):
@@ -79,15 +80,15 @@ def test_config_factory_when_override_user_config_file_is_not_found(get_data_dir
7980
)
8081
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = fake_additional_override_config_file_path
8182
with pytest.raises(ValueError):
82-
load_sagemaker_config()
83+
load_sagemaker_config(repeat_log=True)
8384
del os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"]
8485

8586

8687
def test_default_config_file_with_invalid_schema(get_data_dir):
8788
config_file_path = os.path.join(get_data_dir, "invalid_config_file.yaml")
8889
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = config_file_path
8990
with pytest.raises(exceptions.ValidationError):
90-
load_sagemaker_config()
91+
load_sagemaker_config(repeat_log=True)
9192
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
9293

9394

@@ -98,7 +99,7 @@ def test_default_config_file_when_directory_is_provided_as_the_path(
9899
expected_config = base_config_with_schema
99100
expected_config["SageMaker"] = valid_config_with_all_the_scopes
100101
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = get_data_dir
101-
assert expected_config == load_sagemaker_config()
102+
assert expected_config == load_sagemaker_config(repeat_log=True)
102103
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
103104

104105

@@ -108,7 +109,9 @@ def test_additional_config_paths_when_directory_is_provided(
108109
# This will try to load config.yaml file from that directory if present.
109110
expected_config = base_config_with_schema
110111
expected_config["SageMaker"] = valid_config_with_all_the_scopes
111-
assert expected_config == load_sagemaker_config(additional_config_paths=[get_data_dir])
112+
assert expected_config == load_sagemaker_config(
113+
additional_config_paths=[get_data_dir], repeat_log=True
114+
)
112115

113116

114117
def test_default_config_file_when_path_is_provided_as_environment_variable(
@@ -118,7 +121,7 @@ def test_default_config_file_when_path_is_provided_as_environment_variable(
118121
# This will try to load config.yaml file from that directory if present.
119122
expected_config = base_config_with_schema
120123
expected_config["SageMaker"] = valid_config_with_all_the_scopes
121-
assert expected_config == load_sagemaker_config()
124+
assert expected_config == load_sagemaker_config(repeat_log=True)
122125
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
123126

124127

@@ -131,7 +134,9 @@ def test_merge_behavior_when_additional_config_file_path_is_not_found(
131134
)
132135
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = valid_config_file_path
133136
with pytest.raises(ValueError):
134-
load_sagemaker_config(additional_config_paths=[fake_additional_override_config_file_path])
137+
load_sagemaker_config(
138+
additional_config_paths=[fake_additional_override_config_file_path], repeat_log=True
139+
)
135140
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
136141

137142

@@ -142,10 +147,10 @@ def test_merge_behavior(get_data_dir, expected_merged_config):
142147
)
143148
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = valid_config_file_path
144149
assert expected_merged_config == load_sagemaker_config(
145-
additional_config_paths=[additional_override_config_file_path]
150+
additional_config_paths=[additional_override_config_file_path], repeat_log=True
146151
)
147152
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = additional_override_config_file_path
148-
assert expected_merged_config == load_sagemaker_config()
153+
assert expected_merged_config == load_sagemaker_config(repeat_log=True)
149154
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
150155
del os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"]
151156

@@ -169,7 +174,7 @@ def test_s3_config_file(
169174
expected_config = base_config_with_schema
170175
expected_config["SageMaker"] = valid_config_with_all_the_scopes
171176
assert expected_config == load_sagemaker_config(
172-
additional_config_paths=[config_file_s3_uri], s3_resource=s3_resource_mock
177+
additional_config_paths=[config_file_s3_uri], s3_resource=s3_resource_mock, repeat_log=True
173178
)
174179

175180

@@ -183,7 +188,9 @@ def test_config_factory_when_default_s3_config_file_is_not_found(s3_resource_moc
183188
config_file_s3_uri = "s3://{}/{}".format(config_file_bucket, config_file_s3_prefix)
184189
with pytest.raises(ValueError):
185190
load_sagemaker_config(
186-
additional_config_paths=[config_file_s3_uri], s3_resource=s3_resource_mock
191+
additional_config_paths=[config_file_s3_uri],
192+
s3_resource=s3_resource_mock,
193+
repeat_log=True,
187194
)
188195

189196

@@ -213,7 +220,7 @@ def test_s3_config_file_when_uri_provided_corresponds_to_a_path(
213220
expected_config = base_config_with_schema
214221
expected_config["SageMaker"] = valid_config_with_all_the_scopes
215222
assert expected_config == load_sagemaker_config(
216-
additional_config_paths=[config_file_s3_uri], s3_resource=s3_resource_mock
223+
additional_config_paths=[config_file_s3_uri], s3_resource=s3_resource_mock, repeat_log=True
217224
)
218225

219226

@@ -242,6 +249,7 @@ def test_merge_of_s3_default_config_file_and_regular_config_file(
242249
assert expected_merged_config == load_sagemaker_config(
243250
additional_config_paths=[additional_override_config_file_path],
244251
s3_resource=s3_resource_mock,
252+
repeat_log=True,
245253
)
246254
del os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"]
247255

@@ -254,7 +262,7 @@ def test_logging_when_overridden_admin_is_found_and_overridden_user_config_is_fo
254262

255263
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = get_data_dir
256264
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = get_data_dir
257-
load_sagemaker_config()
265+
load_sagemaker_config(repeat_log=True)
258266
assert "Fetched defaults config from location: {}".format(get_data_dir) in caplog.text
259267
assert (
260268
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
@@ -275,7 +283,7 @@ def test_logging_when_overridden_admin_is_found_and_default_user_config_not_foun
275283
logger.propagate = True
276284
caplog.set_level(logging.DEBUG, logger=logger.name)
277285
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = get_data_dir
278-
load_sagemaker_config()
286+
load_sagemaker_config(repeat_log=True)
279287
assert "Fetched defaults config from location: {}".format(get_data_dir) in caplog.text
280288
assert (
281289
"Not applying SDK defaults from location: {}".format(_DEFAULT_USER_CONFIG_FILE_PATH)
@@ -297,7 +305,7 @@ def test_logging_when_default_admin_not_found_and_overriden_user_config_is_found
297305
logger.propagate = True
298306
caplog.set_level(logging.DEBUG, logger=logger.name)
299307
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = get_data_dir
300-
load_sagemaker_config()
308+
load_sagemaker_config(repeat_log=True)
301309
assert "Fetched defaults config from location: {}".format(get_data_dir) in caplog.text
302310
assert (
303311
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
@@ -318,7 +326,7 @@ def test_logging_when_default_admin_not_found_and_default_user_config_not_found(
318326
# for admin and user config since both are missing from default location
319327
logger.propagate = True
320328
caplog.set_level(logging.DEBUG, logger=logger.name)
321-
load_sagemaker_config()
329+
load_sagemaker_config(repeat_log=True)
322330
assert (
323331
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
324332
in caplog.text
@@ -342,6 +350,26 @@ def test_logging_when_default_admin_not_found_and_default_user_config_not_found(
342350
logger.propagate = False
343351

344352

353+
@patch("sagemaker.config.config.log_info_function")
354+
def test_load_config_without_repeating_log(log_info):
355+
356+
load_sagemaker_config(repeat_log=False)
357+
assert log_info.call_count == 2
358+
log_info.assert_has_calls(
359+
[
360+
call(
361+
"Not applying SDK defaults from location: %s",
362+
_DEFAULT_ADMIN_CONFIG_FILE_PATH,
363+
),
364+
call(
365+
"Not applying SDK defaults from location: %s",
366+
_DEFAULT_USER_CONFIG_FILE_PATH,
367+
),
368+
],
369+
any_order=True,
370+
)
371+
372+
345373
def test_logging_when_default_admin_not_found_and_overriden_user_config_not_found(
346374
get_data_dir, caplog
347375
):
@@ -351,7 +379,7 @@ def test_logging_when_default_admin_not_found_and_overriden_user_config_not_foun
351379
fake_config_file_path = os.path.join(get_data_dir, "config-not-found.yaml")
352380
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = fake_config_file_path
353381
with pytest.raises(ValueError):
354-
load_sagemaker_config()
382+
load_sagemaker_config(repeat_log=True)
355383
assert (
356384
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
357385
in caplog.text
@@ -374,7 +402,7 @@ def test_logging_when_overriden_admin_not_found_and_overridden_user_config_not_f
374402
os.environ["SAGEMAKER_USER_CONFIG_OVERRIDE"] = fake_config_file_path
375403
os.environ["SAGEMAKER_ADMIN_CONFIG_OVERRIDE"] = fake_config_file_path
376404
with pytest.raises(ValueError):
377-
load_sagemaker_config()
405+
load_sagemaker_config(repeat_log=True)
378406
assert (
379407
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
380408
not in caplog.text
@@ -394,7 +422,7 @@ def test_logging_with_additional_configs_and_none_are_found(caplog):
394422
# Should throw exception when config in additional_config_path is missing
395423
logger.propagate = True
396424
with pytest.raises(ValueError):
397-
load_sagemaker_config(additional_config_paths=["fake-path"])
425+
load_sagemaker_config(additional_config_paths=["fake-path"], repeat_log=True)
398426
assert (
399427
"Not applying SDK defaults from location: {}".format(_DEFAULT_ADMIN_CONFIG_FILE_PATH)
400428
in caplog.text
@@ -414,3 +442,37 @@ def test_load_local_mode_config(mock_load_config):
414442

415443
def test_load_local_mode_config_when_config_file_is_not_found():
416444
assert load_local_mode_config() is None
445+
446+
447+
@pytest.mark.parametrize(
448+
"method_name",
449+
["info", "warning", "debug"],
450+
)
451+
def test_non_repeating_log_factory(method_name):
452+
tmp_logger = logging.getLogger("test-logger")
453+
mock = MagicMock()
454+
setattr(tmp_logger, method_name, mock)
455+
456+
log_function = non_repeating_log_factory(tmp_logger, method_name)
457+
log_function("foo")
458+
log_function("foo")
459+
460+
mock.assert_called_once()
461+
462+
463+
@pytest.mark.parametrize(
464+
"method_name",
465+
["info", "warning", "debug"],
466+
)
467+
def test_non_repeating_log_factory_cache_size(method_name):
468+
tmp_logger = logging.getLogger("test-logger")
469+
mock = MagicMock()
470+
setattr(tmp_logger, method_name, mock)
471+
472+
log_function = non_repeating_log_factory(tmp_logger, method_name, cache_size=2)
473+
log_function("foo")
474+
log_function("bar")
475+
log_function("foo2")
476+
log_function("foo")
477+
478+
assert mock.call_count == 4

0 commit comments

Comments
 (0)