Skip to content

Commit 6ba36fd

Browse files
dirkrkotzemldirkkotze-workRudolf LuttichRudolf07688
authored
Sagemaker Operator Character limit fix (#45551)
Co-authored-by: Dirk Kotze <[email protected]> Co-authored-by: Rudolf Luttich <[email protected]> Co-authored-by: Rudolf07688 <[email protected]>
1 parent 6afde78 commit 6ba36fd

File tree

3 files changed

+44
-9
lines changed

3 files changed

+44
-9
lines changed

providers/src/airflow/providers/amazon/aws/operators/sagemaker.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,11 @@ def _get_unique_name(
165165
if fail_if_exists:
166166
raise AirflowException(f"A SageMaker {resource_type} with name {name} already exists.")
167167
else:
168-
name = f"{proposed_name}-{time.time_ns()//1000000}"
168+
max_name_len = 63
169+
timestamp = str(
170+
time.time_ns() // 1000000000
171+
) # only keep the relevant datetime (first 10 digits)
172+
name = f"{proposed_name[:max_name_len - len(timestamp) - 1]}-{timestamp}" # we subtract one to make provision for the dash between the truncated name and timestamp
169173
self.log.info("Changed %s name to '%s' to avoid collision.", resource_type, name)
170174
return name
171175

providers/tests/amazon/aws/operators/test_sagemaker_base.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949

5050
EXPECTED_INTEGER_FIELDS: list[list[Any]] = []
5151

52+
MOCK_UNIX_TIME = 1234567890123456789 # reproducible time for testing time.time_ns()
53+
5254

5355
class TestSageMakerBaseOperator:
5456
ERROR_WHEN_RESOURCE_NOT_FOUND = ClientError({"Error": {"Code": "ValidationException"}}, "op")
@@ -92,6 +94,31 @@ def test_job_renamed(self):
9294
assert describe_mock.call_count == 3
9395
assert re.match("test-[0-9]+$", name)
9496

97+
@patch("airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=MOCK_UNIX_TIME)
98+
def test_job_name_length(self, _):
99+
describe_mock = MagicMock()
100+
# scenario: The name is longer than 63 characters so we need the function to truncate the name and add a timestamp
101+
describe_mock.side_effect = [None, None, self.ERROR_WHEN_RESOURCE_NOT_FOUND]
102+
name = self.sagemaker._get_unique_job_name(
103+
"ThisNameIsLongerThan64CharactersSoItShouldBeTruncatedWithATimestamp", False, describe_mock
104+
)
105+
assert len(name) <= 63
106+
107+
@patch("airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=MOCK_UNIX_TIME)
108+
def test_truncated_job_name(self, _):
109+
describe_mock = MagicMock()
110+
111+
describe_mock.side_effect = [None, None, self.ERROR_WHEN_RESOURCE_NOT_FOUND]
112+
113+
# scenario: The name is longer than 63 characters so we need the function to truncate the name and add a timestamp
114+
full_name = "ThisNameIsLongerThan64CharactersSoItShouldBeTruncatedWithATimestamp"
115+
116+
name = self.sagemaker._get_unique_job_name(full_name, False, describe_mock)
117+
118+
base_name, timestamp = name.split("-")
119+
assert base_name == full_name[: len(base_name)]
120+
assert timestamp == str(MOCK_UNIX_TIME)[:10]
121+
95122
def test_job_not_unique_with_fail(self):
96123
with pytest.raises(AirflowException):
97124
self.sagemaker._get_unique_job_name("test", True, lambda _: None)
@@ -116,7 +143,7 @@ def test_get_unique_name_raises_exception_if_name_exists_when_fail_is_true(self)
116143

117144
assert str(context.value) == "A SageMaker model with name existing_name already exists."
118145

119-
@patch("airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=3000000)
146+
@patch("airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=MOCK_UNIX_TIME)
120147
def test_get_unique_name_avoids_name_collision(self, time_mock):
121148
new_name = self.sagemaker._get_unique_name(
122149
"existing_name",
@@ -126,7 +153,7 @@ def test_get_unique_name_avoids_name_collision(self, time_mock):
126153
resource_type="model",
127154
)
128155

129-
assert new_name == "existing_name-3"
156+
assert new_name == "existing_name-1234567890"
130157

131158
def test_get_unique_name_checks_only_once_when_resource_does_not_exist(self):
132159
describe_func = MagicMock(side_effect=ClientError({"Error": {"Code": "ValidationException"}}, "op"))

providers/tests/amazon/aws/operators/test_sagemaker_transform.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@
6868

6969
CONFIG: dict = {"Model": CREATE_MODEL_PARAMS, "Transform": CREATE_TRANSFORM_PARAMS}
7070

71+
MOCK_UNIX_TIME: int = 1234567890123456789 # reproducible time for testing time.time_ns()
72+
7173

7274
class TestSageMakerTransformOperator:
7375
def setup_method(self):
@@ -182,10 +184,12 @@ def test_execute_without_check_if_job_exists(self, _, __, ___, mock_transform, _
182184
max_ingestion_time=None,
183185
)
184186

185-
@mock.patch( # since it is divided by 1000000, the added timestamp should be 2.
186-
"airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=2000000
187+
@mock.patch( # since it is divided by 1000000000, the added timestamp should be 1234567890.
188+
"airflow.providers.amazon.aws.operators.sagemaker.time.time_ns", return_value=MOCK_UNIX_TIME
189+
)
190+
@mock.patch.object(
191+
SageMakerHook, "describe_transform_job", return_value={"ModelName": "model_name-1234567890"}
187192
)
188-
@mock.patch.object(SageMakerHook, "describe_transform_job", return_value={"ModelName": "model_name-2"})
189193
@mock.patch.object(
190194
SageMakerHook,
191195
"create_transform_job",
@@ -200,7 +204,7 @@ def test_execute_without_check_if_job_exists(self, _, __, ___, mock_transform, _
200204
side_effect=[
201205
None,
202206
ClientError({"Error": {"Code": "ValidationException"}}, "op"),
203-
"model_name-2",
207+
"model_name-1234567890",
204208
],
205209
)
206210
@mock.patch.object(sagemaker, "serialize", return_value="")
@@ -215,9 +219,9 @@ def test_when_model_already_exists_it_should_add_timestamp_to_model_name(
215219
self.sagemaker.execute(None)
216220

217221
mock_describe_model.assert_has_calls(
218-
[mock.call("model_name"), mock.call("model_name-2"), mock.call("model_name-2")]
222+
[mock.call("model_name"), mock.call("model_name-1234567890"), mock.call("model_name-1234567890")]
219223
)
220-
mock_create_model.assert_called_once_with({"ModelName": "model_name-2"})
224+
mock_create_model.assert_called_once_with({"ModelName": "model_name-1234567890"})
221225

222226
@mock.patch.object(SageMakerHook, "describe_transform_job")
223227
@mock.patch.object(SageMakerHook, "create_transform_job")

0 commit comments

Comments
 (0)