Skip to content

Commit 50d77ac

Browse files
fix: chunked data in EMSI client for xblock-skills job (#182)
* fix: chunked data at 50000 byte in EMSI client for xblock-skills job * fix: Added unit test
1 parent 2c39d13 commit 50d77ac

File tree

4 files changed

+28
-1
lines changed

4 files changed

+28
-1
lines changed

CHANGELOG.rst

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ Change Log
1313
1414
Unreleased
1515

16+
[1.44.2] - 2023-09-11
17+
---------------------
18+
* fix: chunked data at 50000 byte in EMSI client for xblock-skills job
19+
1620
[1.44.1] - 2023-08-25
1721
---------------------
1822
* feat: add prefetch related to the whitelisted product skills

taxonomy/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@
1515
# 2. MINOR version when you add functionality in a backwards compatible manner, and
1616
# 3. PATCH version when you make backwards compatible bug fixes.
1717
# More details can be found at https://semver.org/
18-
__version__ = '1.44.1'
18+
__version__ = '1.44.2'
1919

2020
default_app_config = 'taxonomy.apps.TaxonomyConfig' # pylint: disable=invalid-name

taxonomy/emsi/client.py

+6
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ class EMSISkillsApiClient(JwtEMSIApiClient):
183183
"""
184184

185185
API_BASE_URL = urljoin(JwtEMSIApiClient.API_BASE_URL, '/skills/versions/8.9')
186+
MAX_LIGHTCAST_DATA_SIZE = 50000 # LightCast supports at most 50,000 bytes of data
186187

187188
def __init__(self):
188189
"""
@@ -229,6 +230,11 @@ def get_product_skills(self, text_data):
229230
Returns:
230231
dict: A dictionary containing details of all the skills.
231232
"""
233+
234+
if text_data and len(text_data) > self.MAX_LIGHTCAST_DATA_SIZE:
235+
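# Truncate text_data to the 50,000-byte limit supported by LightCast. NOTE(review): if text_data is a str, this slice counts characters, not bytes - confirm the limit still holds for non-ASCII text.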
236+
text_data = text_data[:self.MAX_LIGHTCAST_DATA_SIZE]
237+
232238
data = {
233239
'text': text_data
234240
}

tests/emsi/test_client.py

+17
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import logging
77
from time import time
8+
from unittest import mock
9+
from faker import Faker
810

911
import responses
1012
from pytest import raises
@@ -161,6 +163,21 @@ def test_get_product_skills(self):
161163

162164
assert skills == SKILLS_EMSI_CLIENT_RESPONSE
163165

166+
def test_get_product_skills_large_text(self):
167+
"""
168+
Validate the behavior of the client while fetching product skills for very large text.
169+
"""
170+
api_response = mock.Mock()
171+
api_response.json.return_value = SKILLS_EMSI_RESPONSE
172+
self.client.is_token_expired = mock.Mock(return_value=False)
173+
self.client.client = mock.MagicMock(post=mock.Mock(return_value=api_response))
174+
175+
max_data_size = self.client.MAX_LIGHTCAST_DATA_SIZE
176+
skill_text_data = Faker().text(max_data_size + max_data_size * 0.1)
177+
self.client.get_product_skills(skill_text_data)
178+
179+
assert len(self.client.client.post.call_args_list[0][1]['json']['text']) == max_data_size
180+
164181
@mock_api_response(
165182
method=responses.POST,
166183
url=EMSISkillsApiClient.API_BASE_URL + '/extract',

0 commit comments

Comments
 (0)