Skip to content

Commit

Permalink
Merge pull request #176 from openedx/saleem-latif/ENT-7123-algolia
Browse files Browse the repository at this point in the history
ENT-7123: Added the ability to exclude unrelated jobs from being indexed on algolia.
  • Loading branch information
saleem-latif authored Aug 9, 2023
2 parents 9689161 + 5edb865 commit 1e5e4d8
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Change Log
Unreleased

[1.44.0] - 2023-08-09
---------------------
* feat: Added the ability to exclude unrelated jobs from being indexed on algolia.

[1.43.4] - 2023-08-02
---------------------
* fix: Added missing comma in algolia constants that was masking the b2c_opt_in attribute.
Expand Down
2 changes: 1 addition & 1 deletion taxonomy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
# 2. MINOR version when you add functionality in a backwards compatible manner, and
# 3. PATCH version when you make backwards compatible bug fixes.
# More details can be found at https://semver.org/
__version__ = '1.43.4'
__version__ = '1.44.0'

default_app_config = 'taxonomy.apps.TaxonomyConfig' # pylint: disable=invalid-name
5 changes: 5 additions & 0 deletions taxonomy/algolia/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@

# This is the maximum number of objects that should be embedded inside an algolia record.
EMBEDDED_OBJECT_LENGTH_CAP = 20

# External IDs of all the jobs that should not be indexed on algolia.
JOBS_TO_IGNORE = [
'ET0000000000000000', # 'Unclassified' job
]
15 changes: 10 additions & 5 deletions taxonomy/algolia/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,21 @@
Utility functions related to algolia indexing.
"""
import logging
from datetime import datetime
from collections import deque, namedtuple
from datetime import datetime

from django.conf import settings
from django.db.models import Sum
from django.db.models import Q, Sum

from taxonomy.algolia.client import AlgoliaClient
from taxonomy.algolia.constants import ALGOLIA_JOBS_INDEX_SETTINGS, JOBS_PAGE_SIZE, EMBEDDED_OBJECT_LENGTH_CAP
from taxonomy.algolia.constants import (
ALGOLIA_JOBS_INDEX_SETTINGS,
EMBEDDED_OBJECT_LENGTH_CAP,
JOBS_PAGE_SIZE,
JOBS_TO_IGNORE,
)
from taxonomy.algolia.serializers import JobSerializer
from taxonomy.models import Job, Industry, JobSkills, IndustryJobSkill
from taxonomy.models import Industry, IndustryJobSkill, Job, JobSkills

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -238,7 +243,7 @@ def fetch_jobs_data():
Returns:
(list<dict>): A list of dicts containing job data.
"""
qs = Job.objects.exclude(name__isnull=True)
qs = Job.objects.exclude(Q(name__isnull=True) | Q(external_id__in=JOBS_TO_IGNORE))

LOGGER.info('[TAXONOMY] Started combining skills and recommendations data for the jobs.')
jobs_data = fetch_and_combine_job_details(qs)
Expand Down

0 comments on commit 1e5e4d8

Please sign in to comment.