@@ -50,41 +50,26 @@ def calculate_jaccard_similarity(set_a, set_b):
50
50
Calculate Jaccard similarity between two sets of job skills.
51
51
"""
52
52
try :
53
- intersection = set_a .intersection (set_b )
54
- jaccard_similarity = len (intersection ) / len (set_a .union (set_b ))
55
- return jaccard_similarity
53
+ return len (set_a .intersection (set_b )) / len (set_a .union (set_b ))
56
54
except ZeroDivisionError :
57
55
return float (0 )
58
56
59
57
60
- def fetch_job_skills (job , all_job_skills ):
61
- """
62
- Construct a list of all the job skills from the database.
63
-
64
- Returns:
65
- (list<dict>): A list of dicts containing job skills data.
66
- """
67
- job_skills = all_job_skills .filter (job = job )
68
- skills = []
69
- for job_skill in job_skills :
70
- skills .append (job_skill .skill .name )
71
- return skills
72
-
73
-
74
- def combine_jobs_and_skills_data (jobs ):
58
+ def combine_jobs_and_skills_data (jobs_qs ):
75
59
"""
76
60
Combine jobs and skills data.
77
61
62
+ Arguments:
63
+ jobs_qs (QuerySet): Django queryset of Job model that will be used as a starting point to fetch skills data.
64
+
78
65
Returns:
79
66
(list<dict>): A list of dicts containing job and their skills in a list.
80
67
"""
81
- jobs = jobs .all ()
82
- all_job_skills = JobSkills .objects .all ()
83
-
84
68
all_job_and_skills_data = []
85
- for job in jobs :
86
- all_job_skills = JobSkills .objects .filter (job = job )
87
- skills = fetch_job_skills (job , all_job_skills )
69
+ for job in jobs_qs .all ():
70
+ skills = list (
71
+ JobSkills .objects .filter (job = job ).values_list ('skill__name' , flat = True )
72
+ )
88
73
all_job_and_skills_data .append ({
89
74
'name' : job .name ,
90
75
'skills' : skills ,
@@ -98,7 +83,7 @@ def calculate_job_recommendations(jobs):
98
83
Calculate job recommendations.
99
84
100
85
Args:
101
- job (list<dict>): AA list of dicts containing job and their skills in a list.
86
+ jobs (list<dict>): A list of dicts containing job and their skills in a list.
102
87
103
88
Returns:
104
89
(list<dict>): A list of dicts containing jobs and their recommended jobs.
@@ -126,6 +111,7 @@ def calculate_job_recommendations(jobs):
126
111
'jaccard_similarity' : "float16" ,
127
112
}
128
113
114
+ LOGGER .info ('[TAXONOMY] [DEBUG] Calculating similar jobs for the active jobs.' )
129
115
similar_jobs = pd .DataFrame (
130
116
{
131
117
'job' : candidate_jobs ,
@@ -134,10 +120,19 @@ def calculate_job_recommendations(jobs):
134
120
},
135
121
).astype (dtype_dict )
136
122
123
+ LOGGER .info ('[TAXONOMY] [DEBUG] similar jobs data frame created.' )
137
124
similar_jobs ['rank' ] = similar_jobs .groupby ('job' )['jaccard_similarity' ].rank (method = 'first' , ascending = False )
138
- mask = (similar_jobs ['rank' ] <= 3 )
139
- similar_jobs = similar_jobs [mask ].sort_values (by = ['job' , 'rank' ], ascending = [True , True ])
140
125
126
+ # This line is necessary as it makes sure to save a copy of the data frame generated by
127
+ # `similar_jobs[similar_jobs['rank'] <= 3]` in a new variable.
128
+ similar_jobs = similar_jobs [similar_jobs ['rank' ] <= 3 ]
129
+ similar_jobs .sort_values (
130
+ by = ['job' , 'rank' ],
131
+ ascending = [True , True ],
132
+ inplace = True ,
133
+ )
134
+
135
+ LOGGER .info ('[TAXONOMY] [DEBUG] similar jobs calculation complete.' )
141
136
jobs_and_recommendations = []
142
137
for job in jobs :
143
138
jobs_and_recommendations .append ({
@@ -151,14 +146,19 @@ def combine_industry_skills():
151
146
"""
152
147
Constructs a dict with keys as industry names and values as their skills.
153
148
"""
154
- industries = list (Industry .objects .all ())
155
149
industries_and_skills = {}
156
- for industry in industries :
150
+ for industry in Industry . objects . all () :
157
151
# sum all significances for the same skill and then sort on total significance
158
152
skills = list (
159
- IndustryJobSkill .objects .filter (industry = industry ).values_list ('skill__name' , flat = True ).annotate (
160
- total_significance = Sum ('significance' )).order_by ('-total_significance' ).distinct ()[
161
- :EMBEDDED_OBJECT_LENGTH_CAP ]
153
+ IndustryJobSkill .objects .filter (
154
+ industry = industry
155
+ ).values_list (
156
+ 'skill__name' , flat = True
157
+ ).annotate (
158
+ total_significance = Sum ('significance' )
159
+ ).order_by (
160
+ '-total_significance'
161
+ ).distinct ()[:EMBEDDED_OBJECT_LENGTH_CAP ]
162
162
)
163
163
industries_and_skills [industry .name ] = skills
164
164
return industries_and_skills
0 commit comments