-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
fix: 🐛 Hotfix. Change selectors due to new html structure #905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
91c7b17
e3b861c
b64eb03
69e45b6
b8d7ffa
3a9e5b7
8c10dcd
ed337c7
6fc96ed
be54105
72027a8
6280296
306fca6
1c1b7f3
fd69fc0
c5955bd
0330af9
e18e2e6
e038183
1f68c27
a349a4a
48481b5
f21530d
eb3e74a
218af31
3772d6b
51fe7eb
86ef242
101a42e
a2bfb04
d7aeabf
2dd187a
76b06ae
dba7b10
170c413
8411cf8
22f2c3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -253,7 +253,9 @@ def get_jobs_from_page(self, scroll=False): | |
pass | ||
|
||
try: | ||
jobs_container = self.driver.find_element(By.CLASS_NAME, 'scaffold-layout__list-container') | ||
# XPath query to find the ul tag with class scaffold-layout__list-container | ||
jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]" | ||
jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query) | ||
|
||
if scroll: | ||
jobs_container_scrolableElement = jobs_container.find_element(By.XPATH,"..") | ||
surapuramakhil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -262,7 +264,7 @@ def get_jobs_from_page(self, scroll=False): | |
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement) | ||
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True) | ||
|
||
job_element_list = jobs_container.find_elements(By.CSS_SELECTOR, 'div[data-job-id]') | ||
job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say that relying on "ember-view" instead of "data-job-id" is more likely to cause problems again, so I would revert this change. |
||
|
||
if not job_element_list: | ||
logger.debug("No job class elements found on page, skipping.") | ||
|
@@ -279,20 +281,8 @@ def get_jobs_from_page(self, scroll=False): | |
return [] | ||
|
||
def read_jobs(self): | ||
try: | ||
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand') | ||
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower(): | ||
raise Exception("No more jobs on this page") | ||
except NoSuchElementException: | ||
pass | ||
|
||
jobs_container = self.driver.find_element(By.CLASS_NAME, 'scaffold-layout__list-container') | ||
browser_utils.scroll_slow(self.driver, jobs_container) | ||
browser_utils.scroll_slow(self.driver, jobs_container, step=300, reverse=True) | ||
|
||
job_element_list = jobs_container.find_elements(By.CSS_SELECTOR, 'div[data-job-id]') | ||
if not job_element_list: | ||
raise Exception("No job elements found on page") | ||
job_element_list = self.get_jobs_from_page() | ||
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] | ||
for job in job_list: | ||
if self.is_blacklisted(job.title, job.company, job.link, job.location): | ||
|
@@ -483,7 +473,7 @@ def job_tile_to_job(self, job_tile) -> Job: | |
logger.warning("Job link is missing.") | ||
|
||
try: | ||
job.company = job_tile.find_element(By.XPATH, './/span[contains(normalize-space(), " · ")]').text.split(' · ')[0].strip() | ||
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not robust. I also advise reverting it. |
||
logger.debug(f"Job company extracted: {job.company}") | ||
except NoSuchElementException as e: | ||
logger.warning(f'Job company is missing. {e} {traceback.format_exc()}') | ||
|
@@ -500,15 +490,21 @@ def job_tile_to_job(self, job_tile) -> Job: | |
logger.warning(f"Failed to extract job ID: {e}", exc_info=True) | ||
|
||
try: | ||
job.location = job_tile.find_element(By.XPATH, './/span[contains(normalize-space(), " · ")]').text.split(' · ')[-1].strip() | ||
job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This simply doesn't work. If we check, 'job-card-container__metadata-item' never corresponds to the location, but to the salary range. It has to be reverted, without a doubt. |
||
except NoSuchElementException: | ||
logger.warning("Job location is missing.") | ||
|
||
|
||
try: | ||
job.apply_method = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'job-card-container__job-insight-text') and normalize-space() = 'Easy Apply']").text | ||
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't work. It returns an empty array all of the time. Revert. |
||
except NoSuchElementException as e: | ||
job.apply_method = "Applied" | ||
logger.warning(f'Apply method not found, assuming \'Applied\'. {e} {traceback.format_exc()}') | ||
try: | ||
# Fetching state when apply method is not found | ||
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text | ||
job.apply_method = "Applied" | ||
logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}') | ||
except NoSuchElementException as e: | ||
logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}') | ||
|
||
return job | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They are unlikely to keep this class on a tag other than ul, so both approaches are equivalent.