# scraper_linkedin.py (forked from kislaykumarkk/Scraper)
from selenium import webdriver
# Imports needed only by the commented-out Amazon review scraper below:
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup as BS
import pandas as pd  # used only by the commented-out CSV export below
import numpy as np   # used only by the commented-out CSV export below
#import pdfkit
# On macOS, run this in your terminal to remove the quarantine flag from chromedriver:
#   xattr -d com.apple.quarantine chromedriver
# Shared state for the commented-out Amazon review loop below.
i = 1
big_username_array = []
big_review_titles_array = []
chrome_options = webdriver.ChromeOptions()
chrome_options.add_extension("extension_3_2_3_0.crx")  # load a packed Chrome extension
# Selenium 4 renamed the keyword argument from chrome_options to options.
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.linkedin.com/in/zachariah-mithani-4212271b2/")
#driver.switch_to.window(driver.window_handles[0])
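# The script currently stops after loading the profile. A minimal sketch of
# what parsing could look like, assuming the profile renders without a login
# wall; the "h1" selector is an assumption (LinkedIn's markup changes often):
soup = BS(driver.page_source, "html.parser")
name_tag = soup.select_one("h1")  # hypothetical selector for the profile name
if name_tag is not None:
    print(name_tag.get_text(strip=True))
driver.quit()  # close the browser once parsing is done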
# Legacy Amazon review scraper, kept for reference:
# while i <= 2:
#     # ChromeDriver opens the browser. Download it from
#     # https://sites.google.com/a/chromium.org/chromedriver/downloads
#     # and point Service at the local binary.
#     s = Service('/Users/kaustav/Downloads/test/Scraper/chromedriver')
#     driver = webdriver.Chrome(service=s)
#     # URL of the review page to scrape, one page per iteration.
#     url = "https://www.amazon.com/product-reviews/1338596705/ref=cm_cr_arp_d_viewopt_sr?pageNumber="
#     url = url + str(i)
#     driver.get(url)
#     # Parse the rendered HTML with BeautifulSoup (an explicit parser avoids a warning).
#     soup = BS(driver.page_source, "html.parser")
#     # Usernames
#     username_array = []
#     for username in soup.select('#cm_cr-review_list .a-profile-name'):
#         username_array.append(username.text.strip())
#     big_username_array.extend(username_array)
#     # Review titles
#     review_titles_array = []
#     for review_title in soup.select('#cm_cr-review_list .review-title'):
#         review_titles_array.append(review_title.text.strip())
#     big_review_titles_array.extend(review_titles_array)
#     driver.quit()  # close the browser for this page
#     i = i + 1
# Status / CSV export for the legacy scraper:
# print(big_username_array)
# a = np.array(big_username_array)
# b = np.array(big_review_titles_array)
# df = pd.DataFrame({"Username": a, "ReviewTitle": b})
# df.to_csv("output.csv", index=False)
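# If the loop above is re-enabled, the export can skip the numpy round-trip,
# since pandas accepts plain lists directly. A sketch, assuming both lists
# end up the same length (pandas raises a ValueError otherwise):
if big_username_array and big_review_titles_array:
    df = pd.DataFrame({"Username": big_username_array,
                       "ReviewTitle": big_review_titles_array})
    df.to_csv("output.csv", index=False)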