Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/crs_scraper/__pycache__/
.venv
.venv
.env
181 changes: 169 additions & 12 deletions crs_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,56 @@
# from crs_scraper.crs_data import Data
# from crs_scraper.data_sorter import DataSorter, ScheduleGenerator

# For .env
from dotenv import load_dotenv

# Load .env into os.environ
load_dotenv()

# Optimized libraries
from crs_scraper.optimized_crscraper_preenlistment import CRScraperPreEnlistment
from crs_scraper.optimized_crscraper_student_registration import CRScraperStudentRegistration
from crs_scraper.data_sorter import ScheduleGenerator

from flask import Flask, Response, jsonify, make_response, request
from flask import Flask, Response, jsonify, make_response, url_for, request, session
from flask_cors import CORS
import csv
import os
from requests import Session
from requests.cookies import RequestsCookieJar
from requests.utils import dict_from_cookiejar

# Login with Google OAuth feature
from authlib.integrations.flask_client import OAuth
from bs4 import BeautifulSoup
from requests.utils import cookiejar_from_dict
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# ------------------------------------------------------------
app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})
app = Flask("CRScraper")
CORS(app, resources={r"/*": {"origins": "http://localhost:3000"}})
app.config['DEBUG'] = True

# Login with Google OAuth feature
app.secret_key = os.getenv("FLASK_SECRET_KEY")
app.config.update({
"GOOGLE_CLIENT_ID": os.getenv("GOOGLE_CLIENT_ID"),
"GOOGLE_CLIENT_SECRET": os.getenv("GOOGLE_CLIENT_SECRET"),
"GOOGLE_DISCOVERY_URL": os.getenv("GOOGLE_DISCOVERY_URL"),
})

oauth = OAuth(app)
google = oauth.register(
name="google",
client_id=app.config["GOOGLE_CLIENT_ID"],
client_secret=app.config["GOOGLE_CLIENT_SECRET"],
server_metadata_url=app.config["GOOGLE_DISCOVERY_URL"],
client_kwargs={ "scope": "openid email profile" },
)
# ------------------------------------------------------------


Expand Down Expand Up @@ -46,15 +82,21 @@
# https://crs.upd.edu.ph/student_registration/class_search/19405, https://crs.upd.edu.ph/student_registration/class_search/19398, https://crs.upd.edu.ph/student_registration/class_search/19403, https://crs.upd.edu.ph/student_registration/class_search/19404, https://crs.upd.edu.ph/student_registration/class_search/19480
# ------------------------------------------------------------

# test https://crs.upd.edu.ph/student_registration/class_search/18843, https://crs.upd.edu.ph/student_registration/class_search/14732

all_course_table_schedule_url: list[str] = []
crs_username_global = ""
crs_password_global = ""
clicked_login_with_google = False
# ------------------------------------------------------------


# ------------------------------------------------------------
@app.route('/login', methods=['POST'])
def login() -> Response:
"""
DEPRECATED: Due to CRS new login feature.
"""
global crs_username_global, crs_password_global

credentials = request.json
Expand Down Expand Up @@ -86,6 +128,83 @@ def login() -> Response:
return response


@app.route('/login-with-gmail')
def login_with_google():
    """Serve the interstitial page shown before the CRS login continues.

    The page displays a short notice and, after a one-second pause, its
    inline script navigates the browser to ``/_continue_crs_oauth``.

    Returns:
        The HTML document as a string.
    """
    # The markup is static, so it is returned directly as a literal.
    return """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>CRS Login</title>
<style>
body {
font-family: sans-serif;
text-align: center;
}
.box {
display: inline-block;
padding: 1.5rem;
border: 2px solid #444;
border-radius: 8px;
}
</style>
</head>
<body>
<div class="box">
<h2>⚙️ Redirecting to CRS…</h2>
<p>Please complete your UP Mail login in the new window.</p>
<p>If you don’t see anything, check your pop-up blocker.</p>
<p>Note: This is for authentication purposes.</p>
</div>
<script>
// Kick off the real login handshake after a short pause
setTimeout(() => {
// change location to the actual OAuth endpoint
window.location.href = '/_continue_crs_oauth';
}, 1000);
</script>
</body>
</html>
"""


@app.route('/_continue_crs_oauth')
def _continue_crs_oauth() -> Response:
    """Run the browser-based CRS login and keep its cookies in the session.

    Sets the module-level ``clicked_login_with_google`` flag, asks a
    ``CRScraperPreEnlistment`` instance to perform
    ``login_with_google_token()``, copies the scraper's ``_selenium_cookies``
    into the Flask session under ``'crs_selenium_cookies'``, and replies with
    a script that posts a success message to the opener window and then
    closes the current window.

    Returns:
        A 200 response whose body is the inline notification script.
    """
    global clicked_login_with_google
    clicked_login_with_google = True

    # No username/password is supplied for this login path.
    crs_client = CRScraperPreEnlistment(
        login_url=login_url,
        username=None,
        password=None,
        all_course_table_schedule_url=all_course_table_schedule_url,
    )

    # NOTE(review): per the original author's note this opens Chrome so the
    # user can log in (including 2FA) and then collects the CRS cookies; the
    # helper is defined outside this view, so confirm against its source.
    crs_client.login_with_google_token()

    # Keep the harvested cookies in the Flask session for later `/scrape` use.
    harvested_cookies = crs_client._selenium_cookies
    session['crs_selenium_cookies'] = harvested_cookies
    app.logger.debug(f"Selenium gave: {session.get('crs_selenium_cookies')!r}")

    # Seamless login idea (not enabled):
    # crs_client.login_with_id_token(id_token)
    # session['crs_cookies_dict'] = dict_from_cookiejar(crs_client.session.cookies)
    # app.logger.debug(f"Scraper cookies!: {session.get('crs_cookies_dict')!r}")

    # Tell the SPA that opened this window that login finished, then close it.
    notify_opener_script = """
<script>
window.opener.postMessage({ status: 'success' }, 'http://localhost:3000');
window.close();
</script>
"""
    return make_response(notify_opener_script, 200)


@app.route('/set-urls', methods=['POST'])
def set_urls() -> Response:
global all_course_table_schedule_url
Expand All @@ -108,25 +227,59 @@ def set_urls() -> Response:

@app.route('/scrape', methods=['POST'])
def scrape() -> Response:
global crs_username_global, crs_password_global, all_course_table_schedule_url

if not all_course_table_schedule_url:
# Return a failure response with a 400 status code using make_response
response = make_response(jsonify({"message": "No course links set yet", "status": "failure"}), 400)
return response
if clicked_login_with_google:
raw = session.get('crs_selenium_cookies')
if not raw:
return make_response(jsonify({"status":"failure","message":"Not logged in"}), 401)

jar = RequestsCookieJar()
for c in raw:
jar.set(
name = c['name'],
value = c['value'],
domain = c.get('domain', 'crs.upd.edu.ph'),
path = c.get('path', '/'),
secure = c.get('secure', False),
rest = {'HttpOnly': c.get('httpOnly', False)}
)

s = Session()
s.cookies = jar

# Debug: log what cookies will be sent
app.logger.debug(f"Requests sends: {s.cookies.items()}")

# debug: confirm you’re still logged in
r = s.get("https://crs.upd.edu.ph/user/view/classmessages")
open("debug_after_rebuild.html","w",encoding="utf8").write(r.text)

global crs_username_global, crs_password_global, all_course_table_schedule_url

if not all_course_table_schedule_url:
# Return a failure response with a 400 status code using make_response
response = make_response(jsonify({"message": "No course links set yet", "status": "failure"}), 400)
return response

app.logger.debug(f"Scraping data for {crs_username_global} with course links {all_course_table_schedule_url}!")
app.logger.debug(f"Scraping data for {crs_username_global} with course links {all_course_table_schedule_url}!")

# ----------------------------------------------------------------
data = None

# Determine whether the links are pre-enlistment or student registration
if "preenlistment" in all_course_table_schedule_url[0]:
crs_scraper = CRScraperPreEnlistment(login_url, crs_username_global, crs_password_global, all_course_table_schedule_url)
data = crs_scraper.main()
if clicked_login_with_google:
crs_scraper.session = s
data = crs_scraper.main_with_email()
else:
data = crs_scraper.main()
elif "student_registration" in all_course_table_schedule_url[0]:
crs_scraper = CRScraperStudentRegistration(login_url, crs_username_global, crs_password_global, all_course_table_schedule_url)
data = crs_scraper.main()
if clicked_login_with_google:
crs_scraper.session = s
data = crs_scraper.main_with_email()
else:
data = crs_scraper.main()
# ----------------------------------------------------------------

# Scraping logic
Expand All @@ -145,10 +298,14 @@ def scrape() -> Response:
schedules = data_generator.generate_schedules(data)
# data_generator.display_all_possible_schedules(schedules)

app.logger.debug(f"Raw schedules from generator: {schedules!r}")

ranked_schedules = data_generator.rank_by_probability(schedules)
# data_generator.display_all_possible_schedules(ranked_schedules)
data_generator.convert_to_csv(ranked_schedules, "schedules_ranked.csv")

app.logger.debug(f"Ranked schedules from generator: {ranked_schedules!r}")

app.logger.debug(f"Schedules generated and ranked successfully in schedules_ranked.csv! {ranked_schedules}")

# Return a success response with a 200 status code using make_response
Expand Down
4 changes: 3 additions & 1 deletion crs_scraper/crscraper_preenlistment.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ def access_all_possible_course_schedules(self) -> Optional[list[dict[str, str |
if table:
# Iterate over each row in the table body
for row in table.find_all("tr")[1:]: # Skip the header row
cells: list[Tag] = row.find_all("td")
if not isinstance(row, Tag):
continue
cells: list[Tag] = [cell for cell in row.find_all("td") if isinstance(cell, Tag)]

if len(cells) > 0:

Expand Down
4 changes: 3 additions & 1 deletion crs_scraper/crscraper_student_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def access_all_possible_course_schedules(self) -> Optional[list[dict[str, str |
if table:
# Iterate over each row in the table body
for row in table.find_all("tr")[1:]: # Skip the header row
cells: list[Tag] = row.find_all("td")
if not isinstance(row, Tag):
continue
cells: list[Tag] = [cell for cell in row.find_all("td") if isinstance(cell, Tag)]

if len(cells) > 0:

Expand Down
1 change: 1 addition & 0 deletions crs_scraper/data_sorter.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def __init__(self, subjects_with_time: ListOfCoursesWithTime) -> None:
"WF": ["Wednesday", "Friday"],
"TF": ["Tuesday", "Friday"],
"MW": ["Monday", "Wednesday"],
"MTWThF": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"],
}

def parse_time(self, time: str) -> tuple[datetime, datetime]:
Expand Down
Loading