Skip to content

Commit

Permalink
Add course scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
aditeyabaral committed Apr 9, 2024
1 parent a9b63df commit f15d483
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pesu_academy/exceptions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .authentication import AuthenticationError
from .csrf import CSRFTokenError
from .authentication import AuthenticationError
2 changes: 1 addition & 1 deletion pesu_academy/exceptions/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ def __init__(self, message):
super().__init__(self.message)

def __str__(self):
return f"{self.message}"
return f"{self.message}"
3 changes: 1 addition & 2 deletions pesu_academy/exceptions/csrf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
class CSRFTokenError (Exception):
class CSRFTokenError(Exception):
    """Exception that carries a single message, named for CSRF token failures."""

    def __init__(self, message):
        # Hand the message to Exception first so ``args`` is populated,
        # then keep it on the instance for direct attribute access.
        super().__init__(message)
        self.message = message

    def __str__(self):
        # Render exactly the stored message, formatted with an empty spec.
        return format(self.message)

1 change: 1 addition & 0 deletions pesu_academy/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .course import Course
from .profile import Profile
9 changes: 9 additions & 0 deletions pesu_academy/models/course.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
class Course:
    """
    Plain record describing one course entry.

    :param code: The course code.
    :param title: The course title.
    :param _type: The course type; stored on the instance as ``type``.
    :param status: The course status.
    """

    def __init__(self, code: str, title: str, _type: str, status: str):
        self.code = code
        self.title = title
        # The parameter is spelled ``_type`` so it does not shadow the builtin,
        # while the public attribute keeps the natural name.
        self.type = _type
        self.status = status

    def __repr__(self):
        # Containers (lists/dicts of courses) display their items with repr();
        # without this they would show only the default object address.
        return (
            f"{type(self).__name__}(code={self.code!r}, title={self.title!r}, "
            f"type={self.type!r}, status={self.status!r})"
        )

    def __str__(self):
        # String form is the instance attribute dictionary.
        return f"{self.__dict__}"
2 changes: 1 addition & 1 deletion pesu_academy/pages/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .profile import get_profile_page
from .profile import get_profile_page
79 changes: 79 additions & 0 deletions pesu_academy/pages/courses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import datetime
from typing import Optional

import requests_html
from bs4 import BeautifulSoup

from pesu_academy.models import Course


def get_courses_in_semester(session: requests_html.HTMLSession, semester_value: Optional[int] = None) -> list[Course]:
    """
    Fetch the course table for one semester and convert its rows to Course objects.

    :param session: The session used to issue the GET request.
    :param semester_value: Sent as the ``id`` query parameter. It is interpolated into a string as-is,
        so ``None`` is sent as the text "None".
    :return: One Course per table row that has at least four cells, in page order. The list is empty
        when the page has no course table or reports that no subjects were found.
    :raises ConnectionError: If the request fails, the response status is not 200, or the response
        cannot be parsed.
    """
    url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
    query = {
        "menuId": "653",
        "controllerMode": "6403",
        "actionType": "38",
        "id": f"{semester_value}",
        # Current time in milliseconds, sent as the "_" query parameter.
        "_": str(int(datetime.datetime.now().timestamp() * 1000)),
    }
    try:
        response = session.get(url, allow_redirects=False, params=query)
    except Exception as error:
        # Keep the original failure attached as the cause for debugging.
        raise ConnectionError("Unable to fetch course data.") from error
    if response.status_code != 200:
        raise ConnectionError("Unable to fetch course data.")
    try:
        soup = BeautifulSoup(response.text, "lxml")
    except Exception as error:
        raise ConnectionError("Unable to fetch course data.") from error

    # find() returns None when nothing matches, so guard both lookups
    # instead of failing with an AttributeError on None.
    table = soup.find("table", attrs={"class": "table table-hover box-shadow"})
    table_body = table.find("tbody") if table is not None else None
    if table_body is None:
        return []

    courses = []
    for row in table_body.find_all("tr"):
        cells = [cell.text.strip() for cell in row.find_all("td")]
        # The placeholder row contains line breaks and tabs between the words;
        # collapse whitespace so the check does not depend on the exact markup.
        if len(cells) == 1 and " ".join(cells[0].split()) == "No subjects found":
            break
        if len(cells) < 4:
            # Not a course row (too few cells to hold code/title/type/status).
            continue
        course_code, course_title, course_type, course_status = cells[:4]
        courses.append(Course(course_code, course_title, course_type, course_status))
    return courses


def get_courses_page(session: requests_html.HTMLSession, csrf_token: str, semester: Optional[int] = None) -> dict[
    int, list[Course]]:
    """
    Fetch the list of semesters and collect the courses of each requested semester.

    :param session: The session used to issue the GET requests.
    :param csrf_token: Sent in the ``x-csrf-token`` request header.
    :param semester: The semester number to fetch. If not provided, every semester found is fetched.
    :return: A mapping of semester number to the courses returned by ``get_courses_in_semester``.
        Semester options that cannot be interpreted are left out.
    :raises ConnectionError: If the semester list request fails or its status is not 200, or if
        fetching the courses of a semester fails.
    """
    profile_url = "https://www.pesuacademy.com/Academy/a/studentProfilePESU/getStudentSemestersPESU"
    # Current time in milliseconds, sent as the "_" query parameter.
    query = {"_": str(int(datetime.datetime.now().timestamp() * 1000))}
    headers = {
        "accept": "*/*",
        "accept-language": "en-IN,en-US;q=0.9,en-GB;q=0.8,en;q=0.7",
        "content-type": "application/x-www-form-urlencoded",
        "referer": "https://www.pesuacademy.com/Academy/s/studentProfilePESU",
        "sec-ch-ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "x-csrf-token": csrf_token,
        "x-requested-with": "XMLHttpRequest",
    }
    try:
        response = session.get(profile_url, allow_redirects=False, params=query, headers=headers)
    except Exception as error:
        # Keep the original failure attached as the cause for debugging.
        raise ConnectionError("Unable to fetch course data.") from error
    if response.status_code != 200:
        raise ConnectionError("Unable to fetch course data.")

    # The decoded JSON body is handed to the HTML parser, from which the
    # <option> elements (one per semester) are extracted.
    options_markup = response.json()
    option_tags = BeautifulSoup(options_markup, "lxml").find_all("option")

    courses = {}
    for semester_option_tag in option_tags:
        current_value = semester_option_tag.attrs.get("value")
        _, marker, semester_text = semester_option_tag.text.partition("Sem-")
        if current_value is None or not marker:
            # No value attribute or no "Sem-" label: not a usable semester entry.
            continue
        try:
            current_semester = int(semester_text)
        except ValueError:
            # The text after "Sem-" is not a number; skip rather than abort
            # the whole listing.
            continue
        if semester is None or current_semester == semester:
            courses[current_semester] = get_courses_in_semester(session, current_value)
    return courses
4 changes: 0 additions & 4 deletions pesu_academy/pages/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
from pesu_academy.models import Profile


def get_personal_details(soup: BeautifulSoup) -> dict:
pass


def get_profile_page(session: requests_html.HTMLSession) -> Profile:
try:
profile_url = "https://www.pesuacademy.com/Academy/s/studentProfilePESUAdmin"
Expand Down
13 changes: 12 additions & 1 deletion pesu_academy/pesu_academy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .exceptions import CSRFTokenError, AuthenticationError
from .models import Profile
from .pages import profile
from .pages import profile, courses


class PESUAcademy:
Expand Down Expand Up @@ -114,3 +114,14 @@ def profile(self):
raise AuthenticationError("You need to authenticate first.")
profile_info = profile.get_profile_page(self.__session)
return profile_info

def courses(self, semester: Optional[int] = None):
    """
    Fetch course information for the authenticated user.

    :param semester: The semester number to fetch. When omitted, courses for all semesters are returned.
    :return: The result of the courses page lookup for the requested semester(s).
    :raises AuthenticationError: If the instance is not marked as authenticated.
    """
    if self._authenticated:
        # Delegate to the courses page module using this instance's session and CSRF token.
        return courses.get_courses_page(self.__session, self._csrf_token, semester)
    raise AuthenticationError("You need to authenticate first.")

0 comments on commit f15d483

Please sign in to comment.