1+ from selenium import webdriver
2+ from selenium .webdriver .common .by import By
3+ from selenium .webdriver .chrome .service import Service
4+ from selenium .webdriver .support .ui import WebDriverWait
5+ from selenium .webdriver .support import expected_conditions as EC
6+ from selenium .webdriver .chrome .options import Options
7+
8+ """
9+ Example code :
10+ python_scraper = Courses("python",5)
11+ print(python_scraper.scrape_all())
12+ """
class Courses:
    """Scrape Coursera search results for a keyword using headless Chrome.

    Every public scraping method opens its own browser session, walks up to
    ``page_count`` result pages, closes the browser, and returns a dict of
    the form ``{"data": <list or None>, "message": <str>}``. ``data`` is
    ``None`` when scraping failed after the session was opened.

    Example:
        python_scraper = Courses("python", 5)
        print(python_scraper.scrape_all())
    """

    SEARCH_URL = "https://www.coursera.org/search?query="
    WAIT_TIMEOUT = 100  # seconds handed to WebDriverWait
    CARD_SELECTOR = "main ul>li"
    NEXT_BUTTON_SELECTOR = 'button[aria-label="Next Page"]'

    # JavaScript snippets executed against a single result card
    # (passed to the script as ``arguments[0]``).
    _TITLE_JS = 'return arguments[0].querySelector("h3")?.innerText'
    _DESCRIPTION_JS = 'return arguments[0].querySelector("p>span")?.innerText'
    # The escape must reach JavaScript as "\n\n" so the first blank line in
    # the text is replaced by a star; a stray space after the backslash
    # would make the pattern the literal text " n n" instead.
    _REVIEW_JS = (
        'return arguments[0].querySelector("div:has(>svg)")'
        '?.innerText.replace("\\n\\n","⭐")'
    )
    # String(...) turns a missing anchor into the text "undefined".
    _URL_JS = 'return String(arguments[0].querySelector("a")?.href)'

    def __init__(self, keyword, page_count, chromedriver_path=None):
        """Store the search parameters.

        Args:
            keyword: Raw (not URL-encoded) search text.
            page_count: Maximum number of result pages to visit.
            chromedriver_path: Optional path to a chromedriver binary. When
                empty or ``None`` the Selenium ``Service`` default is used.
        """
        self.keyword = keyword
        self.page_count = page_count
        self.chromedriver_path = chromedriver_path

    @property
    def search_url(self):
        """Return the search URL with the keyword safely URL-encoded."""
        # Function-scope import keeps this class self-contained.
        from urllib.parse import quote_plus

        return self.SEARCH_URL + quote_plus(str(self.keyword))

    def __scrape_page(self):
        """Start headless Chrome on the search page.

        Returns:
            A ``(wait, driver)`` tuple; the caller must quit ``driver``.
        """
        options = Options()
        options.add_argument("--headless")
        if self.chromedriver_path:
            service = Service(self.chromedriver_path)
        else:
            # An empty path is not a usable executable; let Selenium
            # locate the driver itself.
            service = Service()
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(self.search_url)
        except Exception:
            # Do not leave an orphaned browser behind if navigation fails.
            driver.quit()
            raise
        return WebDriverWait(driver, self.WAIT_TIMEOUT), driver

    def _go_to_next_page(self, driver):
        """Click the "Next Page" button; return False when there is none."""
        buttons = driver.find_elements(By.CSS_SELECTOR, self.NEXT_BUTTON_SELECTOR)
        if not buttons:
            return False
        # get_attribute may return None when the button has no class.
        if 'disabled' in (buttons[0].get_attribute('class') or ''):
            return False
        buttons[0].click()
        return True

    def _collect(self, extract, label):
        """Run ``extract(driver, card, index)`` over every result card.

        Args:
            extract: Callable producing one output item per card; ``index``
                is the running item count across all pages.
            label: Prefix of the success message.

        Returns:
            ``{"data": [...], "message": ...}`` on success, or the same
            shape with ``"data": None`` if anything fails while scraping.
        """
        wait, driver = self.__scrape_page()
        collected = []
        try:
            for page in range(self.page_count):
                cards = wait.until(
                    EC.visibility_of_all_elements_located(
                        (By.CSS_SELECTOR, self.CARD_SELECTOR)
                    )
                )
                for card in cards:
                    collected.append(extract(driver, card, len(collected)))
                if page == self.page_count - 1:
                    # Last requested page: no need to navigate any further.
                    break
                # NOTE(review): nothing waits for the old cards to go stale
                # after the click, so the next wait may still see the
                # previous page - confirm against the live site.
                if not self._go_to_next_page(driver):
                    print('There are no more pages')
                    break
        except Exception:
            return {
                "data": None,
                "message": f"No courses found for {self.keyword}",
            }
        finally:
            # Always release the browser, on success and on failure.
            driver.quit()
        return {
            "data": collected,
            "message": f"{label} for {self.keyword}",
        }

    def _collect_field(self, script, label):
        """Collect the value of one JavaScript snippet for every card."""
        return self._collect(
            lambda driver, card, _index: driver.execute_script(script, card),
            label,
        )

    def scrape_all(self):
        """Return id, title, description, review and url for every course."""

        def extract(driver, card, index):
            return {
                "id": index,
                "title": driver.execute_script(self._TITLE_JS, card),
                "description": driver.execute_script(self._DESCRIPTION_JS, card),
                "review": driver.execute_script(self._REVIEW_JS, card),
                "url": driver.execute_script(self._URL_JS, card),
            }

        return self._collect(extract, "Course details")

    def course_titles(self):
        """Return the title of every course found."""
        return self._collect_field(self._TITLE_JS, "Course Titles")

    def course_description(self):
        """Return the description of every course found."""
        return self._collect_field(self._DESCRIPTION_JS, "Course Descriptions")

    def course_reviews(self):
        """Return the rating/review text of every course found."""
        return self._collect_field(self._REVIEW_JS, "Course Reviews")

    def course_urls(self):
        """Return the link of every course found."""
        return self._collect_field(self._URL_JS, "Course URLs")
if __name__ == "__main__":
    # Demo run: scrape up to five result pages for "python" and print the
    # result. Guarded so that merely importing this module does not launch
    # a browser and hit the network.
    python_scraper = Courses("python", 5)
    print(python_scraper.scrape_all())
0 commit comments