Skip to content

Commit dd3bd2f

Browse files
Syntax Highlighting for code by Google Prettify
1 parent 7c4fc81 commit dd3bd2f

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

g4g.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ def save_articles_as_html_and_pdf():
4040
'<html><head>'
4141
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
4242
'<link rel="stylesheet" href="style.min.css" type="text/css" media="all" />'
43+
'<script src="https://cdn.rawgit.com/google/code-prettify/master/loader/run_prettify.js"></script>'
4344
'</head><body>'
4445
)
4546
allArticles += '<h1 style="text-align:center;font-size:40px">' + categoryUrl.title() + ' Archive</h1><hr>'
@@ -74,13 +75,15 @@ def scrape_category(categoryUrl):
7475
# Traverse each link to find article and save it.
7576
for link in links:
7677
try:
77-
if(i % 11 == 0):
78-
sleep(5) # Sleep for 5 seconds after getting every 10th link
78+
if(i % 10 == 0):
79+
sleep(5) # Sleep for 5 seconds before scraping every 10th link
7980
print("Scraping link no: " + str(i) + " Link: " + link )
8081
i = i + 1
8182
link_soup = BeautifulSoup(requests.get(link).text)
8283
# Remove the space occupied by Google Ads (Drop script & ins node)
8384
[script.extract() for script in link_soup(["script", "ins"])]
85+
for code_tag in link_soup.find_all('pre'):
86+
code_tag['class'] = code_tag.get('class', []) + ['prettyprint']
8487
article = link_soup.find('article')
8588
# Now add this article to list of all articles
8689
articles.append(article.encode('UTF-8'))

0 commit comments

Comments (0)