File tree Expand file tree Collapse file tree 1 file changed +5
-2
lines changed
Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Original file line number Diff line number Diff line change @@ -40,6 +40,7 @@ def save_articles_as_html_and_pdf():
4040 '<html><head>'
4141 '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
4242 '<link rel="stylesheet" href="style.min.css" type="text/css" media="all" />'
43+ '<script src="https://cdn.rawgit.com/google/code-prettify/master/loader/run_prettify.js"></script>'
4344 '</head><body>'
4445 )
4546 allArticles += '<h1 style="text-align:center;font-size:40px">' + categoryUrl .title () + ' Archive</h1><hr>'
@@ -74,13 +75,15 @@ def scrape_category(categoryUrl):
7475 # Traverse each link to find article and save it.
7576 for link in links :
7677 try :
77- if (i % 11 == 0 ):
78- sleep (5 ) # Sleep for 5 seconds after getting every 10th link
78+ if (i % 10 == 0 ):
79+ sleep (5 ) # Sleep for 5 seconds before scraping every 10th link
7980 print ("Scraping link no: " + str (i ) + " Link: " + link )
8081 i = i + 1
8182 link_soup = BeautifulSoup (requests .get (link ).text )
8283 # Remove the space occupied by Google Ads (Drop script & ins node)
8384 [script .extract () for script in link_soup (["script" , "ins" ])]
85+ for code_tag in link_soup .find_all ('pre' ):
86+ code_tag ['class' ] = code_tag .get ('class' , []) + ['prettyprint' ]
8487 article = link_soup .find ('article' )
8588 # Now add this article to list of all articles
8689 articles .append (article .encode ('UTF-8' ))
You can’t perform that action at this time.
0 commit comments