File tree Expand file tree Collapse file tree 5 files changed +116
-0
lines changed
Expand file tree Collapse file tree 5 files changed +116
-0
lines changed Original file line number Diff line number Diff line change @@ -10,6 +10,8 @@ Python技术 公众号文章代码库
1010
1111## 实例代码
1212
13+ [知乎热门:如何提高爬虫速度?](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/spiderspeed):知乎热门:如何提高爬虫速度?
14+
1315[Python异常还能写得如此优雅!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/retry):Python异常还能写得如此优雅!
1416
1517[神器 Spider!几分钟入门分布式爬虫!](https://github.com/JustDoPython/python-examples/tree/master/xianhuan/disspider):神器 Spider!几分钟入门分布式爬虫!
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+ """
4+ @author: 闲欢
5+ """
6+ import aiohttp
7+ import asyncio
8+ import time
9+
10+
async def fetch(client):
    """Request http://httpbin.org/get through ``client`` and return the body.

    Args:
        client: Object whose ``get(url)`` returns an async context manager
            yielding a response; ``main()`` passes an
            ``aiohttp.ClientSession``.

    Returns:
        The awaited result of ``resp.text()`` for the response.

    Raises:
        AssertionError: If the response status is not 200.
    """
    async with client.get('http://httpbin.org/get') as response:
        # Anything other than HTTP 200 aborts this fetch.
        assert response.status == 200
        body = await response.text()
    return body
15+
16+
async def main():
    """Open an aiohttp session, fetch the test page once and print its body."""
    async with aiohttp.ClientSession() as session:
        # The session is closed automatically when the block exits.
        print(await fetch(session))
21+
# Create the event loop explicitly instead of relying on the implicit loop
# creation of asyncio.get_event_loop(), which is deprecated outside a
# running loop on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Schedule 100 concurrent runs of main(); they start executing once the
# loop is running.
tasks = [loop.create_task(main()) for _ in range(100)]

t1 = time.time()

try:
    # Wait for ALL scheduled tasks. The previous code ran one extra main()
    # and stopped the loop as soon as that single coroutine finished, so the
    # measured time did not cover the 100 tasks created above.
    loop.run_until_complete(asyncio.gather(*tasks))
finally:
    loop.close()

print("aiohttp版爬虫耗时:", time.time() - t1)
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+ """
4+ @author: 闲欢
5+ """
6+ import requests
7+ import time
8+ import multiprocessing
9+ from multiprocessing import Pool
10+
# Size of the worker pool: one process per CPU reported by the OS.
MAX_WORKER_NUM = multiprocessing.cpu_count()
12+
def fetch():
    """GET http://httpbin.org/get with requests and print the response text."""
    response = requests.get('http://httpbin.org/get')
    print(response.text)
16+
if __name__ == '__main__':
    t1 = time.time()

    # Use the pool as a context manager so its worker processes are always
    # released, even if submitting work raises or the run is interrupted.
    with Pool(MAX_WORKER_NUM) as p:
        # Queue 100 fetches; the pool runs at most MAX_WORKER_NUM at a time.
        for _ in range(100):
            p.apply_async(fetch, args=())
        # No more work will be submitted; wait for the queued jobs to finish
        # before leaving the block (leaving it terminates the pool).
        p.close()
        p.join()

    print('多进程爬虫耗时:', time.time() - t1)
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+ """
4+ @author: 闲欢
5+ """
6+ import time
7+ import requests
8+ from datetime import datetime
9+
10+
def fetch(url):
    """GET ``url`` with requests and print the response text.

    Args:
        url: Address passed straight to ``requests.get``.
    """
    response = requests.get(url)
    print(response.text)
14+
# NOTE(review): `start` is not read anywhere in the visible script.
start = datetime.now()

# Baseline measurement: issue the 100 requests strictly one after another.
t1 = time.time()
for _ in range(100):
    fetch('http://httpbin.org/get')

elapsed = time.time() - t1
print('requests版爬虫耗时:', elapsed)
22+
23+
24+
25+
26+
27+
28+
29+
30+
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+ """
4+ @author: 闲欢
5+ """
6+ import threading
7+ import time
8+ import requests
9+
10+
def fetch():
    """GET http://httpbin.org/get with requests and print the response text."""
    response = requests.get('http://httpbin.org/get')
    print(response.text)
14+
t1 = time.time()

# Launch 100 threads, each performing a single fetch; every thread is
# started as soon as it is created.
t_list = []
for _ in range(100):
    worker = threading.Thread(target=fetch)
    t_list.append(worker)
    worker.start()

# Block until every thread has finished before reading the clock again.
for t in t_list:
    t.join()

elapsed = time.time() - t1
print("多线程版爬虫耗时:", elapsed)
You can’t perform that action at this time.
0 commit comments