# coding=utf-8
import os
import time
from urllib.parse import urlparse
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver import ActionChains

def login(login_qq, password, business_qq):
    '''
    Open Chrome, load the Qzone page of ``business_qq`` and log in with an
    account/password pair.

    :param login_qq: QQ number used to log in
    :param password: password of the login QQ
    :param business_qq: QQ number whose space is opened
    :return: the logged-in driver, or None (after closing the browser) when
             the ``QM_OwnerInfo_Icon`` element cannot be found after login
    :raises selenium.common.exceptions.WebDriverException: if any step
             before the final check fails; the browser is closed first
    '''
    driver = webdriver.Chrome()
    try:
        driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq))
        # Implicit wait so that element lookups give the page time to load.
        driver.implicitly_wait(10)
        driver.find_element_by_id('login_div')
        # The account/password inputs live inside the login frame.
        driver.switch_to.frame('login_frame')
        # Switch from the default login mode to "account/password login".
        driver.find_element_by_id('switcher_plogin').click()

        account_box = driver.find_element_by_id('u')
        account_box.clear()
        account_box.send_keys(login_qq)

        password_box = driver.find_element_by_id('p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element_by_id('login_button').click()
        driver.switch_to.default_content()

        driver.implicitly_wait(10)
        time.sleep(5)

        # NOTE(review): presumably this icon is only rendered when the space
        # is accessible to the logged-in account -- confirm against the page.
        try:
            driver.find_element_by_id('QM_OwnerInfo_Icon')
            accessible = True
        except WebDriverException:
            accessible = False
    except Exception:
        # Do not leak the Chrome process when a step fails unexpectedly.
        driver.quit()
        raise

    if accessible:
        return driver

    # str() so an int QQ number works here as it does in the URL above.
    print('不能访问' + str(business_qq))
    driver.quit()
    return None



def _qq_from_url(url):
    """Return the first path segment of ``url``.

    ``login`` opens ``https://user.qzone.qq.com/<qq>/311``, so the first
    segment is the QQ number as long as the browser is still on such a URL.
    """
    segments = [part for part in urlparse(url).path.split('/') if part]
    if not segments:
        raise ValueError('cannot derive the QQ number from URL: ' + url)
    return segments[0]


def _parse_photo_counter(text):
    """Split a "current/total" counter string into a pair of ints."""
    current, _, total = text.partition('/')
    return int(current), int(total)


def _click_next_photo(driver, attempts=10, delay=5):
    """Click the "next photo" button, retrying while it is not yet present."""
    for _ in range(attempts):
        # find_elements returns an empty list instead of raising, which makes
        # the retry branch reachable.
        buttons = driver.find_elements_by_id('js-btn-nextPhoto')
        if buttons:
            ActionChains(driver).click(buttons[0]).perform()
            return
        time.sleep(delay)
    raise NoSuchElementException(
        'js-btn-nextPhoto not found after {} attempts'.format(attempts))


def _download_open_album(driver, qq, photo_path):
    """Download every photo of the album currently shown in the viewer."""
    while True:
        # URL and name of the photo on display.
        img = driver.find_element_by_id('js-img-disp')
        src = img.get_attribute('src').replace('&t=5', '')
        name = driver.find_element_by_id('js-photo-name').text

        target = photo_path.format(qq, name)
        folder = os.path.dirname(target)
        if folder:
            # urlretrieve does not create missing directories.
            os.makedirs(folder, exist_ok=True)
        urlretrieve(src, target)

        # Info bar text of the form "current photo number/total photos".
        counter = driver.find_element_by_xpath(
            '//*[@id="js-ctn-infoBar"]/div/div[1]/span').text
        current, total = _parse_photo_counter(counter)
        if current >= total:
            # Last photo: close the viewer with the X button in the corner.
            driver.find_element_by_xpath(
                '//*[@id="js-viewer-main"]/div[1]/a').click()
            return

        _click_next_photo(driver)


def get_photo(driver, qq=None, photo_path="C:/Users/xxx/Desktop/photo/{}/{}.jpg"):
    """Download the photos of the albums of the Qzone open in ``driver``.

    :param driver: logged-in Selenium driver, as returned by ``login``
    :param qq: value for the first placeholder of ``photo_path``; when None
               it is taken from the first path segment of the current URL
    :param photo_path: format string with two placeholders, filled with
                       ``qq`` and the photo name shown by the viewer
    :raises ValueError: if ``qq`` is None and the URL has no path segment
    :raises selenium.common.exceptions.NoSuchElementException: if an expected
            page element, including the "next photo" button, is missing
    """
    if qq is None:
        qq = _qq_from_url(driver.current_url)

    # Album index. It starts at 1, so the first 'album-cover' element is
    # never opened. NOTE(review): presumably deliberate -- confirm.
    album_index = 1

    while True:
        # Back to the top-level document.
        driver.switch_to.default_content()
        # Click the album entry in the header menu.
        driver.find_element_by_xpath(
            '//*[@id="menuContainer"]/div/ul/li[3]/a').click()
        # Wait for the album list to load.
        driver.implicitly_wait(10)
        time.sleep(3)
        driver.switch_to.frame('app_canvas_frame')

        # Links of all albums; re-read on every pass because the page was
        # reloaded by the click above.
        albums = driver.find_elements_by_class_name('album-cover')
        if album_index >= len(albums):
            break
        albums[album_index].click()

        driver.implicitly_wait(10)
        time.sleep(3)

        covers = driver.find_elements_by_class_name('item-cover')
        if covers:
            # Open the first photo of the album.
            covers[0].click()
            time.sleep(3)
            # The large-photo viewer lives in the parent frame.
            driver.switch_to.parent_frame()
            _download_open_album(driver, qq, photo_path)
        # An album without any 'item-cover' element is skipped.

        # Stop once every album has been visited.
        album_index += 1
        if album_index >= len(albums):
            break

    # Leave the driver on the top-level document for the caller.
    driver.switch_to.default_content()


def get_shuoshuo(driver):
    """Print every shuoshuo (status post) found on the paginated list.

    Each post is printed as ``<title>:<text>``, where ``<title>`` is the
    ``title`` attribute of the post's ``goDetail`` link (empty when the link
    is missing). Paging stops when the "last page" link is absent, its text
    is not a number, or the last page has been printed.

    :param driver: logged-in Selenium driver, as returned by ``login``
    """
    page = 1
    while True:
        # Scroll down so that more of the page gets loaded.
        for _ in range(4):
            driver.execute_script("window.scrollBy(0,5000)")
            time.sleep(2)

        driver.switch_to.frame('app_canvas_frame')
        try:
            # Characters that GBK cannot represent are dropped.
            # NOTE(review): presumably so print() works on a GBK console.
            html = driver.page_source.encode('GBK', 'ignore').decode('gbk')
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            bs = BeautifulSoup(html, 'html.parser')

            # Every post on the current page.
            for pre in bs.find_all('pre', class_='content'):
                container = pre.parent.parent if pre.parent is not None else None
                link = None
                if container is not None:
                    link = container.find('a', class_="c_tx c_tx3 goDetail")
                title = link.get('title', '') if link is not None else ''
                print(title + ":" + pre.text)

            # Page bookkeeping: the "last page" link text is the page count.
            page += 1
            last_page_link = bs.find('a', title='末页')
            if last_page_link is None:
                # No pager on the page, so there is nothing more to visit.
                break
            try:
                max_page = int(last_page_link.text)
            except ValueError:
                break
            if max_page < page:
                break

            driver.find_element_by_link_text(u'下一页').click()
        finally:
            # Always return to the top-level document, also when leaving.
            driver.switch_to.default_content()

        # Wait for the next page to load.
        time.sleep(3)


if __name__ == '__main__':
    # Placeholder credentials: login QQ, its password, and the target QQ.
    driver = login('11111111', 'password', '2222222')
    if driver:
        try:
            get_shuoshuo(driver)
            get_photo(driver)
        finally:
            # Close the browser even when scraping fails part-way.
            driver.quit()