Skip to content

Commit 0800f21

Browse files
committed
no message
1 parent 8837e2d commit 0800f21

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed

moumoubaimifan/qqzone/qqzone.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# coding=utf-8
2+
from urllib.request import urlretrieve
3+
4+
from selenium import webdriver
5+
from bs4 import BeautifulSoup
6+
import time
7+
8+
from selenium.webdriver import ActionChains
9+
10+
def login(login_qq, password, business_qq):
    """Log in to QQ Zone with Chrome and open the target account's page.

    Opens ``https://user.qzone.qq.com/<business_qq>/311``, switches to the
    ``login_frame`` iframe, selects account/password login, fills in the
    credentials and submits the form.

    :param login_qq: QQ number used to log in.
    :param password: password for ``login_qq``.
    :param business_qq: QQ number whose zone page is opened.
    :return: the Chrome driver when the ``QM_OwnerInfo_Icon`` element is
        present after login; otherwise ``None`` (the browser is closed first).
    :raises Exception: any error raised while driving the login form is
        re-raised after the browser has been closed.
    """
    # Local imports keep this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq))
        # Implicit wait: element lookups poll for up to 10 s while the page loads.
        driver.implicitly_wait(10)
        driver.find_element(By.ID, 'login_div')
        # The account/password inputs live inside this iframe.
        driver.switch_to.frame('login_frame')
        # Switch from the default login mode to account/password login.
        driver.find_element(By.ID, 'switcher_plogin').click()

        account_box = driver.find_element(By.ID, 'u')
        account_box.clear()
        account_box.send_keys(login_qq)

        password_box = driver.find_element(By.ID, 'p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element(By.ID, 'login_button').click()
        driver.switch_to.default_content()

        # Give the post-login page time to render before probing it.
        time.sleep(5)
    except Exception:
        # Do not leave an orphaned browser behind when the form flow fails.
        driver.quit()
        raise

    try:
        # Presence of this element is treated as "the zone is accessible".
        driver.find_element(By.ID, 'QM_OwnerInfo_Icon')
    except WebDriverException:
        # str() so that an integer QQ number does not raise TypeError here.
        print('不能访问' + str(business_qq))
        driver.quit()
        return None
    return driver
41+
42+
43+
44+
def get_photo(driver, qq=None, photo_path="C:/Users/xxx/Desktop/photo/{}/{}.jpg"):
    """Walk through the albums of the open QQ Zone page and download photos.

    For each album the function opens the album tab, enters the album, opens
    its first photo in the viewer and then steps through the viewer with the
    "next photo" button, saving every image with ``urlretrieve``.

    :param driver: a logged-in Selenium driver positioned on the zone page.
    :param qq: value used for the first ``{}`` of ``photo_path`` (the folder
        name). When ``None`` it is taken from the first path segment of
        ``driver.current_url``, falling back to ``'unknown'``.
    :param photo_path: format string with two ``{}`` placeholders, filled with
        ``qq`` and the photo name.
    :return: ``None``.
    :raises RuntimeError: when the "next photo" button does not show up after
        ten attempts.
    """
    # Local imports keep this block self-contained.
    import os
    import re
    from urllib.parse import urlparse
    from selenium.webdriver.common.by import By

    if qq is None:
        # The original body used an undefined name ``qq`` here.
        # NOTE(review): assumes the current URL still looks like
        # https://user.qzone.qq.com/<qq>/... — confirm, or pass ``qq`` explicitly.
        segments = [part for part in urlparse(driver.current_url).path.split('/') if part]
        qq = segments[0] if segments else 'unknown'

    # NOTE(review): the index starts at 1 exactly as before, so the element at
    # index 0 of the 'album-cover' list is never opened — confirm this is intended.
    album_index = 1

    while True:
        # Back to the main document, then open the album tab in the header menu.
        driver.switch_to.default_content()
        driver.find_element(By.XPATH, '//*[@id="menuContainer"]/div/ul/li[3]/a').click()
        driver.implicitly_wait(10)
        time.sleep(3)

        # The album list is rendered inside this iframe.
        driver.switch_to.frame('app_canvas_frame')
        albums = driver.find_elements(By.CLASS_NAME, 'album-cover')
        if album_index >= len(albums):
            # Nothing (more) to open; avoids IndexError on short album lists.
            break
        albums[album_index].click()

        driver.implicitly_wait(10)
        time.sleep(3)

        photos = driver.find_elements(By.CLASS_NAME, 'item-cover')
        if photos:
            # Open the first photo of the album.
            photos[0].click()
            time.sleep(3)

            # The large-photo viewer lives in the parent frame.
            driver.switch_to.parent_frame()

            while True:
                img = driver.find_element(By.ID, 'js-img-disp')
                src = img.get_attribute('src').replace('&t=5', '')
                name = driver.find_element(By.ID, 'js-photo-name').text

                # Replace characters that are not allowed in file names and
                # make sure the target folder exists before downloading.
                safe_name = re.sub(r'[\\/:*?"<>|]', '_', name) or 'unnamed'
                target = photo_path.format(qq, safe_name)
                folder = os.path.dirname(target)
                if folder:
                    os.makedirs(folder, exist_ok=True)
                urlretrieve(src, target)

                # Info bar text has the form "<current>/<total>".
                counts = driver.find_element(
                    By.XPATH, '//*[@id="js-ctn-infoBar"]/div/div[1]/span').text.split('/')
                if int(counts[0]) == int(counts[1]):
                    # Last photo: close the viewer with the X button.
                    driver.find_element(By.XPATH, '//*[@id="js-viewer-main"]/div[1]/a').click()
                    break

                # The page can be slow, so look for the "next" button up to 10
                # times. find_elements returns an empty list instead of raising.
                next_button = None
                for _ in range(10):
                    found = driver.find_elements(By.ID, 'js-btn-nextPhoto')
                    if found:
                        next_button = found[0]
                        break
                    time.sleep(5)
                if next_button is None:
                    raise RuntimeError('next-photo button (js-btn-nextPhoto) not found')
                ActionChains(driver).click(next_button).perform()

        # Stop once every album in the list has been visited.
        album_index += 1
        if album_index >= len(albums):
            break

    # Leave the driver on the main document for whoever uses it next.
    driver.switch_to.default_content()
109+
110+
111+
def get_shuoshuo(driver):
    """Print every shuoshuo post found on the open page, page by page.

    On each page the function scrolls the window down, parses the content of
    the ``app_canvas_frame`` iframe with BeautifulSoup, prints one
    ``<title>:<text>`` line per ``<pre class="content">`` element and then
    clicks the "下一页" (next page) link until the number shown on the
    "末页" (last page) link has been reached.

    :param driver: a logged-in Selenium driver positioned on the zone page.
    :return: ``None``; the posts are written to stdout.
    """
    # Local import keeps this block self-contained.
    from selenium.webdriver.common.by import By

    page = 1
    while True:
        # Scroll down four times so the page content gets loaded.
        for _ in range(1, 5):
            driver.execute_script("window.scrollBy(0,5000)")
            time.sleep(2)

        driver.switch_to.frame('app_canvas_frame')
        try:
            # Characters that GBK cannot represent are dropped before parsing.
            html = driver.page_source.encode('GBK', 'ignore').decode('gbk')
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            bs = BeautifulSoup(html, 'html.parser')

            for pre in bs.find_all('pre', class_='content'):
                shuoshuo = pre.text
                container = pre.parent.parent if pre.parent is not None else None
                link = None
                if container is not None:
                    link = container.find('a', class_="c_tx c_tx3 goDetail")
                # A post without the detail link is still printed, with an empty title.
                tx = link.get('title', '') if link is not None else ''
                print(tx + ":" + shuoshuo)

            page = page + 1
            last_link = bs.find('a', title='末页')
            if last_link is None:
                # No "last page" link was found: treat the current page as the only one.
                break
            max_page = last_link.text.strip()
            if not max_page.isdigit() or int(max_page) < page:
                break

            driver.find_element(By.LINK_TEXT, u'下一页').click()
        finally:
            # Always return to the main document, also when leaving the loop.
            driver.switch_to.default_content()

        # Wait for the next page to load.
        time.sleep(3)
144+
145+
146+
if __name__ == '__main__':
    # Script entry point: log in, print the posts, then download the photos.
    # The QQ numbers and password below are placeholders.
    driver = login('11111111', 'password', '2222222')
    if driver:
        try:
            get_shuoshuo(driver)
            get_photo(driver)
        finally:
            # Close the browser even when one of the steps raises.
            driver.quit()

0 commit comments

Comments
 (0)