1+ # -*- coding: utf-8 -*-
2+ """
3+ Created on 2021-09-14 15:07:49
4+ ---------
5+ @summary:
6+ ---------
7+ @author: 闲欢
8+ """
9+
10+ import feapder
11+ import json
12+
13+ from feapder .db .mysqldb import MysqlDB
14+
15+
class ReportSpider(feapder.AirSpider):
    """Spider that downloads the Eastmoney research-report list and stores it in MySQL.

    One request is issued against ``reportapi.eastmoney.com/report/list``; the
    JSONP answer is unwrapped, reduced to the fields in ``_REPORT_FIELDS`` and
    inserted into the ``report`` table through ``feapder``'s ``MysqlDB``.
    """

    # Fields copied from every report record. The order also defines the column
    # order of the generated INSERT statement, so keep both in sync by editing
    # only this tuple.
    _REPORT_FIELDS = (
        'title',                  # report title
        'stockName',              # stock name
        'stockCode',              # stock code
        'orgCode',                # institution code
        'orgName',                # institution name
        'orgSName',               # institution short name
        'publishDate',            # publish date
        'predictNextTwoYearEps',  # forecast EPS, year after next
        'predictNextTwoYearPe',   # forecast P/E, year after next
        'predictNextYearEps',     # forecast EPS, next year
        'predictNextYearPe',      # forecast P/E, next year
        'predictThisYearEps',     # forecast EPS, this year
        'predictThisYearPe',      # forecast P/E, this year
        'indvInduCode',           # industry code
        'indvInduName',           # industry name
        'lastEmRatingName',       # previous rating name
        'lastEmRatingValue',      # previous rating code
        'emRatingValue',          # rating code
        'emRatingName',           # rating name
        'ratingChange',           # rating change
        'researcher',             # researcher
        'encodeUrl',              # link
        'count',                  # number of reports on the stock in the last month
    )

    def __init__(self, *args, **kwargs):
        """Initialise the spider and create the ``MysqlDB`` handle used by ``insertdb``."""
        super().__init__(*args, **kwargs)
        self.db = MysqlDB()

    def start_requests(self):
        """Yield the single seed request for the report list.

        ``pageNo`` is attached to the request as an extra attribute so that
        ``parse_report_info`` can read it back.
        """
        yield feapder.Request(
            "http://reportapi.eastmoney.com/report/list?cb=datatable1351846&industryCode=*&pageSize=50&industry=*&rating=&ratingChange=&beginTime=2021-09-13&endTime=2021-09-14&pageNo=1&fields=&qType=0&orgCode=&code=*&rcode=&p=2&pageNum=2&_=1603724062679",
            callback=self.parse_report_info,
            pageNo=1,
        )

    @staticmethod
    def _strip_jsonp(text):
        """Return the JSON payload of a JSONP response such as ``callback({...})``.

        Text that already starts with ``{`` or ``[`` is treated as plain JSON and
        returned unchanged. Otherwise everything between the first ``(`` and the
        last ``)`` is returned, which tolerates a trailing ``;`` or newline and
        does not depend on the callback name.
        """
        text = text.strip()
        if text[:1] in ('{', '['):
            return text
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end < start:
            # No wrapper found; let json.loads report the problem.
            return text
        return text[start + 1:end]

    def parse_report_info(self, request, response):
        """Decode the response body, parse it as JSON and pass it to ``save_data``.

        An empty (or whitespace-only) body is ignored. ``json.loads`` raises if
        the unwrapped payload is not valid JSON.
        """
        print(request.pageNo)
        html = response.content.decode("utf-8").strip()
        if not html:
            return
        content_json = json.loads(self._strip_jsonp(html))
        print(content_json)
        self.save_data(content_json)

    def save_data(self, items):
        """Extract the stored fields from every report and insert them into the database.

        :param items: parsed response; its ``data`` entry is the list of report records.
        :return: list of dicts, one per report, keyed by ``_REPORT_FIELDS``.
        :raises KeyError: if a report record lacks one of the expected fields.
        """
        result_list = []
        # ``data`` may be missing or null when the query has no hits.
        for report in items.get('data') or []:
            # Includes ``orgCode`` read from its own key (it was previously
            # filled with the stock code by mistake).
            result = {field: report[field] for field in self._REPORT_FIELDS}
            result['count'] = int(report['count'])
            result_list.append(result)

        self.insertdb(result_list)

        return result_list

    def download_midware(self, request):
        """Replace the request headers with a fixed browser-like header set."""
        request.headers = {
            "Connection": "keep-alive",
            "Cookie": "qgqp_b_id=0f1ac887e1e3e484715bf0e3f148dbd8; intellpositionL=1182.07px; st_si=32385320684787; st_asi=delete; cowCookie=true; intellpositionT=741px; st_pvi=73966577539485; st_sp=2021-03-22%2009%3A25%3A40; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=4; st_psi=20210914160650551-113300303753-3491653988",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36",
            "Host": "reportapi.eastmoney.com"
        }
        return request

    def validate(self, request, response):
        """Reject any response whose HTTP status is not 200.

        :raises Exception: on a non-200 status; per the original author's note
            this is meant to make the framework retry the request.
        """
        if response.status_code != 200:
            raise Exception("response code not 200")  # retry

    def insertdb(self, data_list):
        """Insert the given report records into the ``report`` table in one batch.

        :param data_list: list of dicts keyed by ``_REPORT_FIELDS``.
        """
        if not data_list:
            # Nothing to insert; skip the database round trip.
            return

        fields = self._REPORT_FIELDS
        insert_tuple = [tuple(obj[field] for field in fields) for obj in data_list]

        # Column names are fixed constants; the values themselves are passed
        # separately as ``%s`` parameters.
        columns = ','.join(fields)
        placeholders = ','.join(['%s'] * len(fields))
        sql = 'insert into report(' + columns + ') values(' + placeholders + ')'

        self.db.add_batch(sql, insert_tuple)
98+
99+
100+
# Script entry point: build the spider and start crawling when run directly.
if __name__ == "__main__":
    spider = ReportSpider()
    spider.start()
0 commit comments