Python网络爬虫获取股票信息

#-*-coding:utf-8-*-
'''
Created on 2017年3月17日
@author: lavi
'''
import requests
from bs4 import BeautifulSoup
import bs4
import re
import traceback
from setuptools.package_index import HREF
def getHTMLText(url, code="utf-8"):
    """Download a page and return its body decoded with ``code``.

    Args:
        url: Address of the page to fetch.
        code: Encoding assigned to the response before its text is read.
            It is set explicitly rather than taken from
            ``r.apparent_encoding``.

    Returns:
        The decoded page text, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # raise_for_status is a method: it has to be called, otherwise
        # 4xx/5xx responses are silently treated as valid pages.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat an empty string as "no page".
        return ""
'''
从东方财富网获取股票代码列表
'''
def getStockList(stock_list, stock_list_url):
    """Collect stock codes linked from the listing page into ``stock_list``.

    The page is fetched with ``getHTMLText`` using the GB2312 encoding and
    every ``<a>`` tag is inspected. The first substring of an ``href`` that
    looks like ``sh``/``sz`` followed by six digits is taken as a stock code.

    Args:
        stock_list: List that is extended in place with the codes found.
        stock_list_url: URL of the page listing all stocks.

    Returns:
        None. Results are appended to ``stock_list``.
    """
    html = getHTMLText(stock_list_url, "GB2312")
    soup = BeautifulSoup(html, "html.parser")
    # Compiled once instead of being looked up for every anchor.
    code_pattern = re.compile(r's[zh]\d{6}')
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        # Anchors without an href carry no stock code; skip them explicitly
        # instead of relying on a bare except to swallow the KeyError.
        if not isinstance(href, str):
            continue
        match = code_pattern.search(href)
        if match:
            stock_list.append(match.group(0))
'''
根据东方财富网获取的股票代码,从百度股票获取个股的信息
百度股票上显示个股信息的url的前缀是相同的,后缀是股票代码
'''
def getStockinfo(stock_list, stock_info_url, output_file):
    """Fetch the detail page of every stock and append its data to a file.

    For each code the page ``stock_info_url + code + ".html"`` is downloaded
    with ``getHTMLText``. The stock name and every ``<dt>``/``<dd>`` pair
    inside the ``stock-bets`` block are gathered into a dict, and the dict's
    ``str()`` form is appended as one line to ``output_file``. A progress
    percentage is rewritten on the same console line after each stock.

    Args:
        stock_list: Stock codes to look up.
        stock_info_url: Common URL prefix of the per-stock pages.
        output_file: Path of the UTF-8 text file that records are appended to.

    Returns:
        None.
    """
    total = len(stock_list)
    for count, stockCode in enumerate(stock_list, start=1):
        html = getHTMLText(stock_info_url + stockCode + ".html")
        # An empty page means the download failed; there is nothing to parse,
        # but the stock still counts toward the progress shown below.
        if html != "":
            try:
                infoDict = {}
                soup = BeautifulSoup(html, "html.parser")
                stockInfo = soup.find("div", attrs={'class': 'stock-bets'})
                name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
                infoDict["股票名称"] = name.text.split()[0]
                key_list = stockInfo.find_all("dt")
                value_list = stockInfo.find_all("dd")
                # Indexing (not zip) on purpose: a <dt> without a matching
                # <dd> raises IndexError and the whole record is skipped.
                for i, dt in enumerate(key_list):
                    # NOTE(review): .string may be None for a <dd> that has
                    # nested markup - confirm against the real pages.
                    infoDict[dt.text] = value_list[i].string
                with open(output_file, "a", encoding="utf-8") as f:
                    f.write(str(infoDict) + "\n")
            except Exception:
                # One malformed page must not abort the whole run; report it
                # and carry on with the next stock.
                traceback.print_exc()
        # "\r" moves the cursor back to the start of the line so that each
        # update overwrites the previous one (single-line progress display).
        print("\r当前进度:{:.2f}%".format(count * 100 / total), end="")
def main():
    """Run the scraper: gather stock codes, then dump each stock's details.

    The code list comes from the Eastmoney listing page, the per-stock pages
    are read from the Baidu stock URL prefix, and the records are appended to
    ``e://BaiduStock1.txt``.
    """
    codes = []
    getStockList(codes, "http://quote.eastmoney.com/stocklist.html")
    getStockinfo(
        codes,
        "https://gupiao.baidu.com/stock/",
        "e://BaiduStock1.txt",
    )


main()


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部