How to scrape images from a web page with Python

This post takes Baidu Image Search as the scraping target. Without further ado, here is the code; it is short and easy to follow.

import os
import json
import urllib
import random
import time
import shutil
import requests
from urllib import parse

img_name = input("Enter the name of the images to fetch: ")
path2 = "F:/lsj/"  # save directory; change this path to whatever you like
if not os.path.exists(path2):  # create the folder if it does not exist yet
    os.mkdir(path2)
else:  # otherwise wipe it and recreate it so each run starts clean
    shutil.rmtree(path2)
    os.mkdir(path2)

page_limit = 90  # how many results to request before stopping; pick any multiple of 30
next_page = 0
num = 0
while True:
    num += 1
    next_page += 30  # the API serves results in pages of 30
    try:
        name = urllib.parse.quote(img_name)  # percent-encode the (Chinese) keyword for the URL
        # a small pool of User-Agent strings to rotate through
        headers_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
            'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3)',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36',
        ]
        headers = {
            'Accept-Encoding': 'gzip, deflate, br',
            # session cookie copied from a logged-in browser; replace it with your own
            "Cookie": "BDqhfp=%E5%B0%8F%E7%8B%97%26%260-10-1undefined%26%266136%26%264; BAIDUID=D3CC40DFB336DA01E44AE82AF411BF55:FG=1; BIDUPSID=D3CC40DFB336DA01E44AE82AF411BF55; PSTM=1653291829; BDUSS=drTDU4eWNxfjg1c2FJRU1vYklveHIzTHdoZ2J6VmpTbHB0cU5DNS1YdWZLYjFpSUFBQUFBJCQAAAAAAAAAAAEAAABU7OHovquyytPQWU9VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ-clWKfnJVicW; BDUSS_BFESS=drTDU4eWNxfjg1c2FJRU1vYklveHIzTHdoZ2J6VmpTbHB0cU5DNS1YdWZLYjFpSUFBQUFBJCQAAAAAAAAAAAEAAABU7OHovquyytPQWU9VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ-clWKfnJVicW; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BA_HECTOR=21018ka40h01ag0h8k1h9g7r414; ZFY=d4m9E1snb74UmogCcjoMaQOPE:AlJfjvIb9trkr4mYzs:C; BAIDUID_BFESS=D3CC40DFB336DA01E44AE82AF411BF55:FG=1; delPer=0; PSINO=7; H_PS_PSSID=36425_36502_36454_31253_36452_36421_36166_36488_36518_36055_36520_26350_36469_36311; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; firstShowTip=1; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; cleanHistoryStatus=0; indexPageSugList=%5B%22%E5%B0%8F%E7%8B%97%22%2C%22%E7%BE%8E%E5%A5%B3%22%5D; kleck=31e66bb4fd4e2d1e3e1fffff596f993a; userFrom=null; ab_sr=1.0.1_ZTMxYmZkZjUxMWYyMTYxMmMxYWJlMjhmMWYzYmYyMGIwNDQxYTAzZWJiMjdjODg5NTU4NjM2NmYzYzkyODRhYzY5ZDdhZGIxODM4MThlNmE4ZDhlYmE2MTA1ODE1YTEwY2UwNmIwOWE0MTZlMmU0ZDk5ZTRmN2UyMTk0OTAyNTNiMDVjMWQ2MDhkNzU0Yjk3YWM4ODAxZTI3N2RmMzE5Yg==",
            "Referer": "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&fm=index&pos=history&word=%E5%B0%8F%E7%8B%97",
            'User-Agent': random.choice(headers_list),
        }
        # Baidu Images JSON API; pn is the offset of the first result on the page
        input_url_api = "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=9643182277295175202&ipn=rj&ct=201326592&is=&fp=result&fr=&word=" + name + "&queryWord=" + name + "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&expermode=&nojc=&isAsync=&pn=" + str(next_page)
        data_html = requests.get(input_url_api, headers=headers).text
        datas = json.loads(data_html)
        data = datas['data']
        for img_data in data:
            try:
                img_url = img_data["thumbURL"]  # thumbnail URL of one search result
                print("page", num, ":", img_url)
                img_text = requests.get(img_url, stream=True)
                # random file name: Unix timestamp plus 8 random letters
                alphabet = 'abcdefghijklmnopqrstuvwxyz'
                characters = ''.join(random.sample(alphabet, 8))
                suiji_name = str(int(time.time())) + characters
                with open(path2 + suiji_name + '.jpg', 'wb') as fd:
                    for chunk in img_text.iter_content(1024):  # stream the image to disk
                        fd.write(chunk)
            except Exception:
                continue  # skip any result that fails to download
        if next_page >= page_limit:  # page limit reached, stop crawling
            break
    except Exception:
        continue  # skip pages that fail to fetch or parse
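Before letting the full loop run, it can be worth checking that the acjson endpoint answers for your keyword at all. The sketch below is a minimal test under the same assumptions as the script above: the endpoint and the thumbURL field come from the code, while the fetch_page helper name and the trimmed-down query string are my own. If Baidu returns an anti-crawler page instead of JSON, json.loads will raise an error, in which case you will need the full Cookie/Referer headers from the main script.

import json
import requests
from urllib import parse

def fetch_page(keyword, pn=0):
    # Hypothetical helper: request one 30-result page from the acjson endpoint
    # used above, with a trimmed-down query string (an assumption on my part).
    url = ("https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj"
           "&word=" + parse.quote(keyword) + "&queryWord=" + parse.quote(keyword) +
           "&ie=utf-8&oe=utf-8&fp=result&cl=2&lm=-1&st=-1&face=0&istype=2&nc=1"
           "&pn=" + str(pn))
    headers = {"User-Agent": "Mozilla/5.0"}  # a full browser UA string works better
    resp = requests.get(url, headers=headers, timeout=10)
    return json.loads(resp.text).get("data", [])

# Print the thumbnail URLs of the first result page for a test keyword.
for item in fetch_page("puppy", pn=0):
    thumb = item.get("thumbURL")  # the list can end with an empty dict, so guard
    if thumb:
        print(thumb)

If this prints a list of thumbnail URLs, the main script's request logic should work as well; pn simply advances in steps of 30, which is why the main loop does next_page += 30 on every iteration.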

