【urllib手动在请求头中添加Cookies】、【urllib利用CookieJar,代码登录成功后自动保存cookie】、【requests模块】

一、手动在请求头中添加Cookies
1.手动登录成功某网站。
2.打开个人中心页面,进入浏览器的开发者工具,找到该页面对应的请求。
3.手动复制对应的Cookies,放在请求头里。

# Request headers sent with the page request: a browser-like User-Agent plus
# the Cookie header copied by hand from the browser's developer tools after a
# manual login.
headers_cookies = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Cookie": "enc=iiGrPCjNaKrg1v%2F5lXtnIqp6o%2FrPXXy7tXVpHeqN9oxQye4rQFxwrGx4tDd1pRTSCkvf9%2B6ErFQs4v7x8SNIkw%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; t=c889c0e1e273cf2939a6c990381730d6; cookie2=1ecb15e9d9de99d3fd25e7e25e472f00; _tb_token_=fdee4735e845e; _mw_us_time_=1574585189795; cna=JpntEn4zKmUCAdr3/fi0olME; v=0; unb=475474234; uc3=lg2=Vq8l%2BKCLz3%2F65A%3D%3D&nk2=rq2ne9aHIYI%2B8A%3D%3D&id2=Vyu7Cf8teWiu&vt3=F8dByuQCLcivnL%2FWsNc%3D; csg=ecb1b0f5; lgc=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; cookie17=Vyu7Cf8teWiu; dnk=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; skt=4e2c539eff7b5151; existShop=MTU3NDU4NTMxOA%3D%3D; uc4=nk4=0%40rPfgv7AcZSglmIa%2F5ul%2BWfgTkEBz&id4=0%40VXwn3O44NGB%2FEcfNGooHpY51Kmo%3D; tracknick=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=%E7%9B%9842; _nk_=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; cookie1=UNdc8FKi%2BMtMXWKoS6v4RYwgEuOzL2IAGSRqfWHJq4g%3D; mt=ci=98_1; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie21=WqG3DMC9FxUx&cookie15=U%2BGCWk%2F75gdr5Q%3D%3D&existShop=false&pas=0&cookie14=UoTbmVUxGse4ow%3D%3D&tag=8&lng=zh_CN; isg=BI-P0iWzDlwu1A0dmv2dMV_NHiNZHCRe7y2xe6GcK_4FcK9yqYRzJo1iduDrE7tO; l=dBLhG9xIvMdJJxVEBOCanurza77OSIRYYuPzaNbMi_5B26T_bgbOkdW_cF96VjWf9STB4HAa5IJ9-etkZG6gldd8E5vrzDc.",
}

4.创建请求对象

# Build the request object; the Cookie header travels inside headers_cookies.
request_cookies = urllib.request.Request(url=url, headers=headers_cookies)

二、利用CookieJar,代码登录成功后自动保存cookie
1.整体步骤
①使用代码登录,登录成功后,CookieJar自动保存cookie
②使用已自动保存了cookie的opener访问个人中心
2.注意问题
①需要查看登录网站时涉及的是Post请求还是Get请求,因二者请求的不同,后续发送请求时,urllib.request.Request传的参数也不同(如是Post请求,urllib.request.Request(data=XXX)需填写data参数)。
在这里插入图片描述
②登录共需四个参数:username、pwd是已知的账号和密码;formhash、backurl则需要打开登录页 https://www.yaozh.com/login/ ,在前端源码中查找得到。
在这里插入图片描述
在这里插入图片描述

# Form fields posted to the login endpoint. The values are placeholders.
login_form_data = {
    "username": "xxxxxxx",
    "pwd": "xxxxxxx",
    # formhash and backurl are found by inspecting the HTML source of the
    # login page (https://www.yaozh.com/login/) that precedes the member centre.
    "formhash": "xxxxxxxxxxxx",  # taken from the initial login page
    "backurl": "xxxxxxxxxxxx",  # taken from the initial login page; prefix the value with https
}

三、代码1——手动在请求头中添加Cookies

import urllib.request


def function_cookies():
    """Fetch a login-protected Taobao page using a hand-copied Cookie header.

    Sends a GET request to the "my taobao" URL with a browser-like User-Agent
    and a Cookie header copied from the browser, prints the type of the
    response body, and writes the raw bytes to ``cookies2.html``.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
    """
    # 1. URL of the page to fetch.
    url = "https://i.taobao.com/my_taobao.htm?spm=a21bo.2017.754894437.3.2fba11d9o37aoc&ad_id=&am_id=&cm_id=&pm_id=1501036000a02c5c3739"
    # 2. Request headers. The Cookie value is a session captured from a
    #    manually logged-in browser; replace it with your own before running.
    headers_cookies = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Cookie": "enc=iiGrPCjNaKrg1v%2F5lXtnIqp6o%2FrPXXy7tXVpHeqN9oxQye4rQFxwrGx4tDd1pRTSCkvf9%2B6ErFQs4v7x8SNIkw%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; t=c889c0e1e273cf2939a6c990381730d6; cookie2=1ecb15e9d9de99d3fd25e7e25e472f00; _tb_token_=fdee4735e845e; _mw_us_time_=1574585189795; cna=JpntEn4zKmUCAdr3/fi0olME; v=0; unb=475474234; uc3=lg2=Vq8l%2BKCLz3%2F65A%3D%3D&nk2=rq2ne9aHIYI%2B8A%3D%3D&id2=Vyu7Cf8teWiu&vt3=F8dByuQCLcivnL%2FWsNc%3D; csg=ecb1b0f5; lgc=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; cookie17=Vyu7Cf8teWiu; dnk=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; skt=4e2c539eff7b5151; existShop=MTU3NDU4NTMxOA%3D%3D; uc4=nk4=0%40rPfgv7AcZSglmIa%2F5ul%2BWfgTkEBz&id4=0%40VXwn3O44NGB%2FEcfNGooHpY51Kmo%3D; tracknick=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=%E7%9B%9842; _nk_=%5Cu817F%5Cu817F%5Cu94C1%5Cu7B97%5Cu76D8; cookie1=UNdc8FKi%2BMtMXWKoS6v4RYwgEuOzL2IAGSRqfWHJq4g%3D; mt=ci=98_1; uc1=cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&cookie21=WqG3DMC9FxUx&cookie15=U%2BGCWk%2F75gdr5Q%3D%3D&existShop=false&pas=0&cookie14=UoTbmVUxGse4ow%3D%3D&tag=8&lng=zh_CN; isg=BI-P0iWzDlwu1A0dmv2dMV_NHiNZHCRe7y2xe6GcK_4FcK9yqYRzJo1iduDrE7tO; l=dBLhG9xIvMdJJxVEBOCanurza77OSIRYYuPzaNbMi_5B26T_bgbOkdW_cF96VjWf9STB4HAa5IJ9-etkZG6gldd8E5vrzDc.",
    }
    # 3. Build the request object carrying the headers above.
    request_cookies = urllib.request.Request(url, headers=headers_cookies)
    # 4. Send the request; the context manager closes the connection once the
    #    body has been read.
    with urllib.request.urlopen(request_cookies) as response:
        data = response.read()
    print(type(data))
    # data is bytes, so the output file is opened in binary mode.
    with open("cookies2.html", "wb") as f:
        f.write(data)


function_cookies()

四、代码2——CookieJar自动保存cookie

import urllib.request
from http import cookiejar  # cookiejar.CookieJar stores the cookies received in responses
from urllib import parse  # urlencode converts a dict into a form-encoded string

# Two steps: (1) log in from code, (2) visit the member centre with the
# opener that carries the saved cookies.

# 1. Log in from code
# 1.1 Login URL
login_url = "https://www.yaozh.com/login"
# 1.2 Login form fields (placeholder values)
login_form_data = {
    "username": "xxxxxx",
    "pwd": "xxxxxx",
    # formhash and backurl are found in the HTML source of the login page
    # https://www.yaozh.com/login/ that precedes the member centre.
    # The backend decides by request method: GET returns the login page,
    # POST returns the login result.
    "formhash": "xxxx",  # taken from the initial login page
    "backurl": "xxxxx",  # taken from the initial login page; prefix the value with https
}
# 1.3 Send the login request (POST)
cookie_jar = cookiejar.CookieJar()  # holds the cookies set by the server
# Handler that reads and writes cookies through cookie_jar.
cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
# Opener built from that handler; every request sent through it shares the jar.
openner = urllib.request.build_opener(cookie_handler)
# Request headers
headers = {"User-Agent": "Mozilla/5.0(Linux;Android6.0;Nexus5Build/MRA58N)AppleWebKit / 537.36(KHTML, likeGecko)Chrome / 75.0.3770.100MobileSafari / 537.36"
}
# Request(data=...) does not accept a dict: urlencode turns the dict into a
# str, and .encode("utf-8") turns that str into the bytes a POST body needs.
login_form_data_str = parse.urlencode(login_form_data).encode("utf-8")
# Supplying data makes this a POST request.
login_request = urllib.request.Request(login_url, headers=headers, data=login_form_data_str)
# On a successful login the cookie jar keeps the session cookies. The response
# body is not needed, so the connection is closed right away.
openner.open(login_request).close()

# 2. Visit the member centre with the saved cookies
center_url = "https://www.yaozh.com/member/"  # member-centre page
center_request = urllib.request.Request(center_url, headers=headers)
# Open the prepared request (not the bare URL) so the User-Agent header is
# sent along with the cookies from the jar.
with openner.open(center_request) as response:
    # bytes --> str
    data = response.read().decode()
with open("cookie3.html", 'w', encoding="utf-8") as f:
    f.write(data)

五、爬虫中重要的第三方库——requests
1.安装:pip install requests
2.基本代码

import requests

url = "http://www.baidu.com"
response = requests.get(url=url)
# response.content is the raw body as bytes; response.text is the body as str.
# Both are attributes, not methods. Prefer .content and decode it explicitly.
data = str(response.content, "utf-8")

3.发送post请求的方法与get相似,只是参数通过data(表单数据)或json(JSON数据)传入:
requests.post(url, data={...})  或  requests.post(url, json={...})
六、代码3——requests获取各种请求头、响应头等信息

import requests


class RequestSpider(object):
    """Fetch the Baidu home page and print request/response metadata."""

    def __init__(self):
        """Send the GET request and keep the response on ``self.response``."""
        url = "http://www.baidu.com"
        headers = {"User-Agent": "Mozilla/5.0(Linux;Android6.0;Nexus5Build/MRA58N)AppleWebKit / 537.36(KHTML, likeGecko)Chrome / 75.0.3770.100MobileSafari / 537.36"}
        self.response = requests.get(url, headers=headers)

    def run(self):
        """Print request headers, response headers, status code and cookies."""
        # 1. Headers that were sent with the request
        request_headers = self.response.request.headers
        print(request_headers)
        # 2. Headers returned by the server
        response_headers = self.response.headers
        print(response_headers)
        # 3. HTTP status code of the response
        code = self.response.status_code
        print(code)
        # 4. Cookies sent with the request. _cookies has a leading underscore,
        #    i.e. it is a private attribute of the prepared request.
        request_cookie = self.response.request._cookies
        print(request_cookie)
        # 5. Cookies set by the response
        response_cookie = self.response.cookies
        print(response_cookie)


RequestSpider().run()

七、代码4——requests基本代码

import requests

url = "http://www.baidu.com/s?"
# requests URL-encodes non-ASCII parameter values automatically.
params = {"wd":"刀塔"}
headers = {"User-Agent": "Mozilla/5.0(Linux;Android6.0;Nexus5Build/MRA58N)AppleWebKit / 537.36(KHTML, likeGecko)Chrome / 75.0.3770.100MobileSafari / 537.36"
}
response = requests.get(url, headers=headers, params=params)
# .content is an attribute holding the body as bytes; decode it to str.
data = response.content.decode("utf-8")
with open("request_use2.html", 'w', encoding="utf-8") as f:
    f.write(data)


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部