1. Basic usage of urllib
import urllib.request

# Fetch the page and decode the response bytes as UTF-8
url = 'http://www.baidu.com'
response = urllib.request.urlopen(url)
content = response.read().decode('utf-8')
print(content)
2. urllib: one response type and six methods
import urllib.request
url = 'http://www.baidu.com'
# urlopen returns an http.client.HTTPResponse (the "one type" in the heading)
response = urllib.request.urlopen(url)
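The heading promises one type and six methods, but the listing stops at urlopen. The type is http.client.HTTPResponse, and the six methods this kind of tutorial normally demonstrates on it are read, readline, readlines, getcode, geturl and getheaders; a sketch:

import urllib.request

url = 'http://www.baidu.com'
response = urllib.request.urlopen(url)

print(type(response))         # <class 'http.client.HTTPResponse'>

print(response.getcode())     # status code, 200 on success
print(response.geturl())      # the URL that was actually fetched
print(response.getheaders())  # all response headers as (name, value) pairs

print(response.readline())    # read one line of the body
print(response.readlines())   # read the remaining lines as a list

# read() consumes the whole body, so use a fresh response for it
content = urllib.request.urlopen(url).read().decode('utf-8')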
3. urllib: downloading files
import urllib.request
url_video = 'https://vd4.bdstatic.com/mda-kfsm974k6xwx9y8j/v1-cae/sc/mda-kfsm974k6xwx9y8j.mp4?v_from_s=hkapp-haokan-nanjing&auth_key=1652930638-0-0-dce2ef63e5e2c4d4cbafeea2dd68cff1&bcevod_channel=searchbox_feed&pd=1&cd=0&pt=3&logid=3238681392&vid=2292765971694816622&abtest=102148_2-17451_1&klogid=3238681392'
# Download the video and save it as news.mp4 in the working directory
urllib.request.urlretrieve(url_video, 'news.mp4')
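urlretrieve is not limited to video; the same call downloads pages or images. A sketch (the image URL is a hypothetical placeholder, not from the original):

import urllib.request

# Save a page as a local HTML file
urllib.request.urlretrieve('http://www.baidu.com', 'baidu.html')

# An image works the same way (placeholder URL, shown for illustration only)
# urllib.request.urlretrieve('https://example.com/picture.jpg', 'picture.jpg')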
4. urllib: customizing the Request object
import urllib.request
url = 'https://www.baidu.com'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url, headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
print(content)
5. GET requests
(1) The quote method: URL-encode a single parameter
import urllib.request
import urllib.parse

url = 'https://www.baidu.com/s?wd='
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
name = urllib.parse.quote('周杰伦')
url = url + name
request = urllib.request.Request(url=url,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
print(content)
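For reference, quote turns the UTF-8 bytes of the text into percent-escapes, so the final URL is plain ASCII:

>>> import urllib.parse
>>> urllib.parse.quote('周杰伦')
'%E5%91%A8%E6%9D%B0%E4%BC%A6'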
(2) The urlencode method: for multiple parameters
import urllib.request
import urllib.parse
base_url = 'https://www.baidu.com/s?'
data = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '中国台湾省'
}
new_data = urllib.parse.urlencode(data)
url = base_url + new_data
print(url)
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
print(content)
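urlencode percent-encodes every key and value and joins the pairs with &, producing a ready-made query string:

>>> import urllib.parse
>>> urllib.parse.urlencode({'wd': '周杰伦', 'sex': '男'})
'wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7'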
6. POST requests
(1) Requesting Baidu Translate (the sug endpoint)
import urllib.request
import urllib.parse

url = 'https://fanyi.baidu.com/sug'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
data = {'kw':'spider',
}
data = urllib.parse.urlencode(data).encode('utf-8')
request = urllib.request.Request(url=url,data=data,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
print(content)
print(type(content))

# The sug endpoint returns JSON, so parse it into a Python object
import json
obj = json.loads(content)
print(obj)

"""
Summary:
1. POST parameters must be URL-encoded: data = urllib.parse.urlencode(data)
2. After urlencode, call encode() to turn the string into bytes:
   data = urllib.parse.urlencode(data).encode('utf-8')
3. The encoded data goes into the Request constructor:
   request = urllib.request.Request(url=url, data=data, headers=headers)
"""
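The three rules condensed into one runnable sketch; httpbin.org/post is used here as a stand-in echo endpoint (my assumption, not part of the original):

import json
import urllib.parse
import urllib.request

url = 'https://httpbin.org/post'  # assumed echo endpoint, for illustration only
data = urllib.parse.urlencode({'kw': 'spider'}).encode('utf-8')  # rules 1 and 2
request = urllib.request.Request(url=url, data=data, headers={'User-Agent': 'Mozilla/5.0'})  # rule 3
response = urllib.request.urlopen(request)
print(json.loads(response.read().decode('utf-8'))['form'])  # {'kw': 'spider'}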
(2) Requesting Baidu Translate in detail (the v2transapi endpoint). For this endpoint the anti-spider check is on the request headers; in practice the Cookie is what matters, so the browser's cookie string is copied into the headers below.
import urllib.request
import urllib.parse
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
headers = {'Cookie': 'BIDUPSID=9C33D9AF842C65882B22D090B46042A7; PSTM=1610940896; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; __yjs_duid=1_0eb9831c9922fbeed75979b5e0cfd7231619776633784; BAIDUID=9139193BF8529188B5B5F4741D0FD50D:FG=1; APPGUIDE_10_0_2=1; BDSFRCVID_BFESS=tP8OJeC62mn-gd3DoJHSKm8DOg3F4AQTH6aoOpQpj2wA39Ni7uPrEG0PHU8g0KubT5mFogKKy2OTH9DF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tbuJ_KDyJKP3fP36qR6VMPIHqxby26nL3jn9aJ5nJDoCVnTojTJUXfk_jNoThlvMtDCj2qkaQpP-HJ7yM-5HbfPwbbbTJbcz3CrPKl0MLPOYbb0xynoDLT0B5xnMBMnGamOnanra3fAKftnOM46JehL3346-35543bRTLnLy5KJtMDFljTu2DjQyeU5eetjK2CntsJOOaCvVHlQOy4oWK441Dh7MQt6R36chWqvEfp-WDqvoD-Jc3M04X-o9-hvT-54e2p3FBUQJHC33Qft20b0m3gFq3q8La5bD-R7jWhvBhl72y5rUQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCDt5FjtRIeV-35b5rfjJrT5-QSMICShUFsWlOCB2Q-XPoO3KJWeUokQMo-b4CUhH7RqPriWbRM2MbgylRpjM3K0462XjKBKPOpK-DfX2TxoUJ2XM0VMloMqtnWKqLebPRih6j9Qg-8KpQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0hC09j68MjjoM5h5K5-LXJR6BWb_8Kb7VbPbMeMnkbfJBDxcXe4bbt2nyaqnRWbCWsh7TDUDMLq_7yajK2MRjL6RBot3ptKTFjD3G0b5pQT8ryb_OK5OibCrn_R6Iab3vOPI4XpO1ef0zBN5thURB2DkO-4bCWJ5TMl5jDh3Mb6ksD-FtqtJHKbDt_II-JfK; BDUSS=NOUXhyMjAwblBqV2NpfkF0dU9Ca1Z3cGNhOTh0R0lBMHU1TWl-bjU0ck1aNUJpRVFBQUFBJCQAAAAAAAAAAAEAAAAe0iJm0ru49tXm1qqw9AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMzaaGLM2mhiV3; BDUSS_BFESS=NOUXhyMjAwblBqV2NpfkF0dU9Ca1Z3cGNhOTh0R0lBMHU1TWl-bjU0ck1aNUJpRVFBQUFBJCQAAAAAAAAAAAEAAAAe0iJm0ru49tXm1qqw9AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMzaaGLM2mhiV3; BAIDUID_BFESS=875209CD6EF1B6A11342D5227CD66391:FG=1; delPer=0; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1650515716,1650872384,1651827471,1652925735; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; av1_switch_v3=0; PSINO=7; BA_HECTOR=a4210hal8la101ag821h8brmv0r; RT="z=1&dm=baidu.com&si=6304f9glu2&ss=l3cf5iav&sl=0&tt=0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=46b&ul=c3gs0&hd=c3gvq"; H_PS_PSSID=36425_36367_34812_35914_36166_34584_35978_36055_26350_36315_36447; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1652952606; ab_sr=1.0.1_ZWI2ZWRiZWI1Y2JiODRlYWRjZTlkNThjZjAyNDI5NDFhZDg1MGFmN2VkYzI1ZmVlMGIxYjVmZGNhOTEyOWU5ODJlMDI5ZDUxYjc2ODJhOGRlZWZmMzk1NmZjM2U2NDVjNWE3YWIwNjVkOTQ3Zjc2Mjk3Nzg0ZmQ3MmZmNmJlODkzMTQzMjE4OWZiMWExODJiMzZlOTA2NGJkMTkxZmNhZDZjNWQxMzgwZWE1YmM4ZjI5OGViNzM0Y2EyNmRjMjVl',
}
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'spider',
    'transtype': 'realtime',
    'simple_means_flag': '3',
    'sign': '63766.268839',
    'token': '92389bc1e4d32b64ec36f56fb41f03db',
    'domain': 'common'
}
data = urllib.parse.urlencode(data).encode('utf-8')
request = urllib.request.Request(url=url, data=data, headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')

import json
obj = json.loads(content)
print(obj)
7. AJAX GET request: the first page of Douban movies
import urllib.request
url = 'https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start=0&genres=%E5%96%9C%E5%89%A7'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url,headers=headers)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
with open('douban1.json', 'w', encoding='utf-8') as fp:
    fp.write(content)
8. AJAX GET request: the first 10 pages of Douban movies
import urllib.parse
import urllib.request
def create_request(page):
    base_url = 'https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&'
    data = {
        'start': (page - 1) * 20,
        'limit': 20
    }
    data = urllib.parse.urlencode(data)
    url = base_url + data
    print(url)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
    }
    request = urllib.request.Request(url=url, headers=headers)
    return request

def get_content(request):
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content

def down_load(page, content):
    with open('douban_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)

if __name__ == '__main__':
    start_page = int(input('Enter the start page: '))
    end_page = int(input('Enter the end page: '))
    for page in range(start_page, end_page + 1):
        request = create_request(page)
        content = get_content(request)
        down_load(page, content)
9. AJAX POST request: the KFC store-list API
import urllib.request
import urllib.parse
def create_request(page):
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    data = {
        'cname': '',
        'pid': '',
        'keyword': '北京',
        'pageIndex': page,
        'pageSize': '10'
    }
    # POST data must be urlencoded and then converted to bytes
    data = urllib.parse.urlencode(data).encode('utf-8')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
    }
    request = urllib.request.Request(url=base_url, data=data, headers=headers)
    return request
def get_content(request):
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content

def down_load(page, content):
    with open('kfc_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)

if __name__ == '__main__':
    start_page = int(input('Enter the start page: '))
    end_page = int(input('Enter the end page: '))
    for page in range(start_page, end_page + 1):
        request = create_request(page)
        content = get_content(request)
        down_load(page, content)
10. urllib exceptions
import urllib.request
import urllib.error
url = 'http://www.goudan111.com'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
try:
    request = urllib.request.Request(url=url, headers=headers)
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    print(content)
except urllib.error.HTTPError:
    # HTTPError is a subclass of URLError, so it must be caught first
    print('The system is being upgraded...')
except urllib.error.URLError:
    print('As I said, the system is being upgraded...')
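HTTPError also carries the status code and URLError carries a reason, which allows more informative handling; a small sketch (httpbin.org is my assumed test endpoint, not from the original):

import urllib.error
import urllib.request

try:
    urllib.request.urlopen('https://httpbin.org/status/404')
except urllib.error.HTTPError as e:
    print(e.code, e.reason)   # e.g. 404 NOT FOUND
except urllib.error.URLError as e:
    print(e.reason)           # e.g. a DNS failure for a non-existent host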
11. Basic use of handlers
import urllib.request
url = 'http://www.baidu.com'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url, headers=headers)
handler = urllib.request.HTTPHandler()
opener = urllib.request.build_opener(handler)
response = opener.open(request)
content = response.read().decode('utf-8')
print(content)
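Handlers matter because urlopen itself cannot be extended with proxies or other behavior, while build_opener can. If you want the customized opener to back every later urlopen call as well, urllib provides install_opener:

# Optional: make this opener the global default, so plain
# urllib.request.urlopen() calls go through it from now on
urllib.request.install_opener(opener)
response = urllib.request.urlopen(request)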
12. Proxies
import urllib.request
url = 'https://www.baidu.com/s?ie=UTF-8&wd=ip'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url,headers=headers)
proxies = {
    # urllib selects a proxy by URL scheme: an https:// URL is only proxied
    # if the dict has an 'https' key, so this 'http' entry will not cover it
    'http': '118.24.219.151:16817'
}
handler = urllib.request.ProxyHandler(proxies=proxies)
opener = urllib.request.build_opener(handler)
response = opener.open(request)
content = response.read().decode('utf-8')
with open('daili.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
13. A proxy pool
import urllib.request
import random

proxies_pool = [
    {'http': '14.215.212.37:9168'},
    {'http': '14.215.212.37:9168'},
    {'http': '14.215.212.37:9168'}
]
proxies = random.choice(proxies_pool)
url = 'https://www.baidu.com/s?ie=UTF-8&wd=ip'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
request = urllib.request.Request(url=url, headers=headers)
handler = urllib.request.ProxyHandler(proxies=proxies)
opener = urllib.request.build_opener(handler)
response = opener.open(request)
content = response.read().decode('utf-8')
with open('dailichi.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
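Free proxies die often, so a common refinement (my addition, not part of the original) is to retry with a different random proxy until one works; a minimal sketch:

import random
import socket
import urllib.error
import urllib.request

def fetch_via_pool(url, headers, proxies_pool, retries=3):
    # Try up to `retries` proxies drawn at random from the pool
    for _ in range(retries):
        handler = urllib.request.ProxyHandler(proxies=random.choice(proxies_pool))
        opener = urllib.request.build_opener(handler)
        try:
            request = urllib.request.Request(url=url, headers=headers)
            return opener.open(request, timeout=5).read().decode('utf-8')
        except (urllib.error.URLError, socket.timeout):
            continue  # this proxy is dead or slow; try another
    raise RuntimeError('all proxies in the pool failed')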