全书网

import requests
import re
from lxml import etree
# Scraper for the book listing on quanshuwang.com: downloads a listing page,
# decodes it as GBK, pulls book links and titles out with regexes, then for
# each book fetches its page, locates the "开始阅读" (start reading) link,
# downloads the chapter menu behind it and prints what the menu regex matches.
#
# NOTE(review): as it stands this text is not runnable. The regex string
# literals assigned to book_html, book_title, read_start and book_mulu are
# unterminated/incomplete (their markup appears to have been lost), and the
# loop bodies are not consistently indented under their `for` headers.
# Restore the patterns and indentation from the original script before use.
start_url ='http://www.quanshuwang.com/list/5_{}.html'
book_page=1
# URLs of the book listing pages 1 to 128.
# NOTE(review): range(1) only yields book_page = 0, so exactly one URL
# (list/5_0.html) is requested, not pages 1..128 as described above.
for book_page in range(1):
url =start_url.format(book_page)
# The page bytes are decoded explicitly as GBK instead of relying on
# requests' own encoding detection.
response =requests.get(url).content.decode('gbk')
# print(response)
# Pattern for the per-book link on the listing page (literal is truncated).
book_html= '
  • '

    # book_title ='alt="(.*?)"'
    # Pattern for the book title on the listing page (literal is truncated).
    book_title ='
    re_html= re.findall(book_html,response)
    re_title =re.findall(book_title,response)
    # for title,html in zip(re_title,re_html):
    # print(title,html)

    # range(1,20) controls how many books are processed: books 1 to 20.
    # NOTE(review): range(1, 20) yields indices 1..19, so re_html[0] is
    # never visited and re_html[j] raises IndexError when fewer than 20
    # links were matched.
    for j in range(1,20):

    # Fetch the individual book page, again decoding as GBK.
    response_book =requests.get(re_html[j]).content.decode('gbk')
    # Pattern for the "开始阅读" (start reading) link (literal is truncated).
    # Presumably it has two capture groups, since read_menu[0] is indexed
    # with both [0] and [1] below -- confirm once the pattern is restored.
    read_start='
    开始阅读'
    read_menu=re.findall(read_start,response_book)
    print(read_menu[0][0])
    print(read_menu[0][1])
    # read_menu[0][0] is used as the URL of the chapter menu page; despite
    # its name, menu_url holds that page's decoded HTML text, not a URL.
    menu_url =requests.get(read_menu[0][0]).content.decode('gbk')
    print(menu_url)
    # Pattern for the chapter list inside the menu page (literal is
    # truncated; only its `(.*?)` capture group survives).
    book_mulu='

    (.*?)

    '
    print(re.findall(book_mulu,menu_url))
转载于:https://www.cnblogs.com/LQ970811/p/10508680.html


    本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

    相关文章

    立即
    投稿

    微信公众账号

    微信扫一扫加关注

    返回
    顶部