欢迎来到DIVCSS5查找CSS资料与学习DIV CSS布局技术!
  # Required libraries
import os
from multiprocessing import Pool

import requests
from lxml import etree
 
  # Request headers sent with every HTTP request made by this script.
# NOTE(review): the pasted source had all intra-line whitespace stripped, so
# this User-Agent value probably lost its spaces as well; it is kept exactly
# as found -- confirm against the intended browser string.
headers = {
    'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/65.0.3325.181Safari/537.36',
}

# Directory that downloaded books are written into.
pathname = './全书网/'

# Create the storage directory. exist_ok=True replaces the former
# exists()/mkdir() pair: module top level can run in several processes at
# once (pool workers re-import the main module under the "spawn" start
# method), and check-then-create could then raise FileExistsError.
os.makedirs(pathname, exist_ok=True)
 
  # Fetch the book listing pages and download every book they link to.
def _fetch_listing(url):
    """Download one listing page and return it as a parsed lxml tree."""
    response = requests.get(url=url, headers=headers)
    return etree.HTML(response.text)


def _book_links(tree):
    """Return the first link of each book entry on a parsed listing page."""
    links = []
    # NOTE(review): the pasted source matched class="seeWellcf" exactly, but
    # all whitespace in the paste was stripped, so the real attribute is
    # presumably a multi-class value. contains() matches either form --
    # confirm against the live page.
    for item in tree.xpath('//ul[contains(@class,"seeWell")]/li'):
        hrefs = item.xpath('./a/@href')
        if hrefs:  # skip entries without a link instead of aborting the page
            links.append(hrefs[0])
    return links


def _download_listing_page(url):
    """Download, one after another, every book linked from one listing page.

    This is the task handed to pool workers. It deliberately does not touch
    the process pool: the module-level ``pool`` is only assigned in the main
    process, and pool workers cannot start child processes of their own.
    """
    try:
        for book_url in _book_links(_fetch_listing(url)):
            get_book(book_url)
    except Exception as exc:
        print('get_booklistfailed', url, repr(exc))


def get_booklist(url):
    """Crawl the category starting at listing page *url*.

    Books on the first page are downloaded in parallel through the
    module-level ``pool``; the remaining listing pages (2..last) are then
    distributed over the pool, one page per task.

    Must be called from the process that created ``pool``. Any failure is
    reported on stdout and swallowed (best-effort crawl); returns None.
    """
    try:
        tree = _fetch_listing(url)
        book_list = _book_links(tree)

        # The "last" pager link carries the total page count. When it is
        # absent, treat the category as a single page rather than failing.
        last = tree.xpath('//a[@class="last"]/text()')
        last_page = int(last[0]) if last else 1

        pool.map(get_book, book_list)

        urls = [
            'http://www.quanshuwang.com/list/3_{}.html'.format(i)
            for i in range(2, last_page + 1)
        ]
        # Previously get_booklist itself was mapped here; inside a worker it
        # then failed on the undefined ``pool`` and would also have
        # re-scheduled every page again.
        pool.map(_download_listing_page, urls)
    except Exception as exc:
        print('get_booklistfailed', url, repr(exc))
 
  # Fetch a single book's detail page.
def get_book(url):
    """Download the book at detail page *url* unless it already exists.

    Reads the book title from the page; if ``<pathname><title>.txt`` is
    already present a notice is printed and nothing is downloaded, otherwise
    the catalogue link is handed to ``get_mulu``. Any failure is reported on
    stdout and swallowed; always returns None.
    """
    try:
        page = requests.get(url=url, headers=headers)
        tree = etree.HTML(page.content.decode("gb18030"))
        book_name = tree.xpath('//div[@class="b-info"]/h1/text()')[0]
        target = pathname + book_name + '.txt'
        if not os.path.exists(target):
            catalog_url = tree.xpath('//div[@class="b-oper"]/a/@href')[0]
            get_mulu(catalog_url)
        else:
            print(book_name + '.书籍已存在,如需重新下载请删除原文件')
    except Exception:
        print('get_bookfailed')
 
  # Fetch a book's table of contents.
def get_mulu(url):
    """Walk the chapter catalogue at *url* and download each chapter in order.

    Every catalogue entry's first link is passed to ``get_content``. Entries
    without a link are skipped. Any failure is reported on stdout and
    swallowed; returns None.
    """
    try:
        response = requests.get(url=url, headers=headers)
        tree = etree.HTML(response.text)
        # NOTE(review): the pasted source matched class="clearfixdirconone"
        # exactly, but all whitespace in the paste was stripped, so the real
        # attribute is presumably a multi-class value. contains() matches
        # either form -- confirm against the live page.
        entries = tree.xpath('//div[contains(@class,"dirconone")]/li')
        for entry in entries:
            hrefs = entry.xpath('./a/@href')
            if not hrefs:
                # One entry without a link used to raise IndexError and
                # abort all remaining chapters.
                continue
            get_content(hrefs[0])
    except Exception as exc:
        print('get_mulufailed', url, repr(exc))
 
  # Fetch one chapter and append it to the book's text file.
def get_content(url):
    """Download the chapter at *url* and append it to ``<pathname><title>.txt``.

    The page is decoded as gb18030. The chapter heading and body text are
    written as UTF-8, each followed by a blank line. Any failure is reported
    on stdout and swallowed; returns None.
    """
    try:
        response = requests.get(url=url, headers=headers)
        tree = etree.HTML(response.content.decode("gb18030"))
        title = tree.xpath('//a[@class="article_title"]/text()')[0]
        # NOTE(review): the pasted source matched class="ljieqi_title"
        # exactly, but all whitespace in the paste was stripped, so the real
        # attribute is presumably a multi-class value. contains() matches
        # either form -- confirm against the live page.
        zhangjie = tree.xpath('//strong[contains(@class,"jieqi_title")]/text()')[0]
        content = ''.join(tree.xpath('//div[@class="mainContenr"]/text()'))
        # Plain append mode: the file is only written, never read back.
        with open(pathname + title + '.txt', 'a', encoding='utf-8') as f:
            f.write(zhangjie + '\n\n' + content + '\n\n')
        print('正在下载:', zhangjie)
    except Exception as exc:
        print('get_contentfailed', url, repr(exc))
 
  # Program entry point.
if __name__ == '__main__':
    url = 'http://www.quanshuwang.com/list/3_1.html'
    # Create the process pool. At module level the `with` target binds the
    # global name `pool`, which get_booklist looks up when dispatching work.
    # The context manager shuts the workers down once the crawl returns;
    # previously the pool was never closed.
    with Pool() as pool:
        # Start the crawl from the first listing page.
        get_booklist(url)
 
  控制台输出

如需转载,请注明文章出处和来源网址:http://www.divcss5.com/html/h54896.shtml