欢迎来到DIVCSS5查找CSS资料与学习DIV CSS布局技术!
  import re
  import time

  from lxml import etree
  from selenium import webdriver
  from selenium.webdriver.common.by import By
 
  class LagouSpider(object):
      """Selenium-driven crawler for the Python job listing on lagou.com.

      The spider walks the paginated listing at ``self.url``, opens every
      posting in a separate browser window, extracts a handful of fields
      with lxml XPath queries and prints one dict per posting.
      """

      # Noise removed from scraped fields: "/" separators and every kind of
      # whitespace, including the non-breaking space (\xa0).
      _NOISE_RE = re.compile(r"[/\s\xa0]")

      def __init__(self):
          """Start a Chrome session and remember the listing URL to crawl."""
          self.driver = webdriver.Chrome()
          self.url = "https://www.lagou.com/jobs/list_python?px=default&city=%E5%85%A8%E5%9B%BD#filterBox"

      @staticmethod
      def _first_text(nodes):
          """Return the first XPath text result, stripped, or an empty string.

          Args:
              nodes: list of strings as returned by an lxml ``text()`` query.
          """
          return nodes[0].strip() if nodes else ""

      @classmethod
      def _clean_text(cls, text):
          """Return ``text`` with all slashes and whitespace removed."""
          return cls._NOISE_RE.sub("", text)

      def run(self):
          """Crawl the listing page by page until the "next" button is disabled.

          Raises:
              selenium.common.exceptions.NoSuchElementException: if the pager
                  element is not present on the listing page.
          """
          self.driver.get(self.url)
          while True:
              self.parse_page_list(self.driver.page_source)

              # The last <span> of the pager is the "next page" button.
              next_btn = self.driver.find_element(
                  By.XPATH, "//div[@class='pager_container']/span[last()]"
              )
              # get_attribute may return None when the attribute is absent.
              if "pager_next_disabled" in (next_btn.get_attribute("class") or ""):
                  break
              next_btn.click()
              # click() returns before the listing has refreshed; without a
              # pause page_source can still be the page that was just parsed.
              time.sleep(1)

      def parse_page_list(self, source):
          """Visit every posting linked from one listing page.

          Args:
              source: HTML text of a listing page.
          """
          html = etree.HTML(source)
          for detail_url in html.xpath("//div/a[@class='position_link']/@href"):
              self.get_detail_page(detail_url)
              # Throttle requests between postings.
              time.sleep(1)

      def get_detail_page(self, detail_url):
          """Open one posting in a new window, parse it, and return to the list.

          Args:
              detail_url: URL of the posting's detail page.
          """
          list_window = self.driver.current_window_handle

          # Open an empty window and navigate with driver.get(): the URL is
          # never spliced into JavaScript source, and get() waits for the
          # page load before page_source is read.
          self.driver.execute_script("window.open()")
          self.driver.switch_to.window(self.driver.window_handles[-1])
          try:
              self.driver.get(detail_url)
              self.parse_datail_page(self.driver.page_source)
          finally:
              # Close the detail window and switch back to the job list even
              # when loading or parsing fails, so windows do not pile up.
              self.driver.close()
              self.driver.switch_to.window(list_window)

      # NOTE: the method name keeps its original spelling ("datail") so that
      # existing callers continue to work.
      def parse_datail_page(self, source):
          """Extract the fields of one posting, print them and return them.

          Fields that cannot be found on the page are reported as empty
          strings instead of raising ``IndexError``.

          Args:
              source: HTML text of a posting's detail page.

          Returns:
              dict with the keys ``name``, ``job_salary``, ``city``,
              ``work_year``, ``education``, ``company_name`` and ``desc``.
          """
          html = etree.HTML(source)

          job_name = self._first_text(
              html.xpath("//div[@class='job-name']/h2/text()")
          )

          # The first four spans are read, in order, as salary, city,
          # work experience and education; pad so that a short list still
          # unpacks.
          span_texts = [
              self._first_text(span.xpath("./text()"))
              for span in html.xpath("//dd[@class='job_request']//span")
          ]
          span_texts += [""] * (4 - len(span_texts))
          job_salary, city, work_year, education = span_texts[:4]

          company_name = self._first_text(
              html.xpath("//h3[@class='fl']//text()")
          )
          desc = "".join(
              html.xpath("//dl[@id='job_detail']/dd[@class='job_bt']//text()")
          )

          position = {
              "name": job_name,
              "job_salary": job_salary,
              "city": self._clean_text(city),
              "work_year": self._clean_text(work_year),
              "education": self._clean_text(education),
              "company_name": company_name,
              "desc": self._clean_text(desc),
          }
          print(position)
          return position
 
  # Script entry: crawl the whole listing with a fresh spider.
  lagou = LagouSpider()
  try:
      lagou.run()
  finally:
      # Always end the browser session, even when the crawl fails part-way;
      # otherwise the Chrome and driver processes are left running.
      lagou.driver.quit()
 
  View Code

如需转载,请注明文章出处和来源网址:http://www.divcss5.com/html/h54706.shtml