#备注以下小段代码描述了页面变化规律,这个得自己去研究 if not has_pre_page: #点击第2页时会出现上一页,页号加1 has_pre_page = True page_num = page_num + 1 if page_num % 7 == 0: #page_num等于7时,页号减1 page_num = page_num - 1 time.sleep(2) driver.execute_script(js) time.sleep(2) time.sleep(3) driver.quit() |
改进版(自动翻页,前翻页后翻页)
# -*- coding: utf-8 -*-
"""Page forward through Baidu search results in Firefox, then page back.

The script opens Baidu, submits a test query, clicks pager links one after
another until no further link is found, and then clicks its way back down to
the link for ``pn=0``.
"""
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Scrolls the document far down so the pager at the bottom is reachable.
SCROLL_TO_BOTTOM_JS = 'document.documentElement.scrollTop=10000'


def _click_page_link(driver, page_num):
    """Click the pager link whose href contains ``pn=<page_num>``.

    The selector is a CSS substring match (``href*=``) inside ``p#page``.
    NOTE(review): presumably Baidu's ``pn`` is a result offset, so ``pn=1``
    matches the ``pn=10`` link of the next page -- confirm against the site.

    Raises:
        NoSuchElementException: if no matching link is present.
    """
    # Raw string: the backslash must reach the CSS engine to escape '='.
    selector = r'p[id="page"]>a[href*=pn\=' + str(page_num) + ']'
    driver.find_element(By.CSS_SELECTOR, selector).click()
    time.sleep(1)
    driver.execute_script(SCROLL_TO_BOTTOM_JS)
    time.sleep(1)


def _page_forward(driver):
    """Click successive pager links; return how many clicks succeeded."""
    clicked = 0
    while True:
        driver.execute_script(SCROLL_TO_BOTTOM_JS)
        try:
            _click_page_link(driver, clicked + 1)
        except NoSuchElementException:
            print('no next page')
            return clicked
        clicked += 1


def _page_backward(driver, last_page):
    """Click pager links for ``last_page - 1`` down to ``0``.

    Stops early if a link is missing. Iterating down to 0 explicitly avoids
    probing for a non-existent ``pn=-1`` link (and its implicit-wait delay)
    just to terminate the loop.
    """
    for page_num in range(last_page - 1, -1, -1):
        driver.execute_script(SCROLL_TO_BOTTOM_JS)
        try:
            _click_page_link(driver, page_num)
        except NoSuchElementException:
            break
    print('no pre page')


def main():
    """Run the forward-then-backward paging demo in a Firefox session."""
    driver = webdriver.Firefox()
    try:
        driver.maximize_window()
        driver.get('http://www.baidu.com')
        # Every element lookup waits up to 5 seconds before failing.
        driver.implicitly_wait(5)
        # Test query. Other test data noted by the original author:
        # 'selenium zhidashso dld', 'selenium zhidashso dldld'.
        driver.find_element(By.ID, 'kw1').send_keys('selenium selenium')
        driver.find_element(By.ID, 'su1').click()

        last_page = _page_forward(driver)
        _page_backward(driver, last_page)
        time.sleep(3)
    finally:
        # Always close the browser, even if a step above raised.
        driver.quit()


if __name__ == "__main__":
    main()