# 免费代理的可用性确实很低,但非生产需求还是可以满足的。有免费代理,对学生党来说还是挺好的。
# -*- coding:utf-8 -*- #
import requests
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ChromeOptions
import random
def testip(ip):
#用requests库初步测试代理IP是否可用,单线程低效率的一个测试例子
headers={"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",\
}
#URL
url0='https://www.baidu.com'
#设置代理
proxies = {'http':ip, 'https':ip}
#清理requests异常告警
requests.packages.urllib3.disable_warnings()
flag=0
while flag<1:
try:
r=requests.get(url0,timeout=2,proxies=proxies,verify=False,headers=headers,stream=False)
r_ok=r.status_code
if r_ok==200:
return ip
except Exception as e:
print(e)
flag=flag+1
return ''
def findiplist(driver0, wz):
    """Collect proxy addresses from the currently loaded proxy-list page.

    Each candidate is checked with ``testip``; addresses that pass are
    appended to ``ip_ok.txt`` straight away, so results survive a later
    failure.

    Args:
        driver0: Selenium WebDriver positioned on the proxy-list page.
        wz: Site identifier; only ``'nima'`` is supported.

    Returns:
        All candidate addresses found on the page (working or not). The
        list is empty for an unsupported ``wz`` or a page without rows.
    """
    iplist = []
    if wz != 'nima':
        return iplist

    rows = driver0.find_elements(By.XPATH, "//table/tbody/tr")
    print(len(rows))
    for row in rows:
        # The first whitespace-separated token of the row text is taken as
        # the proxy address.
        fields = row.text.split()
        if not fields:
            # Rows without text carry no address to test.
            continue
        candidate = fields[0]
        iplist.append(candidate)
        ip_ok = testip(candidate)
        if ip_ok:
            with open('ip_ok.txt', 'a') as f:
                f.write(ip_ok + '\n')
    return iplist
def readip(path='ip_ok.txt'):
    """Read the saved proxy addresses, one per line.

    Args:
        path: File to read; defaults to ``ip_ok.txt``.

    Returns:
        The non-empty lines of the file with surrounding whitespace
        removed. An empty list is returned when the file does not exist.
    """
    try:
        with open(path, 'r') as f:
            stripped = [line.strip() for line in f]
    except FileNotFoundError:
        # No results have been saved yet.
        stripped = []
    # Blank lines are not usable proxy addresses.
    addresses = [entry for entry in stripped if entry]
    print(addresses)
    return addresses
def sel_test(ip):
    """Drive Chrome through a proxy as an example of using a proxy address.

    Opens Baidu via the proxy and waits for the element with id ``kw``
    (presumably the search box) to become visible. The outcome is printed.

    Args:
        ip: Proxy address passed to Chrome's ``--proxy-server`` switch.
    """
    option = ChromeOptions()
    option.add_argument('--proxy-server=' + ip)
    # ``options=`` replaces the removed ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=option)
    try:
        driver.set_page_load_timeout(8)
        driver.get(url='https://www.baidu.com')
        WebDriverWait(driver, 10, 0.5).until(
            EC.visibility_of_element_located((By.ID, "kw"))
        )
        time.sleep(2)
        print('ip ok---', ip)
    except Exception:
        # Any failure (page-load timeout, wait timeout, proxy error) simply
        # means the proxy is not usable in the browser.
        print('Wait time out', ip)
    finally:
        # Always close the browser, even if reporting the result fails.
        driver.quit()
def seleniumwork(pages=1):
    """Scrape the proxy-list site, then try each working proxy in Chrome.

    The run first empties ``ip_ok.txt``, lets ``findiplist`` refill it from
    the scraped pages, and finally calls ``sel_test`` for every saved
    address.

    Args:
        pages: Number of list pages to scrape.
    """
    driver = webdriver.Chrome()
    try:
        driver.set_page_load_timeout(8)
        driver.get(url='http://www.nimadaili.com/https/1/')
        time.sleep(2)
        # Start every run with an empty result file.
        with open('ip_ok.txt', 'w'):
            pass
        for page in range(pages):
            print(page, 'in one')
            # Let the wait poll for the "next page" link instead of looking
            # it up once before the wait starts.
            next_link = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located(
                    (By.XPATH, "//li/a[text()='下一页']")
                )
            )
            try:
                findiplist(driver, 'nima')
            except Exception as e:
                # Best effort: one bad page must not abort the whole run,
                # but the reason should be visible.
                print(e)
            next_link.click()
            time.sleep(2)
    finally:
        # Close the browser even when loading or waiting fails.
        driver.quit()

    for ip in readip():
        try:
            print(ip)
            # Put the working proxy to use.
            sel_test(ip)
        except Exception as e:
            print(e)
            continue
if __name__ == '__main__':
    # Re-run the scrape-and-test cycle forever, pausing 5-20 minutes
    # (chosen at random) between runs.
    while True:
        try:
            seleniumwork()
        except Exception as e:
            # A failed cycle (site down, browser error) should not stop the
            # scheduler; report it and try again after the pause.
            print(e)
        time.sleep(60 * random.randint(5, 20))
# 本文内容不用于商业目的,如涉及知识产权问题,请权利人联系51Testing小编(021-64471599-8017),我们将立即处理。