diff --git a/.gitignore b/.gitignore index 5a77b81..51843eb 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,3 @@ logs/sessions.log 222.py 333.py 444.py -ran diff --git a/README.md b/README.md index dbfa34e..e20386f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +# 0.1.8 +- proxy_list 新增代理 # 0.1.7 - 修复自动bug # 0.1.6 diff --git a/spider/111.py b/spider/111.py deleted file mode 100644 index d2a0f8d..0000000 --- a/spider/111.py +++ /dev/null @@ -1,139 +0,0 @@ -from math import log -import random -from re import S -import time -from tkinter import N -from DrissionPage import Chromium -from loguru import logger -from work import get_random_canada_info -from mail_ import mail_ -from bit_browser import bit_browser -from api import api - - -class Auto: - def __init__(self, http: str = None): - self.browser = Chromium(http) - self.tab = self.browser.latest_tab - pass - - # cf打码 - def solve_cloudflare(self, is_ok: bool = False): - tab = self.browser.latest_tab - for _ in range(5): - tab.wait(1) - res = tab.ele( - 't:h1@text()=Sorry, you have been blocked', timeout=1) - if res: - logger.error("Cloudflare验证失败") - return False - - try: - shadow1 = tab.ele( - 'x://*[@name="cf-turnstile-response"]').parent().shadow_root - iframe = shadow1.get_frame(1) - if iframe: - logger.debug("找到Cloudflare iframe") - shadow2 = iframe.ele('x:/html/body').shadow_root - if shadow2: - logger.debug("找到Cloudflare iframe body shadow root") - status = shadow2.ele( - 'x://span[text()="Verifying..."]', timeout=1.5) - if status: - logger.debug("Cloudflare验证中,等待3秒") - tab.wait(3) - status = shadow2.ele( - 'x://span[text()="Success!"]', timeout=1.5) - if status: - logger.debug("Cloudflare验证成功") - return True - checkbox = shadow2.ele( - 'x://input[@type="checkbox"]', timeout=1.5) - if checkbox: - logger.debug("点击Cloudflare复选框") - checkbox.click() - tab.wait(3) - logger.debug("重新获取状态") - # return False - except Exception as e: - # logger.error(f"处理Cloudflare异常: {e}") - if is_ok: - logger.debug(f"cloudflare处理通过: {e}") - return True - return self.solve_cloudflare(is_ok=True) - tab.wait(1) - return False - - def wait_home(self): - logger.debug("等待进入首页") - jc = 0 - while True: - if jc > 3: - logger.error("等待进入首页超过5次,未成功") - return False - self.tab.wait(1) - # 判断cf是否通过 - bol = self.solve_cloudflare() - if not bol: - logger.debug("Cloudflare验证失败.") - # 刷新网页 - self.tab.refresh() - self.tab.wait(1.5) - jc += 1 - continue - else: - logger.debug("Cloudflare验证成功.") - self.tab.wait(1.5) - bol = self.tab.ele( - 't:h1@text()=Sorry, you have been blocked', timeout=1) - if bol: - logger.debug("ip被ban秒") - return False - - bol = self.tab.ele( - 't:div@text():ERR_TIMED_OUT', timeout=1) - if bol: - logger.debug("刷新网页") - self.tab.refresh() - self.tab.wait(1.5) - bol = self.tab.ele( - 't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1) - if bol: - logger.debug("刷新网页") - self.tab.refresh() - self.tab.wait(1.5) - bol = self.tab.ele( - 't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1) - if bol: - logger.debug("刷新网页") - self.tab.refresh() - self.tab.wait(1.5) - html = self.tab.url - logger.debug(f"当前URL: {html}") - if 'https://veritaconnect.ca/canadianbreadsettlement/en-us' == html: - logger.debug("成功进入首页") - return True - jc += 1 - - def open_url(self, url: str): - self.tab.get(url) - - -def main(): - browser_id = bit_browser.bit_browser_create( - remark=f"us.novproxy.io:1000:ozua8623-region-CA-st-Ontario-city-Toronto:6wdcv4gq", - host="us.novproxy.io", - port=1000, - proxy_user="ozua8623-region-CA-st-Alberta-city-Calgary", - proxy_pwd="6wdcv4gq", - proxy_type='socks5' - ) - http = bit_browser.bit_browser_open(browser_id) - logger.debug(f"打开浏览器 {browser_id}, http: {http}") - auto = Auto(http) - auto.open_url( - "https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm") - auto.wait_home() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/spider/main.py b/spider/main.py index ac79305..ecbe930 100644 --- a/spider/main.py +++ b/spider/main.py @@ -9,6 +9,8 @@ from work import get_random_canada_info from mail_ import mail_ from bit_browser import bit_browser from api import api +from proxys import proxy_list + class Auto: @@ -721,6 +723,8 @@ def create_fingerprint_browser_with_proxy(proxy: list[str]): if not proxy or len(proxy) < 4: logger.error("代理参数不完整,结束该线程") return + # 随机等待0.1秒 + time.sleep(random.uniform(0.1, 1.0)) logger.info(f"使用代理 {proxy[2]} 创建浏览器") browser_id = bit_browser.bit_browser_create( remark=f"{proxy[2]}", @@ -731,6 +735,7 @@ def create_fingerprint_browser_with_proxy(proxy: list[str]): proxy_type='socks5' ) logger.debug(f"创建浏览器 {browser_id}") + time.sleep(random.uniform(0.1, 1.0)) http = bit_browser.bit_browser_open(browser_id) logger.debug(f"打开浏览器 {browser_id}") auto = Auto(http) @@ -777,7 +782,8 @@ def run_all_proxies_concurrently(): 按固定代理列表一一创建并发浏览器 """ import threading - proxies = get_all_proxies() + # proxies = get_all_proxies() + proxies = proxy_list if not proxies: logger.warning("未找到可用代理,结束执行") return diff --git a/spider/proxys.py b/spider/proxys.py new file mode 100644 index 0000000..414ad40 --- /dev/null +++ b/spider/proxys.py @@ -0,0 +1,96 @@ +work = [ + "us.novproxy.io:1000:zhiyu111-region-CA:zhiyu111", + "us.novproxy.io:1000:zhiyu222-region-US:zhiyu222", + "us.novproxy.io:1000:zhiyu333-region-CA:zhiyu333", + "us.novproxy.io:1000:zhiyu444-region-US:zhiyu444", + ] + +ca1 = [ + "us.novproxy.io:1000:zhiyu555-region-CA:zhiyu555", + "us.novproxy.io:1000:zhiyu666-region-US:zhiyu666", + "us.novproxy.io:1000:zhiyu777-region-CA:zhiyu777", + "us.novproxy.io:1000:zhiyu888-region-US:zhiyu888", +] +ca2 = [ + "us.novproxy.io:1000:zhiyu999-region-CA:zhiyu999", + "us.novproxy.io:1000:zhiyu122-region-US:zhiyu122", + "us.novproxy.io:1000:zhiyu133-region-CA:zhiyu133", + "us.novproxy.io:1000:zhiyu144-region-US:zhiyu144", + "us.novproxy.io:1000:zhiyu155-region-CA:zhiyu155", + "us.novproxy.io:1000:zhiyu166-region-US:zhiyu166", + "us.novproxy.io:1000:zhiyu177-region-CA:zhiyu177", + "us.novproxy.io:1000:zhiyu188-region-US:zhiyu188", + ] + +ca3 = [ + "us.novproxy.io:1000:zhiyu1111-region-CA:zhiyu1111", + "us.novproxy.io:1000:zhiyu1222-region-US:zhiyu1222", + "us.novproxy.io:1000:zhiyu1333-region-CA:zhiyu1333", + "us.novproxy.io:1000:zhiyu1444-region-US:zhiyu1444", + ] + +cwd = [ + "us.novproxy.io:1000:cwd11111-region-CA:cwd11111", + "us.novproxy.io:1000:cwd11112-region-US:cwd11112", + "us.novproxy.io:1000:cwd11113-region-CA:cwd11113", + "us.novproxy.io:1000:cwd11114-region-US:cwd11114", + ] + +wt = [ + "us.novproxy.io:1000:cwd11115-region-CA:cwd11115", + "us.novproxy.io:1000:cwd11116-region-US:cwd11116", + "us.novproxy.io:1000:cwd11117-region-CA:cwd11117", + "us.novproxy.io:1000:cwd11118-region-US:cwd11118", +] + +hc = [ + "us.novproxy.io:1000:qzl11111-region-CA:qzl11111", + "us.novproxy.io:1000:qzl11112-region-US:qzl11112", + "us.novproxy.io:1000:qzl11113-region-CA:qzl11113", + "us.novproxy.io:1000:qzl11114-region-US:qzl11114", +] + +zlj = [ + "us.novproxy.io:1000:qzl11115-region-CA:qzl11115", + "us.novproxy.io:1000:qzl11116-region-US:qzl11116", + "us.novproxy.io:1000:qzl11117-region-CA:qzl11117", + "us.novproxy.io:1000:qzl11118-region-US:qzl11118", +] + +wzq = [ + "us.novproxy.io:1000:qzl11119-region-CA:qzl11119", + "us.novproxy.io:1000:qzl11120-region-US:qzl11120", + "us.novproxy.io:1000:qzl11121-region-CA:qzl11121", + "us.novproxy.io:1000:qzl11122-region-US:qzl11122", +] + +xy = [ + "us.novproxy.io:1000:qzl11123-region-CA:qzl11123", + "us.novproxy.io:1000:qzl11124-region-US:qzl11124", + "us.novproxy.io:1000:qzl11125-region-CA:qzl11125", + "us.novproxy.io:1000:qzl11126-region-US:qzl11126", +] + +yll = [ + "us.novproxy.io:1000:qzl11127-region-CA:qzl11127", + "us.novproxy.io:1000:qzl11128-region-US:qzl11128", + "us.novproxy.io:1000:qzl11129-region-CA:qzl11129", + "us.novproxy.io:1000:qzl11130-region-US:qzl11130", +] + +szt = [ + "us.novproxy.io:1000:qzl11131-region-CA:qzl11131", + "us.novproxy.io:1000:qzl11132-region-US:qzl11132", + "us.novproxy.io:1000:qzl11133-region-CA:qzl11133", + "us.novproxy.io:1000:qzl11134-region-US:qzl11134", +] + +hz = [ + "us.novproxy.io:1000:qzl11135-region-CA:qzl11135", + "us.novproxy.io:1000:qzl11136-region-US:qzl11136", + "us.novproxy.io:1000:qzl11137-region-CA:qzl11137", + "us.novproxy.io:1000:qzl11138-region-US:qzl11138", +] + + +proxy_list = work \ No newline at end of file