From f3b4ec2601c39ed1efed24dbdd4aa82e05e2ff88 Mon Sep 17 00:00:00 2001 From: bvwl <2201101122@qq.com> Date: Fri, 21 Nov 2025 01:56:01 +0800 Subject: [PATCH] 0.1.0 --- README.md | 2 ++ spider/main.py | 62 +++++++++++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 4f85a6a..3f44e00 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +# 0.1.0 +- 修复cloudflare验证问题 # 0.0.9 - 修正cloudflare验证 # 0.0.8 diff --git a/spider/main.py b/spider/main.py index 77e0149..bbd573e 100644 --- a/spider/main.py +++ b/spider/main.py @@ -10,16 +10,17 @@ from mail_ import mail_ from bit_browser import bit_browser from api import api + class Auto: - def __init__(self,http:str=None): + def __init__(self, http: str = None): self.browser = Chromium(http) self.tab = self.browser.latest_tab pass # cf打码 - def solve_cloudflare(self): + def solve_cloudflare(self, is_ok: bool = False): tab = self.browser.latest_tab - for _ in range(5): + for _ in range(3): self.tab.wait(1) try: shadow1 = tab.ele( @@ -31,16 +32,16 @@ class Auto: if shadow2: logger.debug("找到Cloudflare iframe body shadow root") status = shadow2.ele( - 'x://span[text()="Verifying..."]', timeout=1) + 'x://span[text()="Verifying..."]', timeout=1.5) if status: tab.wait(3) status = shadow2.ele( - 'x://span[text()="Success!"]', timeout=1) + 'x://span[text()="Success!"]', timeout=1.5) if status: logger.debug("Cloudflare验证成功") return True checkbox = shadow2.ele( - 'x://input[@type="checkbox"]', timeout=1) + 'x://input[@type="checkbox"]', timeout=1.5) if checkbox: checkbox.click() logger.debug("点击Cloudflare复选框") @@ -49,8 +50,10 @@ class Auto: # return False except Exception as e: # logger.error(f"处理Cloudflare异常: {e}") - logger.debug(f"cloudflare处理通过: {e}") - return True + if is_ok: + logger.debug(f"cloudflare处理通过: {e}") + return True + return self.solve_cloudflare(is_ok=True) tab.wait(1) return False @@ -75,11 +78,13 @@ class Auto: else: logger.debug("Cloudflare验证成功.") self.tab.wait(1.5) - bol = self.tab.ele('t:h1@text():Sorry, you have been blocked', timeout=1) + bol = self.tab.ele( + 't:h1@text():Sorry, you have been blocked', timeout=1) if bol: logger.debug("ip被ban秒") return False - bol = self.tab.ele('t:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1) + bol = self.tab.ele( + 't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1) if bol: logger.debug("刷新网页") self.tab.refresh() @@ -116,7 +121,7 @@ class Auto: continue_button.click() logger.debug("点击Continue按钮成功") self.tab.wait(1.5) - bol = self.tab.ele('t:div@text():Loading...',timeout=1) + bol = self.tab.ele('t:div@text():Loading...', timeout=1) if bol: logger.debug("Loading...") if bl: @@ -125,7 +130,8 @@ class Auto: logger.debug("异常界面") self.tab.wait(1) return self.click_continue(bl=True) - bol = self.tab.ele('t:h2@text()=You are being rate limited', timeout=1) + bol = self.tab.ele( + 't:h2@text()=You are being rate limited', timeout=1) if bol: logger.debug("被限流, 退出") return False @@ -149,7 +155,7 @@ class Auto: return False # 随机取城市 - def get_random_city(self, province: str|None=None): + def get_random_city(self, province: str | None = None): cities = { "Alberta": ["Calgary", "Edmonton"], "British Columbia": ["Vancouver"], @@ -164,8 +170,7 @@ class Auto: } if province is None: province = random.choice(list(cities.keys())) - return province,random.choice(cities.get(province, [])) - + return province, random.choice(cities.get(province, [])) def get_province_by_city(self, city: str) -> str | None: """ @@ -186,9 +191,8 @@ class Auto: } return mapping.get(city) - # 随机实物 - def get_random_food(self,city: str, shop: str) -> list[str]: + def get_random_food(self, city: str, shop: str) -> list[str]: """ 随机选择 1~2 种食物类别,并为每个类别至少选择 1 个具体产品 @@ -270,8 +274,8 @@ class Auto: return j = 0 while True: - if j >3: - return False + if j > 3: + return False info = get_random_canada_info(province, city) if len(info.get('postcode')) > 5: break @@ -314,7 +318,7 @@ class Auto: logger.debug(f"填写province: {province}") self.tab.ele( 't:select@id=CanProv').ele(f't:option@text()={province}').click() - self.tab.wait(0.1) + self.tab.wait(0.1) logger.debug(f"填写postal_code: {postal_code}") self.tab.ele('t:input@id=CanPostal').set.value(postal_code) self.tab.wait(0.1) @@ -352,7 +356,7 @@ class Auto: self.tab.wait(0.1) for i in range(3): bol = self.solve_cloudflare() - if not bol: + if not bol: logger.debug("Cloudflare验证失败.") self.tab.wait(0.1) else: @@ -378,14 +382,14 @@ class Auto: # 取对应城市的代理 -def get_proxy( city: str): +def get_proxy(city: str): if city == "Calgary": return "us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Calgary:6wdcv4gq".split(':') - elif city =='Edmonton': + elif city == 'Edmonton': return 'us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Edmonton:6wdcv4gq'.split(':') - elif city =='Vancouver': + elif city == 'Vancouver': return 'us.novproxy.io:1000:ozua8623-region-CA-st-British Columbia-city-Vancouver:6wdcv4gq'.split(':') - elif city =='Halifax': + elif city == 'Halifax': return 'us.novproxy.io:1000:ozua8623-region-CA-st-Nova Scotia-city-Halifax:6wdcv4gq'.split(':') elif city == 'Toronto': return 'us.novproxy.io:1000:ozua8623-region-CA-st-Ontario-city-Toronto:6wdcv4gq'.split(':') @@ -395,6 +399,8 @@ def get_proxy( city: str): """指纹浏览器操作""" # 创建指纹浏览器 + + def create_fingerprint_browser(city: str): """ 根据城市创建指纹浏览器并执行问卷流程 @@ -448,6 +454,7 @@ def create_fingerprint_browser(city: str): except Exception as e: logger.error(f"{city} 删除浏览器异常: {e}") + def run_city_forever(city: str): """ 持续循环运行指定城市流程:完成一次即关闭并删除浏览器,然后重新创建继续运行 @@ -462,6 +469,7 @@ def run_city_forever(city: str): logger.error(f"{city} 流程异常: {e}") time.sleep(2) + def run_all_cities_concurrently(): """ 多线程并发运行所有城市流程 @@ -471,7 +479,8 @@ def run_all_cities_concurrently(): # cities = ['Calgary'] threads = [] for city in cities: - t = threading.Thread(target=run_city_forever, args=(city,), name=f"{city}-thread") + t = threading.Thread(target=run_city_forever, + args=(city,), name=f"{city}-thread") t.start() threads.append(t) logger.info(f"{city} 线程已启动") @@ -480,6 +489,7 @@ def run_all_cities_concurrently(): t.join() logger.info("所有城市流程执行完成") + if __name__ == "__main__": # auto = Auto() # auto.get_random_food('a')