0.0.1
This commit is contained in:
120
spider/api.py
Normal file
120
spider/api.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import requests
|
||||
from loguru import logger
|
||||
import csv
|
||||
import os
|
||||
import random
|
||||
class Api:
|
||||
def __init__(self) -> None:
|
||||
# self.base_url = 'http://127.0.0.1:6060'
|
||||
self.base_url = 'http://192.168.11.67:6060'
|
||||
|
||||
# 创建店铺
|
||||
def create_shop(self, city: str, street: str, shop_name: str) -> dict:
|
||||
url = f'{self.base_url}/country/shop'
|
||||
item = {
|
||||
'city': city,
|
||||
'street': street,
|
||||
'shop_name': shop_name,
|
||||
}
|
||||
response = requests.post(url, json=item).json()
|
||||
logger.info(response)
|
||||
return response
|
||||
|
||||
# 查询店铺
|
||||
def get_shop(self, city: str) -> dict:
|
||||
url = f'{self.base_url}/country/shop'
|
||||
response = requests.get(url).json()
|
||||
# logger.info(response)
|
||||
return response
|
||||
|
||||
# 创建信息
|
||||
def create_info(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str, status: bool = False, email_content: str | None = None) -> dict:
|
||||
"""
|
||||
创建信息记录(孩子与家长字段)
|
||||
|
||||
参数:
|
||||
child_full_name (str): 孩子全名
|
||||
parent_full_name (str): 家长全名
|
||||
child_birthday (str): 孩子生日(字符串)
|
||||
address_str (str): 街道地址
|
||||
city_name (str): 城市
|
||||
parent_phone (str): 家长电话
|
||||
postcode (str): 邮编
|
||||
province (str): 省/州全称
|
||||
email (str): 邮箱
|
||||
text (str): 文本内容(如反馈地址)
|
||||
status (bool): 状态
|
||||
email_content (str | None): 邮件内容
|
||||
|
||||
返回值:
|
||||
dict: 接口返回的数据
|
||||
"""
|
||||
url = f'{self.base_url}/country/info'
|
||||
item = {
|
||||
"child_full_name": child_full_name,
|
||||
"parent_full_name": parent_full_name,
|
||||
"child_birthday": child_birthday,
|
||||
"address_str": address_str,
|
||||
"city_name": city_name,
|
||||
"parent_phone": parent_phone,
|
||||
"postcode": postcode,
|
||||
"province": province,
|
||||
"status": status,
|
||||
"email": email,
|
||||
"email_content": email_content,
|
||||
"text": text
|
||||
}
|
||||
response = requests.post(url, json=item).json()
|
||||
logger.info(response)
|
||||
return response
|
||||
|
||||
# 根据城市 随机获取一个店铺
|
||||
def get_random_shop(self) -> dict:
|
||||
url = f'{self.base_url}/country/shop/random'
|
||||
response = requests.get(url).json()
|
||||
# logger.info(response)
|
||||
if not response.get('street'):
|
||||
logger.error(f'没有店铺')
|
||||
return None
|
||||
return response
|
||||
|
||||
def main():
    """Create one shop per row of ``data.csv`` located next to this module.

    The first CSV row is treated as a header and skipped. As consumed by the
    code below, the columns are ``city, shop_name, street`` (indices 0, 1, 2).
    Rows with fewer than three columns are skipped with a warning, and rows
    whose city contains ``Quebec`` are skipped silently.
    """
    api = Api()
    csv_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    if not os.path.exists(csv_path):
        logger.error(f'CSV 文件不存在: {csv_path}')
        return

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(csv_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # discard the header row
        for row in reader:
            if len(row) < 3:
                logger.warning(f'行列数不足,跳过: {row}')
                continue
            shop_name, street, city = row[1], row[2], row[0]
            # Strip the " (city)" disambiguation suffix.
            if ' (city)' in city:
                city = city.replace(' (city)', '')
            if 'Quebec' in city:
                continue
            # Keep only the part before the first comma (e.g. "Town, Province").
            if ',' in city:
                city = city.split(',')[0]
            logger.info(f'city: {city}, street: {street}, shop_name: {shop_name}')
            api.create_shop(city, street, shop_name)
|
||||
|
||||
# def main2():
|
||||
# api = Api()
|
||||
# city = 'Toronto'
|
||||
# shop = api.get_random_shop()
|
||||
# if shop:
|
||||
# logger.info(shop)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# main()
|
||||
|
||||
# Shared module-level client instance, created when this module is imported.
api = Api()
|
||||
313
spider/auto_challenge.py
Normal file
313
spider/auto_challenge.py
Normal file
@@ -0,0 +1,313 @@
|
||||
import io
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional, List
|
||||
import requests
|
||||
from PIL import Image
|
||||
import base64
|
||||
from loguru import logger
|
||||
# Module-level alias for Pillow's LANCZOS resampling filter
# (not referenced in the visible part of this file).
RESAMPLE_FILTER = Image.Resampling.LANCZOS
|
||||
class ReCaptchaHandler:
    """Walk a reCAPTCHA image challenge through a browser-automation driver.

    The handler locates the checkbox and challenge iframes via ``driver``,
    sends tile images to the recognition service at ``api_host`` and clicks
    the tiles the service reports.

    NOTE(review): ``driver`` is assumed to expose ``ele``/``eles``, ``url``
    and ``run_js`` (DrissionPage-style API) -- confirm against the caller.
    """

    # Row-major tile index -> XPath of the table cell inside
    # ``#rc-imageselect-target`` for the 4x4 and 3x3 grids.
    path_map_44 = {
        row * 4 + col: f"//table/tbody/tr[{row + 1}]/td[{col + 1}]"
        for row in range(4)
        for col in range(4)
    }

    path_map_33 = {
        row * 3 + col: f"//table/tbody/tr[{row + 1}]/td[{col + 1}]"
        for row in range(3)
        for col in range(3)
    }

    api_host = "http://192.168.11.13:7070/analyze_batch/"

    # JS probe for the computed visibility of the challenge iframe (English
    # or Chinese title).
    # NOTE(review): confirm that ``run_js`` returns the value of the final
    # expression without an explicit ``return``.
    _VISIBILITY_JS = (
        'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");'
        'f ? getComputedStyle(f).visibility : null;'
    )

    def __init__(self, driver):
        self.driver = driver
        self.checkbox_iframe = None
        self.challenge_iframe = None
        self.challenge_type = None
        self.challenge_question = None
        # True while the current 3x3 grid is still the initial full image.
        self.challenge_i33_first = True
        # tile index -> {'srcs': [base64 images seen], 'new': bool}
        self.i11s = {}
        self.challenge_44_img = None

    @staticmethod
    def split_image(image_bytes: bytes) -> Optional[List[str]]:
        """Cut an image into a 3x3 grid and return the tiles as base64 PNGs.

        Tiles are returned in row-major order. The last row/column absorbs
        any remainder pixels. Returns ``None`` if the bytes cannot be opened
        as an image.
        """
        try:
            image_stream = io.BytesIO(image_bytes)
            img = Image.open(image_stream)
        except Exception:
            return None

        width, height = img.size
        tile_width = width // 3
        tile_height = height // 3

        base64_tiles = []
        for i in range(3):
            for j in range(3):
                left = j * tile_width
                upper = i * tile_height
                right = (j + 1) * tile_width if j < 2 else width
                lower = (i + 1) * tile_height if i < 2 else height

                tile = img.crop((left, upper, right, lower))
                buf = io.BytesIO()
                tile.save(buf, format="PNG")
                base64_tiles.append(base64.b64encode(buf.getvalue()).decode())

        return base64_tiles

    def find_checkbox_iframe(self):
        """Locate the checkbox iframe and click its anchor.

        Returns ``True`` when the iframe was found and clicked, ``False``
        otherwise (including when the lookup or click raised).
        """
        time.sleep(1)
        try:
            iframe = self.driver.ele('css: iframe[title="reCAPTCHA"]')
            if iframe:
                self.checkbox_iframe = iframe
                self.checkbox_iframe.ele("#recaptcha-anchor").click()
                return True
        except Exception:
            # Best effort: a failed lookup is reported as "not found".
            pass
        return False

    def find_challenge_iframe(self):
        """Locate the challenge iframe (English or Chinese title).

        Returns ``True`` and stores the iframe when found, else ``False``.
        """
        try:
            iframe = self.driver.ele("@|title=recaptcha challenge expires in two minutes@|title=reCAPTCHA 验证任务将于 2 分钟后过期")
            if iframe:
                self.challenge_iframe = iframe
                return True
        except Exception:
            # Best effort: a failed lookup is reported as "not found".
            pass
        return False

    def check_11_refresh(self, check_ele):
        """Wait until each clicked tile shows a not-yet-seen replacement image.

        ``check_ele`` is a list of ``(tile_index, xpath)`` pairs. For every
        index the new image is appended to ``self.i11s[idx]['srcs']`` and the
        entry is flagged ``'new'``. Flags from a previous round are cleared
        first.
        """
        for entry in self.i11s.values():
            if entry.get("new"):
                entry['new'] = False

        check_ele = [i[0] for i in check_ele]

        for idx in check_ele:
            if idx not in self.i11s:
                self.i11s[idx] = {'srcs': [], 'new': False}

            while True:
                ele = self.challenge_iframe.ele('#rc-imageselect-target').ele(
                    f"xpath:{self.path_map_33[idx]}")

                img_ele = ele.ele('.rc-image-tile-11', timeout=0.1)
                if not img_ele:
                    time.sleep(0.1)
                    continue

                byte_data = img_ele.src()
                b64_str = base64.b64encode(byte_data).decode()

                if b64_str not in self.i11s[idx]['srcs']:
                    self.i11s[idx]['srcs'].append(b64_str)
                    self.i11s[idx]['new'] = True
                    break

                # Same image as before: pause briefly instead of polling the
                # browser in a tight loop.
                time.sleep(0.1)

    def click_answer(self, result, challenge_type):
        """Click the tiles named in ``result`` and submit when appropriate.

        ``challenge_type`` is ``4`` for the 4x4 grid and ``3`` for the 3x3
        grid. Returns ``True`` when the verify button was clicked, ``False``
        when the challenge was reloaded, is waiting for refreshed tiles, or
        the type is unknown.
        """
        if challenge_type == 4:
            for x in result["results"][0]['result']:
                self.challenge_iframe.ele('#rc-imageselect-target').ele(
                    f"xpath:{self.path_map_44[x]}").click()
                time.sleep(0.1)

            self.challenge_iframe.ele('#recaptcha-verify-button').click()
            self.i11s.clear()
            return True

        if challenge_type == 3:
            found_ele = []

            for res in result["results"]:
                if res["result"].get('target_found'):
                    idx = int(res["image_id"])
                    self.challenge_iframe.ele('#rc-imageselect-target').ele(
                        f"xpath:{self.path_map_33[idx]}").click()
                    found_ele.append((idx, self.path_map_33[idx]))
                    time.sleep(0.1)

            if found_ele:
                # Too few matches on the initial grid: request a new challenge.
                if len(found_ele) <= 2 and self.challenge_i33_first:
                    self.challenge_iframe.ele('#recaptcha-reload-button').click()
                    return False

                cls = self.challenge_iframe.ele('#rc-imageselect-target').ele(
                    f"xpath:{found_ele[0][1]}").attr('class')
                # A tile that stays selected means tiles do not refresh, so
                # the answer can be submitted right away.
                if 'rc-imageselect-tileselected' in cls:
                    self.challenge_iframe.ele('#recaptcha-verify-button').click()
                    self.i11s.clear()
                    return True

                self.check_11_refresh(found_ele)
                return False

            self.challenge_iframe.ele('#recaptcha-verify-button').click()
            self.i11s.clear()
            return True

        return False

    def challenge_i33(self):
        """Handle one round of a 3x3 challenge.

        When all nine ``.rc-image-tile-33`` elements are present the full
        image is split and analysed. Otherwise only tiles flagged as new in
        ``self.i11s`` are analysed.
        """
        if len(self.challenge_iframe.eles('.rc-image-tile-33', timeout=1)) == 9:
            self.challenge_i33_first = True
            self.i11s.clear()

            first_ele = self.challenge_iframe.eles('.rc-image-tile-33')[0]
            byte_data = first_ele.src()

            tiles = self.split_image(byte_data)
            if tiles:
                images = {i: t for i, t in enumerate(tiles)}
                if res := self.identify_verification_code(images):
                    self.click_answer(res, 3)
        else:
            self.challenge_i33_first = False
            data = {}

            for k, v in self.i11s.items():
                if v['new']:
                    data[k] = v['srcs'][-1]
            if res := self.identify_verification_code(data):
                self.click_answer(res, 3)

    def challenge_i44(self):
        """Handle one round of a 4x4 challenge using the whole grid image."""
        ele = self.challenge_iframe.eles('.rc-image-tile-44')[0]
        byte_data = ele.src()
        b64_str = base64.b64encode(byte_data).decode()
        self.challenge_44_img = b64_str
        if res := self.identify_verification_code({0: b64_str}):
            self.click_answer(res, 4)

    def identify_verification_code(self, images):
        """Send images to the recognition service and return its JSON reply.

        ``images`` maps an image id to a base64 string; falsy images are
        skipped. Returns ``None`` without any request when nothing is left
        to send.
        """
        data = {"images": []}
        for k, img in images.items():
            if img:
                data["images"].append({
                    "image_id": str(k),
                    "image_base64": img,
                    "target_class": self.challenge_question
                })
        if data['images']:
            res = requests.post(self.api_host, json=data)
            return res.json()
        return None

    def _challenge_visibility(self):
        """Return the challenge iframe's visibility, or ``None`` if unknown.

        The inline ``style`` attribute is inspected first; when it does not
        mention ``visibility`` the JS probe is used instead.
        """
        vis = None
        try:
            style_str = self.challenge_iframe.attr('style') or ''
            if 'visibility' in style_str:
                # Spaces are stripped before matching, so the needle must not
                # contain one (the old 'visibility: hidden' never matched).
                vis = 'hidden' if 'visibility:hidden' in style_str.replace(' ', '') else 'visible'
        except Exception:
            # Best effort: fall through to the JS probe.
            pass
        if vis is None:
            try:
                vis = self.driver.run_js(self._VISIBILITY_JS)
            except Exception:
                vis = None
        return vis

    def challenge(self):
        """Run the whole flow and return ``{"status": bool, "message": str}``.

        ``status`` is ``False`` only when the checkbox or challenge iframe
        cannot be found. An error inside the solving loop is logged and the
        method still returns the final success dict, as before.
        """
        if not self.find_checkbox_iframe():
            return {"status": False, "message": "no verification code found"}
        url_before = self.driver.url
        self.find_challenge_iframe()
        if not self.challenge_iframe:
            return {"status": False, "message": "no verification code found"}

        # Phase 1: wait for either an automatic pass or a visible challenge.
        while True:
            time.sleep(1)

            if self.driver.url != url_before:
                return {"status": True, "message": "验证码自动通过1"}
            if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
                return {"status": True, "message": "验证码自动通过2"}
            if self._challenge_visibility() != 'hidden':
                break

        # Phase 2: answer rounds until the challenge disappears.
        try:
            while True:
                if self._challenge_visibility() == 'hidden':
                    break
                time.sleep(1)
                if self.driver.url != url_before:
                    return {"status": True, "message": "captcha successfully resolved"}
                if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
                    return {"status": True, "message": "captcha successfully resolved"}

                # Read the challenge prompt.
                self.challenge_question = self.challenge_iframe.ele("tag:strong").text

                # 4x4 grid
                if self.challenge_iframe.ele('.rc-image-tile-44', timeout=0.1):
                    self.challenge_i44()

                # 3x3 grid or refreshed 1x1 tiles
                elif self.challenge_iframe.ele('.rc-image-tile-33', timeout=0.1) or \
                        self.challenge_iframe.ele('.rc-image-tile-11', timeout=0.1):
                    self.challenge_i33()
        except Exception as exc:
            # Kept best-effort (return value unchanged), but no longer silent
            # and no longer swallowing KeyboardInterrupt/SystemExit.
            logger.warning(f"challenge loop aborted by error: {exc}")
        return {"status": True, "message": "captcha successfully resolved"}
|
||||
318
spider/bit_browser.py
Normal file
318
spider/bit_browser.py
Normal file
@@ -0,0 +1,318 @@
|
||||
import time
|
||||
import requests
|
||||
from loguru import logger
|
||||
from functools import wraps
|
||||
|
||||
|
||||
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that re-invokes a function when it raises.

    :param max_retries: total number of attempts made before giving up
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the delay is multiplied by after each sleep
    :return: a decorator; the wrapped function returns ``None`` once every
        attempt has failed (the exception is logged, not re-raised)
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pause = delay
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt >= max_retries:
                        logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                        return None  # attempts exhausted
                    logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                    time.sleep(pause)
                    pause *= backoff

            # Only reached when max_retries is not positive.
            return None

        return wrapper

    return decorator
|
||||
|
||||
|
||||
|
||||
# 比特浏览器模块
|
||||
class BitBrowser:
    """Client for the BitBrowser local HTTP API.

    Every public method posts JSON to ``{bit_host}:{bit_port}/browser/...``
    and raises ``Exception(response)`` when the reply has no truthy
    ``success`` field. Methods decorated with ``retry`` return ``None``
    instead of raising once all attempts have failed.
    """

    def __init__(self):
        self.bit_host = "http://127.0.0.1"

    def _post(self, path: str, data: dict, bit_port: str | None) -> dict:
        """POST ``data`` as JSON to ``path`` and return the decoded reply.

        :raises Exception: when the reply's ``success`` field is falsy
        """
        url = f"{self.bit_host}:{bit_port}{path}"
        headers = {'Content-Type': 'application/json'}
        res = requests.post(url, json=data, headers=headers).json()
        if not res.get('success'):
            raise Exception(res)
        return res

    # Create a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_create(self, remark: str = '指纹浏览器', ua: str = None, host: str = None, port: str = None,
                           proxy_user: str = None,
                           proxy_pwd: str = None, proxy_type: str = 'noproxy', urls: str = None,
                           bit_port: str = "54345") -> str:
        """Create a browser profile.

        :param remark: remark; its first 40 characters are the window name
        :param ua: user agent (optional; ``None`` is sent when omitted)
        :param host: proxy host (optional)
        :param port: proxy port (optional)
        :param proxy_user: proxy user name (optional)
        :param proxy_pwd: proxy password (optional)
        :param proxy_type: one of 'noproxy', 'http', 'https', 'socks5', 'ssh'
        :param urls: extra URLs to open, comma separated (optional)
        :param bit_port: API port, default 54345
        :return: id of the created browser profile
        """
        data = {
            'name': remark[:40],  # window name
            'remark': f'{remark}',  # remark
            'proxyMethod': 2,  # 2 = custom proxy, 3 = extracted IP
            # proxy type: ['noproxy', 'http', 'https', 'socks5', 'ssh']
            'proxyType': f'{proxy_type}',
            "browserFingerPrint": {"userAgent": ua}
        }
        if host is not None:
            data['host'] = host
        if port is not None:
            data['port'] = port
        if proxy_user is not None:
            data['proxyUserName'] = proxy_user
        if proxy_pwd is not None:
            data['proxyPassword'] = proxy_pwd
        if urls is not None:
            data['url'] = urls  # extra URLs, comma separated
        res = self._post('/browser/update', data, bit_port)
        return res['data']['id']

    # Update a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_update(self, pk: str, remark: str = None, proxyType: str = 'noproxy', host: str = None,
                           port: str = None, proxy_user: str = None, proxy_pwd: str = None, urls: str = None,
                           bit_port: str = "54345") -> bool:
        """Partially update a browser profile; only supplied fields are sent.

        :param pk: browser id
        :param remark: new remark (also used as the window name)
        :param proxyType: noproxy|http|https|socks5; the default 'noproxy'
            means "leave unchanged" and is not sent
        :param host: proxy host
        :param port: proxy port (converted to ``int`` before sending)
        :param proxy_user: proxy user name
        :param proxy_pwd: proxy password
        :param urls: extra URLs to open, comma separated
        :param bit_port: API port, default 54345
        :return: ``True`` on success
        """
        data = {'ids': [pk]}
        if remark is not None:
            data['remark'] = remark
            data['name'] = remark
        if urls is not None:
            data['url'] = urls
        if proxyType != 'noproxy':
            data['proxyType'] = proxyType
        if host is not None:
            data['host'] = host
        if port is not None:
            data['port'] = port if isinstance(port, int) else int(port)
        if proxy_user is not None:
            data['proxyUserName'] = proxy_user
        if proxy_pwd is not None:
            data['proxyPassword'] = proxy_pwd
        self._post('/browser/update/partial', data, bit_port)
        return True

    # Open a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_open(self, pk: str, bit_port: str = "54345") -> str:
        """Open a browser profile.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :return: the ``data.http`` value of the reply (debugger address)
        """
        res = self._post('/browser/open', {"id": f'{pk}'}, bit_port)
        return res['data']['http']

    # Close a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_close(self, pk: str, bit_port: str = "54345"):
        """Close a browser profile and verify that it is gone.

        Sleeps 3 seconds after the close request, then checks the status;
        if the browser is still reported, an exception is raised so the
        ``retry`` wrapper tries again.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :return: ``True`` once the browser is no longer reported
        """
        self._post('/browser/close', {'id': f'{pk}'}, bit_port)
        # Give the browser process time to exit before checking.
        time.sleep(3)
        # Check on the same port the close request went to (previously the
        # default port was always used).
        still_running = self.bit_browser_status(pk, bit_port=bit_port)
        if still_running:
            raise Exception(f'浏览器ID {pk} 未正常关闭, 等待3秒后重试')
        return True

    # Delete a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_delete(self, pk: str, bit_port: str = "54345"):
        """Delete a browser profile.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :return: ``True`` on success
        """
        self._post('/browser/delete', {'id': f'{pk}'}, bit_port)
        return True

    # List browser profiles
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
                        bit_port: str | None = "54345") -> dict:
        """List browser profiles, one page at a time.

        :param page: page number
        :param limit: page size
        :param group_id: group id filter (optional)
        :param bit_port: API port, default 54345
        :return: the full reply, e.g.
            ``{'success': True, 'data': {'page': 1, 'pageSize': 10, 'totalNum': 128, 'list': [{'id': '...'}]}}``
        """
        data = {'page': page, 'pageSize': limit}
        if group_id is not None:
            data['groupId'] = group_id
        return self._post('/browser/list', data, bit_port)

    # Get details of one browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_detail(self, pk: str, bit_port: str = "54345") -> dict:
        """Fetch the details of one browser profile.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :return: the full reply; profile fields are under ``data``
        """
        return self._post('/browser/detail', {'id': f'{pk}'}, bit_port)

    # Get the process id of a browser profile
    def bit_browser_pid(self, pk: str, bit_port: str = "54345") -> str:
        """Return the ``data[pk]`` entry of the ``/browser/pids/alive`` reply.

        Not wrapped by ``retry``: failures propagate to the caller.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :raises Exception: when the reply's ``success`` field is falsy
        :raises KeyError: when ``pk`` is missing from the reply data
        """
        res = self._post('/browser/pids/alive', {"ids": [pk]}, bit_port)
        return res['data'][pk]

    # Get the running state of a browser profile
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_status(self, pk: str, bit_port: str = "54345") -> bool:
        """Report whether ``/browser/pids`` lists an entry for ``pk``.

        :param pk: browser id
        :param bit_port: API port, default 54345
        :return: ``True`` when ``data[pk]`` is present and not ``None``
        """
        res = self._post('/browser/pids', {'ids': [pk]}, bit_port)
        return res.get('data').get(pk) is not None
|
||||
|
||||
|
||||
async def main():
    """Print fingerprint and proxy settings of every browser in one group.

    Pages through ``bit_browser_get`` (100 per page) for a hard-coded group
    id and prints selected detail fields for each browser. Stops on the
    first empty page or when the listing request fails.
    """
    bit = BitBrowser()
    page = 0
    while True:
        # BitBrowser only has synchronous ``bit_browser_*`` methods; the
        # former ``await bit._bit_browser_get(...)`` raised AttributeError.
        listing = bit.bit_browser_get(
            page=page,
            limit=100,
            group_id='4028808b9a52223a019a581bbea1275c')
        if not listing:
            # The retry wrapper returns None after exhausting its attempts.
            logger.error('failed to list browsers; aborting')
            break
        browsers = listing["data"]["list"]
        if len(browsers) == 0:
            break

        for entry in browsers:
            browser_id = entry["id"]
            # Read the browser details
            detail = bit.bit_browser_detail(browser_id)
            if not detail:
                logger.warning(f'failed to read details of browser {browser_id}; skipping')
                continue

            data = detail["data"]
            ua = data["browserFingerPrint"]["userAgent"]
            proxy_type = data.get("proxyType")
            host = data.get("host")
            port = data.get("port")
            proxy_account = data.get("proxyUserName")
            proxy_password = data.get("proxyPassword")
            print(f'id -->{browser_id}')
            print(f'ua -->{ua}')
            print(f'proxy_type -->{proxy_type}')
            print(f'host -->{host}')
            print(f'port -->{port}')
            print(f'proxy_account -->{proxy_account}')
            print(f'proxy_password -->{proxy_password}')
            print('=' * 50)
        page += 1
|
||||
|
||||
def main2():
    """Scratch entry point for manual experiments on a single browser id.

    Only builds a client and a hard-coded id; the actual calls are kept
    commented out.
    """
    client = BitBrowser()
    target_id = '5ba9eb974c7c45e2bb086585c75f70e8'
    # Close the browser
    # res = client.bit_browser_close(target_id)
    # res = client.bit_browser_get()
    # print(res)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# main2()
|
||||
|
||||
# Shared module-level BitBrowser client, created when this module is imported.
bit_browser = BitBrowser()
|
||||
851
spider/mail_.py
Normal file
851
spider/mail_.py
Normal file
@@ -0,0 +1,851 @@
|
||||
import asyncio
|
||||
import imaplib
|
||||
import email
|
||||
import random
|
||||
import socket
|
||||
import string
|
||||
import time
|
||||
from email.header import decode_header
|
||||
from datetime import timezone, timedelta
|
||||
import email.utils
|
||||
import aiohttp
|
||||
import socks
|
||||
import requests
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from email.header import Header
|
||||
from functools import wraps
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that re-invokes a synchronous function on failure.

    :param max_retries: total number of attempts made before giving up
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the delay is multiplied by after each sleep
    :return: a decorator; the wrapped function returns ``None`` once every
        attempt has failed (the exception is logged, not re-raised)
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait_seconds = delay
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt >= max_retries:
                        logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                        return None  # attempts exhausted
                    logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                    time.sleep(wait_seconds)
                    wait_seconds *= backoff

            # Only reached when max_retries is not positive.
            return None

        return wrapper

    return decorator
|
||||
|
||||
|
||||
def async_retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that re-awaits a coroutine function on failure.

    :param max_retries: total number of attempts made before giving up
    :param delay: seconds to wait (without blocking the loop) before the
        second attempt
    :param backoff: factor the delay is multiplied by after each wait
    :return: a decorator; the wrapped coroutine returns ``None`` once every
        attempt has failed (the exception is logged, not re-raised)
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            wait_seconds = delay
            for attempt in range(1, max_retries + 1):
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    if attempt >= max_retries:
                        logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                        return None  # attempts exhausted
                    logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")

                    await asyncio.sleep(wait_seconds)  # non-blocking pause
                    wait_seconds *= backoff

            # Only reached when max_retries is not positive.
            return None

        return wrapper

    return decorator
|
||||
|
||||
|
||||
# 域名管理类 - 高内聚低耦合的域名管理方案
|
||||
class DomainManager:
    """Central registry of the mailbox domains known to this module.

    A domain's "type" is its index in the internal list, so the order of the
    list is part of the contract. ``qianyouduo.com`` appears at indices 1
    and 5; index 1 doubles as the fallback for unknown types and domains.
    """

    def __init__(self):
        # Domain list -- new domains only need to be appended here.
        self._domains = [
            "gmail.com",
            "qianyouduo.com",
            "rxybb.com",
            "cqrxy.vip",
            "0n.lv",
            "qianyouduo.com",
            "ziyouzuan.com",
            "emaing.online",
            "emaing.fun",
            "emaing.asia",
            "isemaing.site",
            "emaing.cyou",
            "emaing.site",
            "emaing.icu",
            "emaing.store",
            "emaing.pw",
            "emaing.xyz",
            "qydkjgs.asia",
            "qydkj.homes",
            "qydkj.baby",
            "qydkj.cyou",
            "qydkjgs.autos",
            "qydkj.autos",
            "qydkjgs.cyou",
            "qydkjgs.homes",
            "qydgs.asia",
            "qydkj.asia",
            "qydgs.cyou",
            "lulanjing.asia",
            "lisihan.asia",
            "mmwan.asia",
            "xyttan.asia",
            "zpaily.asia",
            "youxinzhiguo.asia",
            "huijinfenmu.asia",
            "linghao.asia",
            "cqhc.asia",
            "huacun.asia",
            "huachen.asia",
            "yisabeier.asia",
            "xinxinr.cyou",
            "lilisi.asia",
            "xybbwan.cyou",
            "zhongjing.cyou",
            "zprxy.cyou",
            "cqhuacun.cyou",
            "huazong.icu",
            "huacun.cyou"
        ]

    def get_domain_by_type(self, mail_type: int) -> str:
        """Return the domain registered under ``mail_type``.

        :param mail_type: mailbox type number (index into the domain list)
        :return: the matching domain, or the entry at index 1
            (qianyouduo.com) when the number is out of range
        """
        in_range = 0 <= mail_type < len(self._domains)
        if not in_range:
            return self._domains[1]  # fallback: qianyouduo.com
        return self._domains[mail_type]

    def get_domain_type(self, domain: str) -> int:
        """Return the type number of ``domain``.

        :param domain: domain name
        :return: index of the first occurrence, or 1 when it is unknown
        """
        if domain in self._domains:
            return self._domains.index(domain)
        return 1  # fallback: type of qianyouduo.com

    def get_imap_server(self, mail_type: int) -> str:
        """Return the IMAP host for a mailbox type.

        :param mail_type: mailbox type number
        :return: ``imap.<domain>`` for the resolved domain
        """
        resolved = self.get_domain_by_type(mail_type)
        return "imap." + resolved

    def get_imap_server_by_domain(self, domain: str) -> str:
        """Return the IMAP host for a domain.

        :param domain: domain name
        :return: ``imap.<domain>``
        """
        return "imap." + domain

    def is_valid_domain(self, domain: str) -> bool:
        """Tell whether ``domain`` is in the supported list.

        :param domain: domain name
        :return: ``True`` when the domain is supported
        """
        return self._domains.count(domain) > 0

    def get_all_domains(self) -> list:
        """Return every supported domain.

        :return: a copy of the domain list
        """
        return list(self._domains)

    def get_domain_count(self) -> int:
        """Return how many entries the domain list holds.

        :return: number of entries
        """
        total = len(self._domains)
        return total

    def get_creatable_domains(self) -> list:
        """Return the domains usable for creating mailboxes.

        :return: the domain list without gmail.com, order preserved
        """
        creatable = []
        for name in self._domains:
            if name == "gmail.com":
                continue
            creatable.append(name)
        return creatable

    def get_creatable_domain_by_type(self, mail_type: int) -> str:
        """Return a creatable domain for ``mail_type``.

        :param mail_type: mailbox type number
        :return: the resolved domain; gmail.com is replaced by the entry at
            index 1 (qianyouduo.com)
        """
        resolved = self.get_domain_by_type(mail_type)
        return self._domains[1] if resolved == "gmail.com" else resolved

    def get_random_creatable_domain(self) -> str:
        """Pick a random creatable domain (gmail.com excluded).

        :return: one randomly chosen domain
        :raises ValueError: when no creatable domain exists
        """
        candidates = self.get_creatable_domains()
        if len(candidates) == 0:
            raise ValueError("无可用域名用于创建邮箱")
        return random.choice(candidates)
|
||||
|
||||
|
||||
# 邮箱模块
|
||||
class Mail:
    """Client for the mail admin REST API, the account service and IMAP inboxes.

    Mailboxes are created and deleted through the admin API at
    ``_BOXES_URL``; account records are read from ``api_host``; messages
    are fetched over IMAP, through a SOCKS5 proxy when the account record
    supplies one.
    """

    # NOTE(security): the admin Authorization header and the default mailbox
    # password are hard-coded in source. They are kept unchanged so existing
    # callers keep working, but they should be moved to configuration.
    _DEFAULT_PWD = 'Zpaily88'
    _BOXES_URL = "https://mail.qianyouduo.com/admin/api/v1/boxes"
    _ADMIN_HEADERS = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    # Response text returned by the admin API when the mailbox already exists.
    _DUPLICATE_BOX_MARKER = 'Validation errors: [user] This combination of username and domain is already in database'
    # Only the newest N messages of the inbox are inspected by email_read.
    _SCAN_WINDOW = 20
    # Seconds before a blocking `requests` call gives up instead of hanging.
    request_timeout = 30

    def __init__(self):
        self.domain_manager = DomainManager()
        self.api_host = 'http://111.10.175.206:5020'

    # ------------------------------------------------------------------
    # Account service
    # ------------------------------------------------------------------
    def email_account_read(self, pk: int = None, account: str = None, status: bool = None, host: str = None,
                           proxy_account: str = None,
                           parent_account: str = None, order_by: str = None, level: int = None,
                           update_time_start: str = None, update_time_end: str = None, res_count: bool = False,
                           create_time_start: str = None, create_time_end: str = None, page: int = None,
                           limit: int = None) -> dict:
        """Read mail account records from the account service.

        When ``pk`` is given, the single record endpoint is queried and its
        JSON is returned as-is. Otherwise every non-None filter is sent as a
        query parameter (``res_count`` only when truthy).

        :param pk: primary key (optional)
        :param account: account (optional)
        :param status: status (optional)
        :param host: proxy host (optional)
        :param proxy_account: proxy account (optional)
        :param parent_account: parent mailbox account (optional)
        :param order_by: id|create_time|update_time, prefix ``-`` for descending (optional)
        :param level: mailbox level (optional)
        :param update_time_start: update time lower bound (optional)
        :param update_time_end: update time upper bound (optional)
        :param res_count: ask the service to return the total count (optional)
        :param create_time_start: creation time lower bound (optional)
        :param create_time_end: creation time upper bound (optional)
        :param page: page number (optional)
        :param limit: page size (optional)
        :return: decoded JSON response
        :raises Exception: when a list query returns a ``code`` other than 200, 400 or 404
        """
        if pk is not None:
            url = f'{self.api_host}/mail/account/{pk}'
            return requests.get(url, timeout=self.request_timeout).json()

        # Insertion order mirrors the order in which the query string was
        # historically built.
        filters = {
            'account': account,
            'status': status,
            'host': host,
            'proxy_account': proxy_account,
            'parent_account': parent_account,
            'order_by': order_by,
            'level': level,
            'create_time_start': create_time_start,
            'create_time_end': create_time_end,
            'update_time_start': update_time_start,
            'update_time_end': update_time_end,
            'res_count': res_count or None,
            'page': page,
            'limit': limit,
        }
        params = {key: value for key, value in filters.items() if value is not None}
        res = requests.get(f'{self.api_host}/mail/account', params=params,
                           timeout=self.request_timeout).json()
        if res.get('code') not in [200, 400, 404]:
            raise Exception(res)
        return res

    # ------------------------------------------------------------------
    # Mailbox creation / deletion (admin API)
    # ------------------------------------------------------------------
    def _random_box(self, count: int, pwd: str, mail_type):
        """Build ``(address, payload)`` for a mailbox with a random local part."""
        name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
        if mail_type is not None:
            mail_end = self.domain_manager.get_creatable_domain_by_type(mail_type)
        else:
            mail_end = self.domain_manager.get_random_creatable_domain()
        address = f"{name}@{mail_end}"
        return address, {"name": name, "email": address, "passwordPlaintext": pwd}

    def _named_box(self, account: str, pwd: str):
        """Build ``(address, payload)`` for ``account``; None for gmail.com addresses.

        Raises ValueError when the domain is not supported.
        """
        pieces = account.split('@')
        name, mail_end = pieces[0], pieces[1]
        if mail_end == "gmail.com":
            return None
        if not self.domain_manager.is_valid_domain(mail_end):
            raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")
        address = f"{name}@{mail_end}"
        return address, {"name": name, "email": address, "passwordPlaintext": pwd}

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def email_create_random(self, count: int = 8, pwd: str = _DEFAULT_PWD, mail_type: int | None = None) -> str:
        """Create a mailbox with a random name on a creatable domain (never gmail.com).

        :param count: length of the random local part (default 8)
        :param pwd: mailbox password
        :param mail_type: mailbox type to derive the domain from; None picks a random creatable domain
        :return: the mailbox address
        :raises Exception: when the API answers with a status other than 201
            (unless it reports that the mailbox already exists)
        """
        address, payload = self._random_box(count, pwd, mail_type)
        response = requests.post(self._BOXES_URL, headers=dict(self._ADMIN_HEADERS), json=payload,
                                 timeout=self.request_timeout)
        if self._DUPLICATE_BOX_MARKER in response.text:
            return address
        if response.status_code != 201:
            raise Exception(response.status_code)
        return address

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _email_create_random(self, count: int = 8, pwd: str = _DEFAULT_PWD, mail_type: int | None = None) -> str:
        """Async variant of :meth:`email_create_random` (same parameters and result)."""
        address, payload = self._random_box(count, pwd, mail_type)
        async with aiohttp.ClientSession() as session:
            async with session.post(self._BOXES_URL, headers=dict(self._ADMIN_HEADERS), json=payload) as response:
                status = response.status
                text = await response.text()
        if self._DUPLICATE_BOX_MARKER in text:
            return address
        if status != 201:
            raise Exception(status)
        return address

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def email_create(self, account: str, pwd: str = _DEFAULT_PWD) -> str | None:
        """Create the mailbox ``account``.

        :param account: full mailbox address
        :param pwd: mailbox password
        :return: the mailbox address, or None for gmail.com addresses (never created)
        :raises ValueError: when the domain is not supported
        :raises Exception: when the API answers with a status other than 201 or 400
        """
        box = self._named_box(account, pwd)
        if box is None:
            return None
        address, payload = box
        response = requests.post(self._BOXES_URL, headers=dict(self._ADMIN_HEADERS), json=payload,
                                 timeout=self.request_timeout)
        logger.info(f'创建邮箱响应: {response.status_code}')
        if response.status_code not in [201, 400]:
            raise Exception(response.status_code)
        return address

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _email_create(self, account: str, pwd: str = _DEFAULT_PWD) -> str | None:
        """Async variant of :meth:`email_create` (same parameters and result)."""
        box = self._named_box(account, pwd)
        if box is None:
            return None
        address, payload = box
        async with aiohttp.ClientSession() as session:
            async with session.post(self._BOXES_URL, headers=dict(self._ADMIN_HEADERS), json=payload) as response:
                status = response.status
        if status not in [201, 400]:
            raise Exception(f'status code: {status}')
        return address

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def email_delete(self, account: str) -> bool:
        """Delete the mailbox ``account``.

        :param account: full mailbox address
        :return: False for addresses containing ``@gmail.com`` (nothing is
            sent); True when the API answers 204 or 404
        :raises Exception: on any other status code
        """
        if '@gmail.com' in account:
            return False
        response = requests.delete(f"{self._BOXES_URL}/{account}", headers=dict(self._ADMIN_HEADERS),
                                   timeout=self.request_timeout)
        logger.info(f'删除邮箱响应: --> {response.status_code}')
        if response.status_code not in [204, 404]:
            raise Exception(response.status_code)
        return True

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _email_delete(self, account: str) -> bool:
        """Async variant of :meth:`email_delete` (same parameters and result)."""
        if '@gmail.com' in account:
            return False
        async with aiohttp.ClientSession() as session:
            async with session.delete(f"{self._BOXES_URL}/{account}", headers=dict(self._ADMIN_HEADERS)) as response:
                status = response.status
        if status not in [204, 404]:
            raise Exception(f'status code: {status}')
        return True

    # ------------------------------------------------------------------
    # Message parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def extract_body(msg):
        """Return the message body as text, preferring HTML over plain text.

        Only ``text/html`` and ``text/plain`` parts that are inline (or have
        no Content-Disposition) are considered. Each part is decoded with
        its declared charset, falling back to UTF-8 when the charset is
        unknown; undecodable bytes are replaced.

        :param msg: ``email.message.Message`` instance
        :return: body text, or ``""`` when no text part is found
        """
        def _decode_part(part):
            payload = part.get_payload(decode=True)
            if payload is None:
                return None
            charset = part.get_content_charset() or part.get_param('charset') or 'utf-8'
            try:
                return payload.decode(charset, errors='replace')
            except (LookupError, TypeError):
                # Unknown or malformed charset declaration.
                return payload.decode('utf-8', errors='replace')

        html_text = None
        plain_text = None
        if msg.is_multipart():
            for part in msg.walk():
                disposition = part.get_content_disposition()
                if disposition and disposition != "inline":
                    continue
                content_type = part.get_content_type()
                if content_type == "text/html":
                    html_text = _decode_part(part) or html_text
                elif content_type == "text/plain":
                    plain_text = _decode_part(part) or plain_text
        else:
            content_type = msg.get_content_type()
            if content_type == "text/html":
                html_text = _decode_part(msg)
            elif content_type == "text/plain":
                plain_text = _decode_part(msg)
        return html_text or plain_text or ""

    @staticmethod
    def convert_to_china_time(date_str):
        """Convert an RFC 2822 date header into a 10-digit Unix timestamp.

        A date without timezone information is treated as UTC. The returned
        epoch value does not depend on any display timezone. When the header
        cannot be parsed, the current time is returned instead of raising.

        :param date_str: value of the ``Date`` header
        :return: integer Unix timestamp
        """
        try:
            email_date = email.utils.parsedate_to_datetime(date_str)
            if email_date is None:
                return int(time.time())
            if email_date.tzinfo is None:
                email_date = email_date.replace(tzinfo=timezone.utc)
            return int(email_date.timestamp())
        except Exception:
            # Deliberate best effort: a bad Date header must not abort reading.
            return int(time.time())

    @staticmethod
    def _decode_subject(raw_subject) -> str:
        """Decode a Subject header, joining all of its encoded chunks."""
        if raw_subject is None:
            return ""
        pieces = []
        for value, charset in decode_header(raw_subject):
            if isinstance(value, bytes):
                try:
                    pieces.append(value.decode(charset or 'utf-8', errors='replace'))
                except LookupError:
                    pieces.append(value.decode('utf-8', errors='replace'))
            else:
                pieces.append(value)
        return ''.join(pieces)

    @staticmethod
    def _sender_matches(msg, needle: str) -> bool:
        """Case-insensitive substring match of ``needle`` against From names and addresses."""
        candidates = []
        for name, addr in email.utils.getaddresses([str(msg.get("From", ""))]):
            if name:
                candidates.append(name.lower())
            if addr:
                candidates.append(addr.lower())
        return any(needle in candidate for candidate in candidates)

    def _fetch_matching(self, imap_server, from_, limit):
        """Collect up to ``limit`` newest INBOX messages whose sender matches ``from_``.

        Returns a list of item dicts, or None when INBOX cannot be selected
        or searched.
        """
        status, _ = imap_server.select("INBOX")
        if status != 'OK':
            return None
        status, email_ids = imap_server.search(None, "ALL")
        if status != 'OK':
            return None

        needle = (from_ or "").lower()
        found = []
        # Newest first, limited to the most recent messages.
        for email_id in reversed(email_ids[0].split()[-self._SCAN_WINDOW:]):
            if len(found) >= limit:
                break
            status, msg_data = imap_server.fetch(email_id, "(RFC822)")
            for response in msg_data:
                if len(found) >= limit:
                    break
                if not isinstance(response, tuple):
                    continue
                msg = email.message_from_bytes(response[1])
                if not self._sender_matches(msg, needle):
                    continue
                item = {
                    "title": self._decode_subject(msg.get("Subject")),
                    "from": msg["From"],
                    "content": self.extract_body(msg),
                    "code": 200
                }
                date_str = msg["Date"]
                if date_str:
                    item["date"] = self.convert_to_china_time(date_str)
                found.append(item)
        return found

    @staticmethod
    def _close_imap(imap_server):
        """Best-effort shutdown of an IMAP connection; failures are only logged."""
        if imap_server is None:
            return
        try:
            if getattr(imap_server, 'state', None) == 'SELECTED':
                imap_server.close()
        except Exception as e:
            logger.error(f"关闭IMAP文件夹时发生错误: {e}")
        try:
            imap_server.logout()
        except Exception as e:
            logger.error(f"登出IMAP服务器时发生错误: {e}")
        # Per the original author's note, a forced socket close may be needed on Windows.
        try:
            if getattr(imap_server, 'sock', None) is not None:
                imap_server.sock.close()
        except Exception as sock_err:
            logger.error(f"强制关闭socket时发生错误: {sock_err}")

    def _load_parent_login(self, user: str):
        """Look up password and proxy settings for ``user`` in the account service.

        Returns ``(pwd, proxy_host, proxy_port, proxy_user, proxy_pwd)`` or
        None when the service reports no usable record.
        """
        res = self.email_account_read(parent_account=user, status=True, level=0)
        if res['code'] != 200:
            return None
        records = res.get('items') or []
        if not records:
            logger.error(f"未找到母邮箱账号记录: {user}")
            return None
        record = records[0]
        return (record['parent_pwd'], record['host'], record['port'],
                record['proxy_account'], record['proxy_pwd'])

    # ------------------------------------------------------------------
    # Reading mail
    # ------------------------------------------------------------------
    def email_read(self, user: str, from_: str, limit: int = 1, is_del: bool = False) -> list | None:
        """Fetch the newest messages from ``user``'s INBOX that match a sender.

        :param user: mailbox address to log in to
        :param from_: sender keyword, matched case-insensitively as a
            substring of the From display name or address
        :param limit: maximum number of messages to return (default 1)
        :param is_del: delete the whole mailbox account after a successful
            read (``email_delete`` ignores gmail.com addresses)
        :return: list of dicts with keys ``title``, ``from``, ``content``,
            ``code`` (always 200) and, when the message has a Date header,
            ``date`` (Unix timestamp); None when the address is malformed or
            unsupported, an error occurs, or nothing matches
        """
        pieces = user.split('@')
        if len(pieces) < 2:
            logger.error(f"邮箱地址格式错误: {user}")
            return None
        local_part, domain = pieces[0], pieces[1]
        if not self.domain_manager.is_valid_domain(domain):
            return None

        mail_type = self.domain_manager.get_domain_type(domain)
        # Dots are stripped from the local part for Gmail only.
        if domain == "gmail.com":
            local_part = local_part.replace('.', '')
        user = f"{local_part}@{domain}"

        proxy_host = proxy_port = proxy_user = proxy_pwd = None
        if mail_type == 0:
            login = self._load_parent_login(user)
            if login is None:
                return None
            pwd, proxy_host, proxy_port, proxy_user, proxy_pwd = login
        else:
            pwd = self._DEFAULT_PWD

        # NOTE: the proxy is installed by replacing socket.socket for the
        # whole process, so concurrent callers in other threads will also see
        # the patched socket until it is restored below.
        original_socket = None
        if proxy_host is not None and proxy_port is not None:
            original_socket = socket.socket
            if proxy_user is not None and proxy_pwd is not None:
                socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True, proxy_user, proxy_pwd)
            else:
                socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True)
            socket.socket = socks.socksocket

        imap_server = None
        items = None
        try:
            # The connection must be created after the proxy is installed.
            imap_server = imaplib.IMAP4_SSL(self.domain_manager.get_imap_server(mail_type))
            imap_server.login(user, pwd.replace(' ', ''))
            items = self._fetch_matching(imap_server, from_, limit)
        except imaplib.IMAP4.error as e:
            logger.error(f"连接邮箱失败: {e}")
        except Exception as e:
            logger.error(f"获取邮件异常: {e}")
        finally:
            self._close_imap(imap_server)
            if original_socket is not None:
                socket.socket = original_socket

        if not items:
            return None

        # Delete the account only when the messages are actually handed back
        # to the caller; otherwise the only copy of the mail would be lost.
        if is_del:
            try:
                self.email_delete(user)
            except Exception as del_err:
                logger.error(f"删除邮箱账号失败: {del_err}")
        return items
|
||||
|
||||
|
||||
async def main():
    """Usage example: create a random mailbox, then delete it again.

    ``Mail.email_create`` and ``Mail.email_read`` can be exercised the same
    way with a concrete address.
    """
    client = Mail()

    random_email = client.email_create_random()
    print(f"创建的随机邮箱: {random_email}")

    deleted = client.email_delete(random_email)
    print(f"删除的邮箱: {deleted}")
|
||||
|
||||
# Shared module-level Mail instance, created when this module is imported.
mail_ = Mail()
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# asyncio.run(main())
|
||||
765
spider/main.py
Normal file
765
spider/main.py
Normal file
@@ -0,0 +1,765 @@
|
||||
import random
|
||||
import time
|
||||
from datetime import datetime
|
||||
from DrissionPage import Chromium
|
||||
from loguru import logger
|
||||
from work import generate_child_parent_names
|
||||
from mail_ import mail_
|
||||
from bit_browser import bit_browser
|
||||
from api import api
|
||||
from proxys import proxy_list
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from auto_challenge import ReCaptchaHandler
|
||||
|
||||
|
||||
class Auto:
|
||||
def __init__(self, http: str = None):
    """Attach to a Chromium browser and remember its latest tab.

    :param http: currently unused; the call that passed it to ``Chromium``
        is disabled and a default ``Chromium()`` is created instead
    """
    browser = Chromium()
    self.browser = browser
    self.tab = browser.latest_tab
|
||||
|
||||
# cf打码
|
||||
def solve_cloudflare(self, is_ok: bool = False):
    """Try to pass the Cloudflare Turnstile challenge on the latest tab.

    Polls up to five times. Each round looks for the "blocked" page, then
    digs through the widget's shadow roots / iframe to read its status and,
    if a checkbox is present, clicks it.

    :param is_ok: set by the internal retry; when True, an exception while
        locating the widget is treated as success
    :return: True when the widget reports "Success!" (or the widget lookup
        fails on the retry pass), False when the page is blocked or all
        rounds are used up
    """
    tab = self.browser.latest_tab
    for _ in range(5):
        tab.wait(1)
        # Hard failure: Cloudflare shows its "blocked" page.
        res = tab.ele(
            't:h1@text()=Sorry, you have been blocked', timeout=1)
        if res:
            logger.error("Cloudflare验证失败")
            return False

        try:
            # The Turnstile iframe lives in the shadow root of the parent of
            # the hidden cf-turnstile-response input.
            shadow1 = tab.ele(
                'x://*[@name="cf-turnstile-response"]').parent().shadow_root
            iframe = shadow1.get_frame(1)
            if iframe:
                logger.debug("找到Cloudflare iframe")
                shadow2 = iframe.ele('x:/html/body').shadow_root
                if shadow2:
                    logger.debug("找到Cloudflare iframe body shadow root")
                    # Give an in-progress verification a few seconds to finish.
                    status = shadow2.ele(
                        'x://span[text()="Verifying..."]', timeout=1.5)
                    if status:
                        tab.wait(3)
                    status = shadow2.ele(
                        'x://span[text()="Success!"]', timeout=1.5)
                    if status:
                        logger.debug("Cloudflare验证成功")
                        return True
                    checkbox = shadow2.ele(
                        'x://input[@type="checkbox"]', timeout=1.5)
                    if checkbox:
                        checkbox.click()
                        logger.debug("点击Cloudflare复选框")
                        tab.wait(3)
                        # Fall through so the next round re-reads the status.
                        logger.debug("重新获取状态")
        except Exception as e:
            # First failure: retry the whole procedure once with is_ok=True.
            # On that retry a failure is reported as success — presumably
            # because the widget is gone once the challenge has passed
            # (TODO confirm).
            if is_ok:
                logger.debug(f"cloudflare处理通过: {e}")
                return True
            return self.solve_cloudflare(is_ok=True)
        tab.wait(1)
    return False
|
||||
|
||||
# 谷歌验证码
|
||||
def solve_recaptcha(self):
    """Run the reCAPTCHA handler on the current tab and confirm the result.

    :return: True when the handler reports success and the checkbox border
        inside the reCAPTCHA iframe contains ``display: none;``; False
        otherwise
    """
    logger.debug("开始解决谷歌验证码")
    outcome = ReCaptchaHandler(self.tab).challenge()
    if not outcome.get("status"):
        logger.error("谷歌验证码失败")
        return False

    logger.debug("谷歌验证码成功")
    frame = self.tab.ele('t:iframe@title=reCAPTCHA')
    border = frame.ele('t:div@class=recaptcha-checkbox-border')
    if not border:
        print("No element found")
        return False

    logger.debug(f"html: {border.html}")
    if 'display: none;' in border.html:
        logger.debug("谷歌验证码成功")
        return True

    logger.error("谷歌验证码失败")
    return False
|
||||
|
||||
# 打开URL
|
||||
def open_url(self, url: str):
    """Navigate the stored tab to ``url``.

    :param url: address to open
    """
    page = self.tab
    page.get(url)
|
||||
|
||||
def get_tab(self):
    """Return the tab object stored on this instance."""
    current_tab = self.tab
    return current_tab
|
||||
|
||||
# 等待进入首页
|
||||
def wait_home(self, max_attempts: int = 4):
    """Wait until the home page banner is visible.

    Each attempt waits one second and then looks for a ``div`` whose text
    contains ``YOUTUBE PRIVACY SETTLEMENT`` (one-second lookup timeout).

    :param max_attempts: number of attempts before giving up; the default
        of 4 matches the previous hard-coded loop
    :return: True when the banner is found, False after ``max_attempts``
        unsuccessful attempts
    """
    logger.debug("等待进入首页")
    for _ in range(max_attempts):
        self.tab.wait(1)
        if self.tab.ele('t:div@text():YOUTUBE PRIVACY SETTLEMENT', timeout=1):
            logger.debug("成功进入首页")
            return True
    # Report the real attempt count (the old message claimed 5 for 4 tries).
    logger.error(f"等待进入首页超过{max_attempts}次,未成功")
    return False
|
||||
|
||||
|
||||
# 随机取城市
|
||||
def get_random_city(self, province: str | None = None):
|
||||
cities = {
|
||||
"Alberta": ["Calgary", "Edmonton"],
|
||||
"British Columbia": ["Vancouver"],
|
||||
# "Manitoba": ["Winnipeg", "Rochester"],
|
||||
# "New Brunswick": ["Fredericton", "Moncton"],
|
||||
# "Newfoundland and Labrador": ["St. John's", "Halifax"],
|
||||
"Nova Scotia": ["Halifax"],
|
||||
"Ontario": ["Toronto"],
|
||||
# "Prince Edward Island": ["Charlottetown", "St. John's"],
|
||||
# "Quebec": ["Quebec City", "Montreal"],
|
||||
# "Saskatchewan": ["Saskatoon", "Regina"],
|
||||
}
|
||||
if province is None:
|
||||
province = random.choice(list(cities.keys()))
|
||||
return province, random.choice(cities.get(province, []))
|
||||
|
||||
def get_province_by_city(self) -> tuple[str, str]:
    """Return a random ``(city, province)`` pair from the built-in table.

    Despite its name, this method takes no city argument and performs no
    lookup: it picks one city uniformly at random and returns it together
    with its province.

    :return: ``(city, province)`` tuple
    """
    cities_by_province = {
        "Alberta": ("Calgary", "Edmonton"),
        "British Columbia": ("Vancouver",),
        "Nova Scotia": ("Halifax",),
        "Ontario": (
            "Toronto", "Ottawa", "Mississauga", "Brampton", "Hamilton",
            "Kitchener", "London", "Markham", "Vaughan", "Windsor",
            "Oshawa", "Brantford", "Barrie", "Sudbury", "Kingston",
            "Guelph", "Cambridge", "Sarnia", "Peterborough", "Waterloo",
            "Belleville", "Brockville", "Burlington", "Cornwall",
            "Kawartha Lakes", "North Bay", "Orillia", "Pickering",
            "Sault Ste. Marie", "Stratford", "Durham", "Norfolk County",
            "Prince Edward County", "Quinte West", "St. Catharines",
            "Welland", "Thorold", "Niagara Falls", "Pelham", "Port Colborne",
        ),
    }
    # Flattened in the same order as the original city -> province mapping,
    # so a seeded random generator yields the same pair as before.
    pairs = [
        (city, province)
        for province, cities in cities_by_province.items()
        for city in cities
    ]
    return random.choice(pairs)
|
||||
|
||||
# Random food products
|
||||
|
||||
def get_random_food(self, city: str, shop: str) -> str:
    """Build a random purchase description for a shop.

    One or two product categories are sampled without repetition, one to
    three products are sampled from each, and every product gets a random
    quantity between 1 and 3.

    :param city: city name, inserted into the text
    :param shop: shop name, inserted into the text
    :return: a sentence of the form
        ``"<shop>, <city> buy: <product> * <qty>, <product> * <qty>."``
    """
    categories = [
        [
            'Wonder Bread White',
            'Villaggio White Bread',
            'No Name Sliced White Bread',
            "President's Choice White Sliced Bread",
        ],
        [
            "Ben's Original Whole Wheat Bread",
            "POM Whole Wheat Bread",
            "Silver Hills Bakery Whole Wheat Sliced Bread",
            "Country Harvest Whole Wheat Bread",
        ],
        [
            "Wonder Bread Hot Dog Buns",
            "Villaggio Hamburger Buns",
            "Dempster's Dinner Rolls",
            "No Frills Hot Dog Buns",
        ],
        [
            "Stonemill Bakehouse Bagels",
            "Wonder Bagels",
            "Montreal Bagels (pre-packaged, e.g., St. Lawrence brand)",
            "President's Choice Bagels",
        ],
        [
            "Silver Hills Multi-Grain Sliced Bread",
            "POM Multi-Grain Bread",
            "Country Harvest Multi-Grain Loaf",
        ],
        [
            "President's Choice French Stick",
            "Dempster's Italian Style Bread",
            "Wonder Italian Bread",
            "Villaggio Country Style Loaf",
        ],
    ]

    # Pick 1-2 distinct categories.
    chosen_categories = random.sample(categories, k=random.randint(1, 2))

    # From each category pick at least one and at most three products.
    selected_products: list[str] = []
    for category in chosen_categories:
        pick_count = random.randint(1, min(3, len(category)))
        selected_products.extend(random.sample(category, k=pick_count))
    logger.debug(f"随机选择的产品: {selected_products}")

    # Quantities are drawn after all products are chosen, one per product.
    purchases = ', '.join(
        f'{product} * {random.randint(1, 3)}' for product in selected_products
    )
    text = f'{shop}, {city} buy: {purchases}.'
    logger.debug(f'随机选择的产品文本: {text}')
    return text
|
||||
|
||||
# 填写问卷
|
||||
def fill_questionnaire(self):
    """Fill in the claim form with generated data and submit it.

    Person data comes from ``generate_child_parent_names()``. Every field is
    filled after a 0.1 s pause, the reCAPTCHA is attempted, and the result
    of ``submit_file`` is returned.

    :return: whatever ``submit_file`` returns; None when any step raises
        (the exception is logged, not propagated)
    """
    try:
        info = generate_child_parent_names()
        child_full_name = info['child_full_name']
        parent_full_name = info['parent_full_name']
        # Source format is YYYY-MM-DD; the form expects MM/DD/YYYY.
        child_birthday = datetime.strptime(info['child_birthday'], '%Y-%m-%d').strftime('%m/%d/%Y')
        address_str = info['child_address_str']
        city_name = info['child_city_name']
        postcode = info['child_postcode']
        parent_phone = info['parent_phone']
        province = info['parent_state']
        # Fixed address; creating a random mailbox via mail_ is disabled here.
        email = 'zhiyu@qq.com'

        for label, value in (
            ("child_full_name", child_full_name),
            ("parent_full_name", parent_full_name),
            ("child_birthday", child_birthday),
            ("address_str", address_str),
            ("city_name", city_name),
            ("postcode", postcode),
            ("parent_phone", parent_phone),
            ("province", province),
            ("email", email),
        ):
            logger.debug(f"{label} --> {value}")

        def type_into(locator, value):
            # Short pause before each field, then type the value.
            self.tab.wait(0.1)
            self.tab.ele(locator).input(value)

        def click_on(locator):
            self.tab.wait(0.1)
            self.tab.ele(locator).click()

        type_into('t:input@id=name1', child_full_name)
        type_into('t:input@id=name2', parent_full_name)
        type_into('t:input@id=dateOfBirth', child_birthday)
        type_into('t:input@id=street1', address_str)
        type_into('t:input@id=city', city_name)

        # Province is chosen from the <select> by option text.
        self.tab.wait(0.1)
        state_select = self.tab.ele('t:select@formcontrolname=state')
        state_select.ele(f't:option@text():{province}').click()

        type_into('t:input@id=zip', postcode)
        type_into('t:input@id=phone1', parent_phone)
        type_into('t:input@id=emailAddress', email)
        type_into('t:input@id=confirmEmailemail', email)

        click_on('t:input@@formcontrolname=resideInUS@@id=Yes')
        click_on('t:input@@formcontrolname=watchedDuringPeriod@@id=Yes')

        type_into('t:input@id=signatureMinor', child_full_name)
        type_into('t:input@id=signatureParentGuardian', parent_full_name)

        self.solve_recaptcha()

        return self.submit_file(
            child_full_name=child_full_name,
            parent_full_name=parent_full_name,
            child_birthday=child_birthday,
            address_str=address_str,
            city_name=city_name,
            parent_phone=parent_phone,
            postcode=postcode,
            province=province,
            email=email,
            text=""
        )
    except Exception as e:
        logger.error(f"填写问卷失败: {e}")
|
||||
|
||||
# 提交问卷
|
||||
def submit_file(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str):
|
||||
"""
|
||||
Submit the questionnaire and save the submitted data to the backend service (child and parent fields)
|
||||
|
||||
Parameters:
|
||||
child_full_name (str): child's full name
|
||||
|
||||
parent_full_name (str): parent's full name
|
||||
|
||||
child_birthday (str): child's birthday (string, already in MM/DD/YYYY)
|
||||
|
||||
address_str (str): street address
|
||||
|
||||
city_name (str): city
|
||||
|
||||
parent_phone (str): parent's phone number
|
||||
|
||||
postcode (str): postal code
|
||||
|
||||
province (str): full province/state name
|
||||
|
||||
email (str): e-mail address
|
||||
|
||||
text (str): text content (e.g. the feedback address)
|
||||
|
||||
"""
|
||||
# NOTE(review): the indentation of this method was lost in this copy, so the nesting
# of the branches below (in particular which `if` the `else:` belongs to) cannot be
# confirmed from here; each comment describes only the statement that follows it.
# Attempt counter: the loop logs an error and returns False once it reaches 3.
jc = 0
|
||||
while True:
|
||||
if jc >= 3:
|
||||
logger.error("提交问卷失败")
|
||||
return False
|
||||
# Solve the reCAPTCHA; a falsy result is counted as a failed attempt.
res = self.solve_recaptcha()
|
||||
if not res:
|
||||
jc += 1
|
||||
continue
|
||||
# Look up the SUBMIT button by its text.
res = self.tab.ele('t:button@text():SUBMIT')
|
||||
if res:
|
||||
logger.debug(f"点击Submit按钮")
|
||||
res.click()
|
||||
# Pause before looking for the confirmation heading.
self.tab.wait(3)
|
||||
res = self.tab.ele(
|
||||
't:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=1)
|
||||
if res:
|
||||
logger.info("提交问卷成功")
|
||||
logger.info(f"反馈地址: {text}")
|
||||
|
||||
# Look up a <b> element; when found, its text is logged and replaces `text`.
res = self.tab.ele('t:b')
|
||||
if res:
|
||||
logger.info(f"反馈地址: {res.text}")
|
||||
text = res.text
|
||||
status = True
|
||||
|
||||
else:
|
||||
status=False
|
||||
|
||||
# Send the submitted fields, the feedback text and the status flag to the backend API.
api.create_info(
|
||||
child_full_name=child_full_name,
|
||||
parent_full_name=parent_full_name,
|
||||
child_birthday=child_birthday,
|
||||
address_str=address_str,
|
||||
city_name=city_name,
|
||||
parent_phone=parent_phone,
|
||||
postcode=postcode,
|
||||
province=province,
|
||||
email=email,
|
||||
text=text,
|
||||
status=status
|
||||
)
|
||||
return True
|
||||
|
||||
# The three checks below look for an error text on the page (timeout, SSL, SOCKS)
# and refresh the tab when one is found.
bol = self.tab.ele(
|
||||
't:div@text():ERR_TIMED_OUT', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
bol = self.tab.ele(
|
||||
't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
bol = self.tab.ele(
|
||||
't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
# Count this pass as one attempt.
jc += 1
|
||||
|
||||
|
||||
def parse_proxy(proxy: str) -> tuple[str, int, str, str] | None:
|
||||
"""
|
||||
解析代理字符串为四元组 `(host, port, user, pwd)`
|
||||
|
||||
参数:
|
||||
proxy: 形如 `host:port:user:pwd`
|
||||
|
||||
返回值:
|
||||
(host, port, user, pwd) 或 None(格式错误)
|
||||
"""
|
||||
try:
|
||||
host, port, user, pwd = proxy.split(":", 3)
|
||||
return host, int(port), user, pwd
|
||||
except Exception:
|
||||
logger.error(f"代理格式错误: {proxy}")
|
||||
return None
|
||||
|
||||
|
||||
def create_fingerprint_browser(proxy: str) -> tuple[str, str] | None:
    """
    Create a fingerprint browser bound to the proxy and open its window.

    If the profile is created but cannot be opened (or an exception is raised
    after creation), the profile is closed and deleted again so that it does
    not linger: the caller only receives ``None`` and could not clean it up.

    Parameters:
        proxy: proxy string, parsed with ``parse_proxy``

    Returns:
        ``(browser_id, http)`` on success, where ``http`` is the value returned
        by ``bit_browser.bit_browser_open``; ``None`` on any failure.
    """
    info = parse_proxy(proxy)
    if info is None:
        return None
    host, port, user, pwd = info

    browser_id = None
    try:
        browser_id = bit_browser.bit_browser_create(
            remark=f"{user}",
            proxy_type="socks5",
            host=host,
            port=str(port),
            proxy_user=user,
            proxy_pwd=pwd,
        )
        if not browser_id:
            return None
        logger.info(f"创建指纹浏览器成功: {browser_id}")
        time.sleep(1)
        http = bit_browser.bit_browser_open(browser_id)
        if http:
            logger.info(f"打开指纹浏览器成功: {browser_id}")
            return browser_id, http
        logger.error(f"打开指纹浏览器失败: {browser_id}")
    except Exception as e:
        logger.error(f"创建指纹浏览器失败: {e}")

    # Reached only on failure: remove a profile that was created but is unusable.
    if browser_id:
        close_and_delete_browser(browser_id)
    return None
|
||||
|
||||
|
||||
def close_and_delete_browser(browser_id: str) -> None:
    """
    Close the given fingerprint browser, then delete it.

    Both steps are best-effort: a failure in either one is logged as a warning
    and never propagates to the caller.

    Parameters:
        browser_id: ID of the fingerprint browser
    """
    try:
        bit_browser.bit_browser_close(browser_id)
    except Exception as close_err:
        logger.warning(f"关闭浏览器失败或已关闭: {browser_id} - {close_err}")

    # NOTE(review): indentation was lost in the reviewed copy; the pause is
    # assumed to run unconditionally between the close and delete requests.
    time.sleep(1)

    try:
        bit_browser.bit_browser_delete(browser_id)
    except Exception as delete_err:
        logger.warning(f"删除浏览器失败或已删除: {browser_id} - {delete_err}")
|
||||
|
||||
|
||||
def run_task_with_proxy(proxy: str, stop_event: threading.Event) -> None:
    """
    Run one full automation pass through a freshly created fingerprint browser.

    The browser is created for the proxy, the claim form is opened and filled,
    and the browser is closed and deleted afterwards regardless of the outcome.
    The stop event is checked between steps so the pass can be abandoned early.

    Parameters:
        proxy: proxy string
        stop_event: when set, the remaining steps are skipped
    """
    browser_id: str | None = None
    try:
        handle = create_fingerprint_browser(proxy)
        if not handle:
            return
        browser_id, http = handle
        if stop_event.is_set():
            return

        auto = Auto(http=http)
        auto.open_url('https://www.claimform.youtubeprivacysettlement.com')

        # Each guard checks the stop flag first, then runs the next page step.
        if stop_event.is_set() or not auto.wait_home():
            return
        if stop_event.is_set() or not auto.click_continue():
            return
        if stop_event.is_set():
            return

        auto.fill_questionnaire()
    except Exception as e:
        logger.error(f"执行任务异常: {e}")
    finally:
        # Only clean up when a browser was actually created.
        if browser_id:
            try:
                close_and_delete_browser(browser_id)
            except Exception:
                pass
|
||||
|
||||
|
||||
def proxy_loop(proxy: str, stop_event: threading.Event) -> None:
    """
    Keep one proxy busy: as soon as a task pass ends, start the next one.

    While ``is_forbidden_time()`` is true the loop pauses instead: it waits
    60 s, cleans up all browsers, then waits for the number of seconds returned
    by ``seconds_until(20, 0)``. Every wait is done on the stop event so the
    loop can exit early.

    Parameters:
        proxy: proxy string
        stop_event: set from outside to make the loop exit
    """
    while not stop_event.is_set():
        try:
            if not is_forbidden_time():
                run_task_with_proxy(proxy, stop_event)
            else:
                if stop_event.wait(timeout=60):
                    break
                cleanup_all_browsers()
                remaining = seconds_until(20, 0)
                if stop_event.wait(timeout=remaining):
                    break
                continue
        except Exception as e:
            logger.error(f"代理循环异常: {proxy} - {e}")

        # Short pause between passes; leave immediately once a stop is requested.
        if stop_event.is_set() or stop_event.wait(timeout=0.1):
            break
|
||||
|
||||
|
||||
def is_forbidden_time() -> bool:
|
||||
"""
|
||||
判断当前是否处于禁跑时段(每日 18:30 ~ 20:00,本地时间)
|
||||
|
||||
返回值:
|
||||
bool: True 表示处于禁跑时段
|
||||
"""
|
||||
# 去除晚上停止功能
|
||||
return False
|
||||
# 禁跑时段为 18:30 ~ 20:00
|
||||
now = datetime.now()
|
||||
start = now.replace(hour=18, minute=30, second=0, microsecond=0)
|
||||
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
|
||||
return start <= now < end
|
||||
|
||||
def wait_until_out_of_forbidden(interval_sec: float = 5.0, stop_event: threading.Event | None = None) -> None:
    """
    Block while ``is_forbidden_time()`` is true, polling once per interval.

    Exactly one delay of ``interval_sec`` happens per poll. When a stop event
    is supplied the delay is an interruptible wait on that event; otherwise it
    is a plain sleep.

    Parameters:
        interval_sec: polling interval in seconds
        stop_event: optional stop event; when it becomes set the wait ends early
    """
    while is_forbidden_time():
        if stop_event is None:
            time.sleep(interval_sec)
        elif stop_event.wait(timeout=interval_sec):
            # Stop requested during the wait.
            break
|
||||
|
||||
|
||||
def seconds_until(hour: int, minute: int) -> float:
|
||||
"""
|
||||
计算到今天指定时间点的剩余秒数
|
||||
|
||||
参数:
|
||||
hour: 目标小时(24小时制)
|
||||
minute: 目标分钟
|
||||
|
||||
返回值:
|
||||
float: 剩余秒数,若目标时间已过则为 0
|
||||
"""
|
||||
now = datetime.now()
|
||||
target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||||
if target <= now:
|
||||
return 0.0
|
||||
return (target - now).total_seconds()
|
||||
|
||||
|
||||
def count_fingerprint_browsers() -> int:
    """
    Count the fingerprint browsers reported by ``bit_browser.bit_browser_get(0, 100)``.

    Returns:
        int: the ``totalNum`` field when it is a non-negative integer, otherwise
        the length of the returned ``list``; 0 when the query fails (a warning
        is logged).
    """
    try:
        response = bit_browser.bit_browser_get(0, 100)
        payload = response.get("data", {}) if isinstance(response, dict) else {}
        reported_total = payload.get("totalNum")
        browsers = payload.get("list", [])
        # Fall back to counting the page when no usable total is reported.
        if not isinstance(reported_total, int) or reported_total < 0:
            return len(browsers)
        return reported_total
    except Exception as e:
        logger.warning(f"统计指纹浏览器数量失败: {e}")
        return 0
|
||||
|
||||
|
||||
def cleanup_all_browsers() -> None:
    """
    Close and delete every fingerprint browser returned by
    ``bit_browser.bit_browser_get(0, 100)``.

    Failures while listing are logged as a warning and not raised.
    """
    try:
        response = bit_browser.bit_browser_get(0, 100)
        payload = response.get("data", {}) if isinstance(response, dict) else {}
        entries = payload.get("list", [])
        # Collect every usable id first, then remove the browsers one by one.
        browser_ids = [entry.get("id") for entry in entries if entry.get("id")]
        for browser_id in browser_ids:
            close_and_delete_browser(browser_id)
    except Exception as e:
        logger.warning(f"清理所有指纹浏览器失败: {e}")
|
||||
|
||||
|
||||
def delete_excess_browsers(limit: int) -> None:
    """
    Delete the fingerprint browsers that exceed the limit, starting from the
    end of the listing returned by ``bit_browser.bit_browser_get(0, 100)``.

    Parameters:
        limit: maximum number of browsers allowed to remain
    """
    try:
        response = bit_browser.bit_browser_get(0, 100)
        payload = response.get("data", {}) if isinstance(response, dict) else {}
        entries = payload.get("list", [])
        browser_ids = [entry.get("id") for entry in entries if entry.get("id")]

        surplus = len(browser_ids) - limit
        if surplus <= 0:
            return

        # Take the last `surplus` ids and remove them last-to-first.
        for browser_id in reversed(browser_ids[-surplus:]):
            close_and_delete_browser(browser_id)
        logger.info(f"已删除超出数量 {surplus},当前限制为 {limit}")
    except Exception as e:
        logger.warning(f"删除超额浏览器失败: {e}")
|
||||
|
||||
|
||||
def monitor_browsers_and_restart(limit: int, stop_event: threading.Event, restart_event: threading.Event) -> None:
    """
    Check the fingerprint-browser count every 3 seconds and trim the surplus.

    Whenever the count exceeds ``limit`` the extra browsers are deleted from
    the end of the listing. The 3-second pause is a wait on ``stop_event``, so
    the monitor exits as soon as a stop is requested instead of running one
    more check after it.

    Parameters:
        limit: maximum number of browsers allowed (usually the proxy count)
        stop_event: set from outside to end the monitor
        restart_event: event meant to trigger a restart (not used by this function)
    """
    while not stop_event.wait(timeout=3):
        count = count_fingerprint_browsers()
        if count > limit:
            logger.warning(f"指纹浏览器数量 {count} 超过限制 {limit},开始删除超出部分")
            delete_excess_browsers(limit)
|
||||
|
||||
|
||||
def _shutdown_executor(executor) -> None:
    """Shut the worker pool down, waiting for running tasks; failures are only logged."""
    try:
        executor.shutdown(wait=True)
    except Exception as e:
        logger.warning(f"关闭线程池失败: {e}")


def main():
    """
    Run one ``proxy_loop`` worker per proxy and supervise them.

    A thread pool sized to the number of proxies runs the workers, and a daemon
    thread runs ``monitor_browsers_and_restart`` with the same number as limit.
    The supervising loop polls every 0.2 s and:

    - stops the batch when ``restart_event`` is set;
    - stops the batch, waits one minute, cleans up all browsers and waits for
      the no-run window to end when ``is_forbidden_time()`` is true;
    - resubmits any worker whose future finished while no stop was requested.

    After a batch ends a new one is started, so this function only returns when
    the proxy list is empty.
    """
    proxies = list(proxy_list)
    if not proxies:
        # ThreadPoolExecutor rejects max_workers=0, so there is nothing to run.
        logger.error("代理列表为空,无法启动任务")
        return

    while True:
        stop_event = threading.Event()
        restart_event = threading.Event()

        if is_forbidden_time():
            # The event was just created and nothing sets it here, so this
            # wait acts as a 60-second pause.
            if stop_event.wait(timeout=60):
                continue
            cleanup_all_browsers()
            logger.info("处于禁跑时段,等待至禁跑结束")
            wait_until_out_of_forbidden()
            continue

        executor = ThreadPoolExecutor(max_workers=len(proxies))
        try:
            futures_map = {executor.submit(proxy_loop, p, stop_event): p for p in proxies}

            monitor_thread = threading.Thread(
                target=monitor_browsers_and_restart,
                args=(len(proxies), stop_event, restart_event),
                daemon=True,
            )
            monitor_thread.start()

            while True:
                if restart_event.is_set():
                    stop_event.set()
                    _shutdown_executor(executor)
                    break

                if is_forbidden_time():
                    logger.info("进入禁跑时段,停止当前批次,等待1分钟后清理指纹浏览器")
                    stop_event.set()
                    _shutdown_executor(executor)
                    time.sleep(60)
                    cleanup_all_browsers()
                    wait_until_out_of_forbidden()
                    break

                # Iterate over a snapshot because the map is modified below.
                for f, proxy in list(futures_map.items()):
                    if not f.done() or stop_event.is_set() or restart_event.is_set():
                        continue
                    try:
                        worker_error = f.exception()
                    except Exception as e:
                        # Raised when the future was cancelled.
                        worker_error = e
                    if worker_error is not None:
                        logger.error(f"代理线程异常退出: {proxy} - {worker_error}")
                    try:
                        new_future = executor.submit(proxy_loop, proxy, stop_event)
                        del futures_map[f]
                        futures_map[new_future] = proxy
                    except Exception as e:
                        logger.error(f"重启代理线程失败: {proxy} - {e}")

                time.sleep(0.2)

            # stop_event is set on both exits above, which ends the monitor loop.
            monitor_thread.join(timeout=5)
        finally:
            _shutdown_executor(executor)
        continue
|
||||
|
||||
def main2():
    """
    Single-browser run: open the claim form with a default ``Auto`` instance
    and fill in the questionnaire once the home page is ready.
    """
    auto = Auto()
    auto.open_url('https://www.claimform.youtubeprivacysettlement.com')
    # Only continue when the home page showed up.
    if auto.wait_home():
        auto.fill_questionnaire()
    # auto.solve_recaptcha()
|
||||
|
||||
|
||||
# Script entry point: when run directly, execute the single-browser flow in main2().
if __name__ == "__main__":
|
||||
main2()
|
||||
95
spider/proxys.py
Normal file
95
spider/proxys.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# Proxy groups. Every entry is a string of the form ``host:port:user:pwd``.

work = [
    "us.novproxy.io:1000:qyd00056-region-CA:qyd00056",
    "us.novproxy.io:1000:qyd00054-region-US:qyd00054",
    "us.novproxy.io:1000:qyd00053-region-CA:qyd00053",
    "us.novproxy.io:1000:qyd00052-region-US:qyd00052",
]

ca1 = [
    "us.novproxy.io:1000:qyd00051-region-CA:qyd00051",
    "us.novproxy.io:1000:qyd00050-region-US:qyd00050",
    "us.novproxy.io:1000:qyd00049-region-CA:qyd00049",
    "us.novproxy.io:1000:qyd00048-region-US:qyd00048",
    "us.novproxy.io:1000:qyd00047-region-CA:qyd00047",
]

ca2 = [
    "us.novproxy.io:1000:qyd00046-region-US:qyd00046",
    "us.novproxy.io:1000:qyd00045-region-CA:qyd00045",
    "us.novproxy.io:1000:qyd00044-region-US:qyd00044",
    "us.novproxy.io:1000:qyd00043-region-CA:qyd00043",
    "us.novproxy.io:1000:qyd00042-region-US:qyd00042",
]

ca3 = [
    "us.novproxy.io:1000:qyd00041-region-CA:qyd00041",
    "us.novproxy.io:1000:qyd00040-region-CA:qyd00040",
    "us.novproxy.io:1000:qyd00039-region-US:qyd00039",
    "us.novproxy.io:1000:qyd00038-region-CA:qyd00038",
    "us.novproxy.io:1000:qyd00037-region-US:qyd00037",
]

cwd = [
    "us.novproxy.io:1000:qyd00036-region-CA:qyd00036",
    "us.novproxy.io:1000:qyd00035-region-US:qyd00035",
    "us.novproxy.io:1000:qyd00034-region-CA:qyd00034",
    "us.novproxy.io:1000:qyd00033-region-US:qyd00033",
]

wt = [
    "us.novproxy.io:1000:qyd00032-region-CA:qyd00032",
    "us.novproxy.io:1000:qyd00031-region-US:qyd00031",
    "us.novproxy.io:1000:qyd00030-region-CA:qyd00030",
    "us.novproxy.io:1000:qyd00029-region-US:qyd00029",
]

hc = [
    "us.novproxy.io:1000:qyd00028-region-CA:qyd00028",
    "us.novproxy.io:1000:qyd00027-region-US:qyd00027",
    "us.novproxy.io:1000:qyd00026-region-CA:qyd00026",
    "us.novproxy.io:1000:qyd00025-region-US:qyd00025",
]

zlj = [
    "us.novproxy.io:1000:qyd00024-region-CA:qyd00024",
    "us.novproxy.io:1000:qyd00023-region-US:qyd00023",
    "us.novproxy.io:1000:qyd00022-region-CA:qyd00022",
    "us.novproxy.io:1000:qyd00021-region-US:qyd00021",
]

wzq = [
    "us.novproxy.io:1000:qyd00020-region-CA:qyd00020",
    "us.novproxy.io:1000:qyd00019-region-US:qyd00019",
    "us.novproxy.io:1000:qyd00018-region-CA:qyd00018",
    "us.novproxy.io:1000:qyd00017-region-US:qyd00017",
]

xy = [
    "us.novproxy.io:1000:qyd00016-region-CA:qyd00016",
    "us.novproxy.io:1000:qyd00015-region-US:qyd00015",
    "us.novproxy.io:1000:qyd00014-region-CA:qyd00014",
    "us.novproxy.io:1000:qyd00013-region-US:qyd00013",
]

yll = [
    "us.novproxy.io:1000:qyd00012-region-CA:qyd00012",
    "us.novproxy.io:1000:qyd00011-region-US:qyd00011",
    "us.novproxy.io:1000:qyd00010-region-CA:qyd00010",
    "us.novproxy.io:1000:qyd00009-region-US:qyd00009",
]

szt = [
    "us.novproxy.io:1000:qyd00008-region-CA:qyd00008",
    "us.novproxy.io:1000:qyd00007-region-US:qyd00007",
    "us.novproxy.io:1000:qyd00006-region-CA:qyd00006",
    "us.novproxy.io:1000:qyd00005-region-US:qyd00005",
]

hz = [
    "us.novproxy.io:1000:qyd00004-region-CA:qyd00004",
    "us.novproxy.io:1000:qyd00003-region-US:qyd00003",
    "us.novproxy.io:1000:qyd00002-region-CA:qyd00002",
    "us.novproxy.io:1000:qyd00001-region-US:qyd00001",
]


# The group bound to `proxy_list`; assign a different list above to switch groups.
proxy_list = work
|
||||
31
spider/requirements.txt
Normal file
31
spider/requirements.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
aiohttp
|
||||
requests
|
||||
curl_cffi
|
||||
aiohttp-socks
|
||||
requests[socks]
|
||||
fake_useragent
|
||||
apscheduler
|
||||
aiofiles
|
||||
loguru
|
||||
portalocker
|
||||
aiomultiprocess
|
||||
faker
|
||||
eth_account
|
||||
eth_utils
|
||||
solders
|
||||
toncli
|
||||
ecdsa
|
||||
base58
|
||||
ddddocr
|
||||
aiohttp_socks
|
||||
websockets
|
||||
psutil
|
||||
socks
|
||||
drissionpage
|
||||
fastapi
|
||||
uvicorn
|
||||
pydantic
|
||||
ultralytics
|
||||
opencv-python-headless
|
||||
torch
|
||||
pillow
|
||||
22
spider/test.py
Normal file
22
spider/test.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from DrissionPage import Chromium
|
||||
from loguru import logger
|
||||
from bit_browser import bit_browser
|
||||
# http = bit_browser.bit_browser_open('871851b9835d42b3911f39162b3427d5')
|
||||
# print(http)
|
||||
# Attach to an already running browser through its local debugging address.
browser = Chromium('127.0.0.1:65480')
tab = browser.latest_tab
# tab.get('bitbrowser://settings/clearBrowserData')

# Walk down the nested shadow roots of the settings page, one component per
# step, until the clear-browsing-data dialog is reached.
res = tab.ele('t:settings-ui', timeout=3)
for shadow_child in (
    't:settings-main',
    't:settings-basic-page',
    't:settings-privacy-page',
    't:settings-clear-browsing-data-dialog',
    't:cr-dialog',
):
    res = res.sr(shadow_child)

res = res.ele('t:cr-page-selector@id=pages')
res = res.ele('t:settings-dropdown-menu@id=clearFromBasic').shadow_root

# Choose the option whose value is 4 in the drop-down.
dropdown = res.ele('t:select@id=dropdownMenu')
dropdown.ele('t:option@value=4').click()
# res = tab.ele('t:settings-dropdown-menu@id=clearFromBasic',timeout=3)
print(res)
if res:
    logger.info(f"html: {res.html}")
|
||||
# res = tab.ele('t:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=3)
|
||||
# if res:
|
||||
# logger.info("提交问卷成功")
|
||||
# res = tab.ele('t:b')
|
||||
# if res:
|
||||
# logger.info(f"反馈地址: {res.text}")
|
||||
1051
spider/work.py
Normal file
1051
spider/work.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user