This commit is contained in:
2025-12-12 14:40:04 +08:00
commit 45ff5a62e3
36 changed files with 5640 additions and 0 deletions

120
spider/api.py Normal file
View File

@@ -0,0 +1,120 @@
import requests
from loguru import logger
import csv
import os
import random
class Api:
def __init__(self) -> None:
# self.base_url = 'http://127.0.0.1:6060'
self.base_url = 'http://192.168.11.67:6060'
# 创建店铺
def create_shop(self, city: str, street: str, shop_name: str) -> dict:
url = f'{self.base_url}/country/shop'
item = {
'city': city,
'street': street,
'shop_name': shop_name,
}
response = requests.post(url, json=item).json()
logger.info(response)
return response
# 查询店铺
def get_shop(self, city: str) -> dict:
url = f'{self.base_url}/country/shop'
response = requests.get(url).json()
# logger.info(response)
return response
# 创建信息
def create_info(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str, status: bool = False, email_content: str | None = None) -> dict:
"""
创建信息记录(孩子与家长字段)
参数:
child_full_name (str): 孩子全名
parent_full_name (str): 家长全名
child_birthday (str): 孩子生日(字符串)
address_str (str): 街道地址
city_name (str): 城市
parent_phone (str): 家长电话
postcode (str): 邮编
province (str): 省/州全称
email (str): 邮箱
text (str): 文本内容(如反馈地址)
status (bool): 状态
email_content (str | None): 邮件内容
返回值:
dict: 接口返回的数据
"""
url = f'{self.base_url}/country/info'
item = {
"child_full_name": child_full_name,
"parent_full_name": parent_full_name,
"child_birthday": child_birthday,
"address_str": address_str,
"city_name": city_name,
"parent_phone": parent_phone,
"postcode": postcode,
"province": province,
"status": status,
"email": email,
"email_content": email_content,
"text": text
}
response = requests.post(url, json=item).json()
logger.info(response)
return response
# 根据城市 随机获取一个店铺
def get_random_shop(self) -> dict:
url = f'{self.base_url}/country/shop/random'
response = requests.get(url).json()
# logger.info(response)
if not response.get('street'):
logger.error(f'没有店铺')
return None
return response
def main():
    """Create one shop per row of ``data.csv`` located next to this module.

    The first row is treated as a header and skipped. Every other row must
    have at least three columns, read positionally as
    ``city, shop_name, street`` (indices 0, 1, 2); shorter rows are logged and
    skipped. The city value is cleaned before use: a literal ``' (city)'``
    suffix is removed, rows whose city contains ``'Quebec'`` are skipped, and
    only the part before the first comma is kept.
    """
    api = Api()
    csv_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    if not os.path.exists(csv_path):
        logger.error(f'CSV 文件不存在: {csv_path}')
        return
    # newline='' is what the csv module expects so that newlines embedded in
    # quoted fields are parsed correctly.
    with open(csv_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # discard the header row
        for row in reader:
            if len(row) < 3:
                logger.warning(f'行列数不足,跳过: {row}')
                continue
            city, shop_name, street = row[0], row[1], row[2]
            city = city.replace(' (city)', '')
            if 'Quebec' in city:
                continue
            city = city.split(',')[0]
            logger.info(f'city: {city}, street: {street}, shop_name: {shop_name}')
            api.create_shop(city, street, shop_name)
# def main2():
# api = Api()
# city = 'Toronto'
# shop = api.get_random_shop()
# if shop:
# logger.info(shop)
# if __name__ == '__main__':
# main()
# Shared module-level client, instantiated as a side effect of importing this module.
api = Api()

313
spider/auto_challenge.py Normal file
View File

@@ -0,0 +1,313 @@
import io
import time
import uuid
from typing import Optional, List
import requests
from PIL import Image
import base64
from loguru import logger
# Pillow LANCZOS resampling filter; not referenced by the code visible in this module.
RESAMPLE_FILTER = Image.Resampling.LANCZOS
class ReCaptchaHandler:
    """Solve a reCAPTCHA image challenge through a browser-automation driver.

    Tile images are sent to an external recognition service (``api_host``) and
    the tiles it reports are clicked in the challenge iframe.

    NOTE(review): ``driver`` is presumably a DrissionPage page/tab object (the
    code relies on ``.ele``, ``.eles``, ``.run_js`` and ``.url``) — confirm.
    """

    # XPath of every tile inside '#rc-imageselect-target', keyed by row-major index.
    path_map_44 = {
        row * 4 + col: f"//table/tbody/tr[{row + 1}]/td[{col + 1}]"
        for row in range(4)
        for col in range(4)
    }
    path_map_33 = {
        row * 3 + col: f"//table/tbody/tr[{row + 1}]/td[{col + 1}]"
        for row in range(3)
        for col in range(3)
    }
    api_host = "http://192.168.11.13:7070/analyze_batch/"
    # Seconds to wait for the recognition service before giving up on a request.
    request_timeout = 60
    # Fallback script used when the iframe's inline style carries no visibility.
    # NOTE(review): the script has no `return`; depending on how the driver's
    # run_js wraps scripts it may always yield None — confirm.
    _VISIBILITY_JS = (
        'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");'
        'f ? getComputedStyle(f).visibility : null;'
    )

    def __init__(self, driver):
        """Store the driver and reset all per-challenge state."""
        self.driver = driver
        self.checkbox_iframe = None
        self.challenge_iframe = None
        self.challenge_type = None
        self.challenge_question = None
        # True while the current 3x3 challenge is still showing its initial grid.
        self.challenge_i33_first = True
        # tile index -> {'srcs': [base64 images seen so far], 'new': bool}
        self.i11s = {}
        self.challenge_44_img = None

    @staticmethod
    def split_image(image_bytes: bytes) -> Optional[List[str]]:
        """Cut an image into a 3x3 grid and return the tiles as base64 PNGs.

        Tiles are returned in row-major order; the last row/column absorbs any
        remainder pixels. Returns ``None`` when the bytes cannot be opened as
        an image.
        """
        try:
            img = Image.open(io.BytesIO(image_bytes))
        except Exception:
            return None
        width, height = img.size
        tile_width = width // 3
        tile_height = height // 3
        base64_tiles = []
        for i in range(3):
            for j in range(3):
                left = j * tile_width
                upper = i * tile_height
                right = (j + 1) * tile_width if j < 2 else width
                lower = (i + 1) * tile_height if i < 2 else height
                tile = img.crop((left, upper, right, lower))
                buf = io.BytesIO()
                tile.save(buf, format="PNG")
                base64_tiles.append(base64.b64encode(buf.getvalue()).decode())
        return base64_tiles

    def find_checkbox_iframe(self):
        """Locate the checkbox iframe and click its anchor.

        Returns ``True`` when the iframe was found and clicked, ``False``
        otherwise (lookup errors are logged and treated as "not found").
        """
        time.sleep(1)
        try:
            iframe = self.driver.ele('css: iframe[title="reCAPTCHA"]')
            if iframe:
                self.checkbox_iframe = iframe
                self.checkbox_iframe.ele("#recaptcha-anchor").click()
                return True
        except Exception as exc:
            logger.debug(f"reCAPTCHA checkbox iframe lookup failed: {exc}")
        return False

    def find_challenge_iframe(self):
        """Locate the challenge iframe by its English or Chinese title.

        Returns ``True`` and stores the iframe when found, ``False`` otherwise.
        """
        try:
            iframe = self.driver.ele("@|title=recaptcha challenge expires in two minutes@|title=reCAPTCHA 验证任务将于 2 分钟后过期")
            if iframe:
                self.challenge_iframe = iframe
                return True
        except Exception as exc:
            logger.debug(f"reCAPTCHA challenge iframe lookup failed: {exc}")
        return False

    def check_11_refresh(self, check_ele):
        """Wait for each clicked tile to show a replacement 1x1 image.

        Args:
            check_ele: Iterable of ``(tile_index, xpath)`` pairs; only the
                index is used.

        Every tracked tile first has its ``new`` flag cleared; a tile is then
        flagged ``new`` once an image not seen before for it is captured.

        NOTE(review): this polls with no upper bound. The original indentation
        was lost, so the ``break`` is placed inside the "new image" branch
        (wait until the tile really refreshed) — confirm against the original.
        """
        for entry in self.i11s.values():
            if entry.get("new"):
                entry['new'] = False
        for idx in [item[0] for item in check_ele]:
            if idx not in self.i11s:
                self.i11s[idx] = {'srcs': [], 'new': False}
            while True:
                cell = self.challenge_iframe.ele('#rc-imageselect-target').ele(
                    f"xpath:{self.path_map_33[idx]}")
                img_ele = cell.ele('.rc-image-tile-11', timeout=0.1)
                if not img_ele:
                    time.sleep(0.1)
                    continue
                b64_str = base64.b64encode(img_ele.src()).decode()
                if b64_str not in self.i11s[idx]['srcs']:
                    self.i11s[idx]['srcs'].append(b64_str)
                    self.i11s[idx]['new'] = True
                    break

    def click_answer(self, result, challenge_type):
        """Click the tiles named in a recognition result.

        Args:
            result: Decoded service response. For type 4,
                ``result["results"][0]["result"]`` is the list of tile indices;
                for type 3 each item of ``result["results"]`` carries
                ``image_id`` and ``result["target_found"]``.
            challenge_type: ``4`` for a 4x4 grid, ``3`` for a 3x3 grid.

        Returns:
            ``True`` when the verify button was pressed, ``False`` when the
            challenge was reloaded, is waiting for refreshed tiles, or the type
            is unknown.
        """
        target = '#rc-imageselect-target'
        if challenge_type == 4:
            for x in result["results"][0]['result']:
                self.challenge_iframe.ele(target).ele(
                    f"xpath:{self.path_map_44[x]}").click()
                time.sleep(0.1)
            self.challenge_iframe.ele('#recaptcha-verify-button').click()
            self.i11s.clear()
            return True
        if challenge_type == 3:
            found_ele = []
            for res in result["results"]:
                if res["result"].get('target_found'):
                    idx = int(res["image_id"])
                    self.challenge_iframe.ele(target).ele(
                        f"xpath:{self.path_map_33[idx]}").click()
                    found_ele.append((idx, self.path_map_33[idx]))
                    time.sleep(0.1)
            if found_ele:
                # Too few hits on the initial grid: ask for a new challenge.
                if len(found_ele) <= 2 and self.challenge_i33_first:
                    self.challenge_iframe.ele('#recaptcha-reload-button').click()
                    return False
                cls = self.challenge_iframe.ele(target).ele(
                    f"xpath:{found_ele[0][1]}").attr('class')
                # A tile that stays "selected" means a static grid: submit now.
                if 'rc-imageselect-tileselected' in cls:
                    self.challenge_iframe.ele('#recaptcha-verify-button').click()
                    self.i11s.clear()
                    return True
                # Otherwise tiles are being replaced: wait for the new images.
                self.check_11_refresh(found_ele)
                return False
            self.challenge_iframe.ele('#recaptcha-verify-button').click()
            self.i11s.clear()
            return True
        return False

    def challenge_i33(self):
        """Handle one round of a 3x3 challenge.

        With nine ``.rc-image-tile-33`` tiles present, the full image is split
        and all nine tiles are recognised; otherwise only tiles flagged ``new``
        in ``self.i11s`` are sent.
        """
        if len(self.challenge_iframe.eles('.rc-image-tile-33', timeout=1)) == 9:
            self.challenge_i33_first = True
            self.i11s.clear()
            first_ele = self.challenge_iframe.eles('.rc-image-tile-33')[0]
            tiles = self.split_image(first_ele.src())
            if tiles:
                images = dict(enumerate(tiles))
                if res := self.identify_verification_code(images):
                    self.click_answer(res, 3)
        else:
            self.challenge_i33_first = False
            data = {k: v['srcs'][-1] for k, v in self.i11s.items() if v['new']}
            if res := self.identify_verification_code(data):
                self.click_answer(res, 3)

    def challenge_i44(self):
        """Handle a 4x4 challenge: recognise the single image, click the answer."""
        ele = self.challenge_iframe.eles('.rc-image-tile-44')[0]
        b64_str = base64.b64encode(ele.src()).decode()
        self.challenge_44_img = b64_str
        if res := self.identify_verification_code({0: b64_str}):
            self.click_answer(res, 4)

    def identify_verification_code(self, images):
        """Send images to the recognition service.

        Args:
            images: Mapping of image id to base64 image; falsy images are skipped.

        Returns:
            The decoded JSON response, or ``None`` when there is nothing to send.
        """
        payload = {
            "images": [
                {
                    "image_id": str(k),
                    "image_base64": img,
                    "target_class": self.challenge_question,
                }
                for k, img in images.items()
                if img
            ]
        }
        if not payload["images"]:
            return None
        res = requests.post(self.api_host, json=payload, timeout=self.request_timeout)
        return res.json()

    @staticmethod
    def _style_visibility(style_str):
        """Return 'hidden' / 'visible' from an inline style, or None if unspecified."""
        if 'visibility' not in style_str:
            return None
        # Spaces are stripped first, so the needle must not contain one either.
        return 'hidden' if 'visibility:hidden' in style_str.replace(' ', '') else 'visible'

    def _challenge_visibility(self):
        """Read the challenge iframe's visibility: inline style first, then JS."""
        vis = None
        try:
            vis = self._style_visibility(self.challenge_iframe.attr('style') or '')
        except Exception:
            vis = None
        if vis is None:
            try:
                vis = self.driver.run_js(self._VISIBILITY_JS)
            except Exception:
                vis = None
        return vis

    def challenge(self):
        """Run the whole flow: click the checkbox, then solve until it passes.

        Returns:
            ``{"status": bool, "message": str}``. ``status`` is ``False`` only
            when no checkbox or challenge iframe was found.

        NOTE(review): both polling loops have no upper time bound, and any
        error while solving is logged and still reported as success, which
        matches the original best-effort behaviour.
        """
        if not self.find_checkbox_iframe():
            return {"status": False, "message": "no verification code found"}
        url_before = self.driver.url
        self.find_challenge_iframe()
        if not self.challenge_iframe:
            return {"status": False, "message": "no verification code found"}
        # Phase 1: wait until the checkbox passes on its own or the challenge shows.
        while True:
            time.sleep(1)
            if self.driver.url != url_before:
                return {"status": True, "message": "验证码自动通过1"}
            if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
                return {"status": True, "message": "验证码自动通过2"}
            if self._challenge_visibility() != 'hidden':
                break
        # Phase 2: keep solving rounds until the challenge disappears.
        try:
            while True:
                if self._challenge_visibility() == 'hidden':
                    break
                time.sleep(1)
                if self.driver.url != url_before:
                    return {"status": True, "message": "captcha successfully resolved"}
                if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
                    return {"status": True, "message": "captcha successfully resolved"}
                # The prompt's <strong> text is the object class to look for.
                self.challenge_question = self.challenge_iframe.ele("tag:strong").text
                if self.challenge_iframe.ele('.rc-image-tile-44', timeout=0.1):
                    self.challenge_i44()
                elif self.challenge_iframe.ele('.rc-image-tile-33', timeout=0.1) or \
                        self.challenge_iframe.ele('.rc-image-tile-11', timeout=0.1):
                    self.challenge_i33()
        except Exception as exc:
            logger.warning(f"reCAPTCHA solving loop aborted: {exc}")
        return {"status": True, "message": "captcha successfully resolved"}

318
spider/bit_browser.py Normal file
View File

@@ -0,0 +1,318 @@
import time
import requests
from loguru import logger
from functools import wraps
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that retries a function when it raises.

    :param max_retries: maximum number of attempts
    :param delay: pause before the first retry, in seconds
    :param backoff: factor the pause is multiplied by after every retry
    :return: a decorator; the wrapped function returns ``None`` once all
        attempts have failed (the last error is logged, not raised)
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            pause = delay
            while attempts < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempts += 1
                    if attempts < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempts + 1}/{max_retries} 因错误: {e}")
                        time.sleep(pause)
                        pause *= backoff
                        continue
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None  # attempts exhausted
            return None  # reached only when max_retries allows no attempt at all
        return wrapper
    return decorator
# 比特浏览器模块
class BitBrowser:
    """Client for the BitBrowser local HTTP API (default port 54345).

    Every public method except ``bit_browser_pid`` is wrapped by ``retry``:
    a failure is retried up to three times and finally reported by returning
    ``None`` instead of raising.
    """

    def __init__(self):
        self.bit_host = "http://127.0.0.1"

    def _post(self, path: str, payload: dict, bit_port: str | None) -> dict:
        """POST ``payload`` as JSON to ``path``; raise if the API reports failure."""
        url = f"{self.bit_host}:{bit_port}{path}"
        headers = {'Content-Type': 'application/json'}
        res = requests.post(url, json=payload, headers=headers).json()
        if not res.get('success'):
            raise Exception(res)
        return res

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_create(self, remark: str = '指纹浏览器', ua: str = None, host: str = None, port: str = None,
                           proxy_user: str = None,
                           proxy_pwd: str = None, proxy_type: str = 'noproxy', urls: str = None,
                           bit_port: str = "54345") -> str | None:
        """Create a browser profile.

        :param remark: remark; also used (cut to 40 characters) as the window name
        :param ua: user agent placed in the fingerprint (``None`` is sent as-is)
        :param host: proxy host (optional)
        :param port: proxy port (optional)
        :param proxy_user: proxy account (optional)
        :param proxy_pwd: proxy password (optional)
        :param proxy_type: one of 'noproxy', 'http', 'https', 'socks5', 'ssh'
        :param urls: extra URLs to open, comma separated (optional)
        :param bit_port: API port, default 54345
        :return: ID of the new browser, or ``None`` after all retries failed
        """
        data = {
            'name': f'{remark[:40]}',  # window name
            'remark': f'{remark}',
            'proxyMethod': 2,  # 2 = custom proxy, 3 = extract IP
            'proxyType': f'{proxy_type}',
            "browserFingerPrint": {"userAgent": ua},
        }
        optional = {
            'host': host,
            'port': port,
            'proxyUserName': proxy_user,
            'proxyPassword': proxy_pwd,
            'url': urls,
        }
        data.update({key: value for key, value in optional.items() if value is not None})
        res = self._post('/browser/update', data, bit_port)
        return res['data']['id']

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_update(self, pk: str, remark: str = None, proxyType: str = 'noproxy', host: str = None,
                           port: str = None, proxy_user: str = None, proxy_pwd: str = None, urls: str = None,
                           bit_port: str = "54345") -> bool | None:
        """Partially update a browser profile; only supplied fields are sent.

        :param pk: browser ID
        :param remark: new remark, also written to the window name
        :param proxyType: noproxy|http|https|socks5; the default 'noproxy'
            means "leave unchanged" because it is never sent
        :param host: proxy host
        :param port: proxy port (converted to ``int`` before sending)
        :param proxy_user: proxy account
        :param proxy_pwd: proxy password
        :param urls: extra URLs to open, comma separated
        :param bit_port: API port, default 54345
        :return: ``True`` on success, ``None`` after all retries failed
        """
        data = {'ids': [pk]}
        if remark is not None:
            data['remark'] = remark
            data['name'] = remark
        if urls is not None:
            data['url'] = urls
        if proxyType != 'noproxy':
            data['proxyType'] = proxyType
        if host is not None:
            data['host'] = host
        if port is not None:
            data['port'] = port if isinstance(port, int) else int(port)
        if proxy_user is not None:
            data['proxyUserName'] = proxy_user
        if proxy_pwd is not None:
            data['proxyPassword'] = proxy_pwd
        self._post('/browser/update/partial', data, bit_port)
        return True

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_open(self, pk: str, bit_port: str = "54345") -> str | None:
        """Open a browser window.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: the ``data.http`` debugger address, or ``None`` after all retries failed
        """
        res = self._post('/browser/open', {"id": f'{pk}'}, bit_port)
        return res['data']['http']

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_close(self, pk: str, bit_port: str = "54345"):
        """Close a browser window and verify that it is gone.

        After the close request the method sleeps 3 seconds, then checks the
        window status on the same ``bit_port``; a still-running window raises
        so the surrounding retry closes it again.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: ``True`` on success, ``None`` after all retries failed
        """
        self._post('/browser/close', {'id': f'{pk}'}, bit_port)
        time.sleep(3)
        # Forward bit_port: the status check must hit the same API instance.
        if self.bit_browser_status(pk, bit_port):
            raise Exception(f'浏览器ID {pk} 未正常关闭, 等待3秒后重试')
        return True

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_delete(self, pk: str, bit_port: str = "54345"):
        """Delete a browser profile.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: ``True`` on success, ``None`` after all retries failed
        """
        self._post('/browser/delete', {'id': f'{pk}'}, bit_port)
        return True

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
                        bit_port: str | None = "54345") -> dict | None:
        """List browser profiles.

        :param page: page number
        :param limit: page size
        :param group_id: group ID filter (optional)
        :param bit_port: API port, default 54345
        :return: the full response, e.g. ``{'success': True, 'data': {'page': 1,
            'pageSize': 10, 'totalNum': 128, 'list': [{'id': '...'}]}}``, or
            ``None`` after all retries failed
        """
        data = {'page': page, 'pageSize': limit}
        if group_id is not None:
            data['groupId'] = group_id
        return self._post('/browser/list', data, bit_port)

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_detail(self, pk: str, bit_port: str = "54345") -> dict | None:
        """Fetch the details of one browser profile.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: the full response (profile fields under ``data``), or ``None``
            after all retries failed
        """
        return self._post('/browser/detail', {'id': f'{pk}'}, bit_port)

    def bit_browser_pid(self, pk: str, bit_port: str = "54345") -> str:
        """Return the process ID of a running browser.

        Unlike the other methods this one is not retried: API failures raise,
        and a ``pk`` missing from the response raises ``KeyError``.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: the value stored under ``pk`` in the response ``data``
        """
        res = self._post('/browser/pids/alive', {"ids": [pk]}, bit_port)
        return res['data'][pk]

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_status(self, pk: str, bit_port: str = "54345") -> bool | None:
        """Tell whether a browser window is currently running.

        :param pk: browser ID
        :param bit_port: API port, default 54345
        :return: ``True`` when the response ``data`` has an entry for ``pk``,
            ``False`` when it does not, ``None`` after all retries failed
        """
        res = self._post('/browser/pids', {'ids': [pk]}, bit_port)
        return res.get('data').get(pk) is not None
async def main():
    """Print fingerprint and proxy settings of every browser in one group.

    Pages through the group 100 profiles at a time and stops at the first
    empty (or failed) page. ``BitBrowser`` only offers synchronous methods,
    so they are called directly rather than awaited.
    """
    bit = BitBrowser()
    page = 0
    while True:
        res = bit.bit_browser_get(
            page=page,
            limit=100,
            group_id='4028808b9a52223a019a581bbea1275c')
        # The retry wrapper returns None when the request kept failing.
        if not res:
            break
        browsers = res["data"]["list"]
        if not browsers:
            break
        for item in browsers:
            browser_id = item["id"]
            detail = bit.bit_browser_detail(browser_id)
            if not detail:
                print(f'id -->{browser_id}')
                print('=' * 50)
                continue
            data = detail["data"]
            ua = data["browserFingerPrint"]["userAgent"]
            proxy_type = data.get("proxyType")
            host = data.get("host")
            port = data.get("port")
            proxy_account = data.get("proxyUserName")
            proxy_password = data.get("proxyPassword")
            print(f'id -->{browser_id}')
            print(f'ua -->{ua}')
            print(f'proxy_type -->{proxy_type}')
            print(f'host -->{host}')
            print(f'port -->{port}')
            print(f'proxy_account -->{proxy_account}')
            print(f'proxy_password -->{proxy_password}')
            print('=' * 50)
        page += 1
def main2():
    """Scratch entry point: pins one browser ID and builds a client for manual calls."""
    browser_id = '5ba9eb974c7c45e2bb086585c75f70e8'
    bit = BitBrowser()
    # Manual experiments previously run from here (currently disabled):
    #   bit.bit_browser_close(browser_id)
    #   print(bit.bit_browser_get())
# if __name__ == '__main__':
# main2()
# Shared module-level client, instantiated as a side effect of importing this module.
bit_browser = BitBrowser()

851
spider/mail_.py Normal file
View File

@@ -0,0 +1,851 @@
import asyncio
import imaplib
import email
import random
import socket
import string
import time
from email.header import decode_header
from datetime import timezone, timedelta
import email.utils
import aiohttp
import socks
import requests
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from functools import wraps
from loguru import logger
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that retries a synchronous function when it raises.

    :param max_retries: maximum number of attempts
    :param delay: pause before the first retry, in seconds
    :param backoff: factor the pause is multiplied by after every retry
    :return: a decorator; the wrapped function returns ``None`` once all
        attempts have failed (the last error is logged, not raised)
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            pause = delay
            while attempts < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempts += 1
                    if attempts < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempts + 1}/{max_retries} 因错误: {e}")
                        time.sleep(pause)
                        pause *= backoff
                        continue
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None  # attempts exhausted
            return None  # reached only when max_retries allows no attempt at all
        return wrapper
    return decorator
def async_retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """Build a decorator that retries a coroutine function when it raises.

    :param max_retries: maximum number of attempts
    :param delay: pause before the first retry, in seconds
    :param backoff: factor the pause is multiplied by after every retry
    :return: a decorator; the wrapped coroutine returns ``None`` once all
        attempts have failed (the last error is logged, not raised)
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            attempts = 0
            pause = delay
            while attempts < max_retries:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    attempts += 1
                    if attempts < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempts + 1}/{max_retries} 因错误: {e}")
                        await asyncio.sleep(pause)  # non-blocking pause
                        pause *= backoff
                        continue
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None  # attempts exhausted
            return None  # reached only when max_retries allows no attempt at all
        return wrapper
    return decorator
# 域名管理类 - 高内聚低耦合的域名管理方案
class DomainManager:
    """Single source of truth for the mailbox domains this project supports.

    A domain's "type" is its position in the internal list, so the list order
    is part of the contract: index 0 is gmail.com and index 1
    (qianyouduo.com) is the fallback for unknown types or domains.
    """

    def __init__(self):
        # Add new domains here only. Positions are the public type numbers,
        # so existing entries (including the repeated one) must not move.
        self._domains = [
            "gmail.com",
            "qianyouduo.com",
            "rxybb.com",
            "cqrxy.vip",
            "0n.lv",
            "qianyouduo.com",
            "ziyouzuan.com",
            "emaing.online",
            "emaing.fun",
            "emaing.asia",
            "isemaing.site",
            "emaing.cyou",
            "emaing.site",
            "emaing.icu",
            "emaing.store",
            "emaing.pw",
            "emaing.xyz",
            "qydkjgs.asia",
            "qydkj.homes",
            "qydkj.baby",
            "qydkj.cyou",
            "qydkjgs.autos",
            "qydkj.autos",
            "qydkjgs.cyou",
            "qydkjgs.homes",
            "qydgs.asia",
            "qydkj.asia",
            "qydgs.cyou",
            "lulanjing.asia",
            "lisihan.asia",
            "mmwan.asia",
            "xyttan.asia",
            "zpaily.asia",
            "youxinzhiguo.asia",
            "huijinfenmu.asia",
            "linghao.asia",
            "cqhc.asia",
            "huacun.asia",
            "huachen.asia",
            "yisabeier.asia",
            "xinxinr.cyou",
            "lilisi.asia",
            "xybbwan.cyou",
            "zhongjing.cyou",
            "zprxy.cyou",
            "cqhuacun.cyou",
            "huazong.icu",
            "huacun.cyou"
        ]

    def get_domain_by_type(self, mail_type: int) -> str:
        """Return the domain for a type number.

        :param mail_type: mailbox type number (list index)
        :return: the matching domain, or the index-1 fallback when out of range
        """
        if not (0 <= mail_type < len(self._domains)):
            return self._domains[1]
        return self._domains[mail_type]

    def get_domain_type(self, domain: str) -> int:
        """Return the type number of a domain.

        :param domain: domain name
        :return: index of its first occurrence, or 1 when the domain is unknown
        """
        try:
            position = self._domains.index(domain)
        except ValueError:
            position = 1
        return position

    def get_imap_server(self, mail_type: int) -> str:
        """Return the IMAP host (``imap.<domain>``) for a type number.

        :param mail_type: mailbox type number
        :return: IMAP server address
        """
        return "imap." + self.get_domain_by_type(mail_type)

    def get_imap_server_by_domain(self, domain: str) -> str:
        """Return the IMAP host (``imap.<domain>``) for a domain.

        :param domain: domain name
        :return: IMAP server address
        """
        return "imap." + domain

    def is_valid_domain(self, domain: str) -> bool:
        """Tell whether a domain is in the supported list.

        :param domain: domain name
        :return: ``True`` when supported
        """
        return domain in self._domains

    def get_all_domains(self) -> list:
        """Return a copy of the supported domain list.

        :return: new list; mutating it does not affect the manager
        """
        return list(self._domains)

    def get_domain_count(self) -> int:
        """Return how many entries the supported list holds.

        :return: number of entries
        """
        return len(self._domains)

    def get_creatable_domains(self) -> list:
        """Return the domains mailboxes may be created on (all but gmail.com).

        :return: list of creatable domains, in list order
        """
        return [name for name in self._domains if name != "gmail.com"]

    def get_creatable_domain_by_type(self, mail_type: int) -> str:
        """Return the creatable domain for a type number.

        :param mail_type: mailbox type number
        :return: the matching domain, with gmail.com replaced by the index-1 fallback
        """
        chosen = self.get_domain_by_type(mail_type)
        return self._domains[1] if chosen == "gmail.com" else chosen

    def get_random_creatable_domain(self) -> str:
        """Pick a random creatable domain (never gmail.com).

        :return: randomly chosen domain
        :raises ValueError: when no creatable domain exists
        """
        candidates = self.get_creatable_domains()
        if candidates:
            return random.choice(candidates)
        raise ValueError("无可用域名用于创建邮箱")
# 邮箱模块
class Mail:
def __init__(self):
    """Set the account-API base URL and create the domain manager."""
    self.api_host = 'http://111.10.175.206:5020'
    self.domain_manager = DomainManager()
def email_account_read(self, pk: int = None, account: str = None, status: bool = None, host: str = None,
                       proxy_account: str = None,
                       parent_account: str = None, order_by: str = None, level: int = None,
                       update_time_start: str = None, update_time_end: str = None, res_count: bool = False,
                       create_time_start: str = None, create_time_end: str = None, page: int = None,
                       limit: int = None) -> dict:
    """Read mail accounts from the account API.

    With ``pk`` the single account ``/mail/account/{pk}`` is fetched and every
    other argument is ignored. Otherwise ``/mail/account`` is queried with the
    filters that were supplied.

    :param pk: primary key (optional)
    :param account: account (optional)
    :param status: status (optional)
    :param host: proxy host (optional)
    :param proxy_account: proxy account (optional)
    :param parent_account: parent mailbox account (optional)
    :param order_by: id|create_time|update_time, prefix ``-`` for descending (optional)
    :param level: mailbox level (optional)
    :param update_time_start: update-time lower bound (optional)
    :param update_time_end: update-time upper bound (optional)
    :param res_count: ask for the total count; only sent when truthy
    :param create_time_start: creation-time lower bound (optional)
    :param create_time_end: creation-time upper bound (optional)
    :param page: page number (optional)
    :param limit: page size (optional)
    :return: decoded JSON response
    :raises Exception: for a list query whose ``code`` is not 200, 400 or 404
    """
    if pk is not None:
        return requests.get(f'{self.api_host}/mail/account/{pk}').json()

    leading_filters = (
        ('account', account),
        ('status', status),
        ('host', host),
        ('proxy_account', proxy_account),
        ('parent_account', parent_account),
        ('order_by', order_by),
        ('level', level),
        ('create_time_start', create_time_start),
        ('create_time_end', create_time_end),
        ('update_time_start', update_time_start),
        ('update_time_end', update_time_end),
    )
    query = {key: value for key, value in leading_filters if value is not None}
    if res_count:
        query['res_count'] = res_count
    for key, value in (('page', page), ('limit', limit)):
        if value is not None:
            query[key] = value

    res = requests.get(f'{self.api_host}/mail/account', params=query).json()
    if res.get('code') in [200, 400, 404]:
        return res
    raise Exception(res)
# 创建随机邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int | None = None) -> str:
    """Create a mailbox with a random local part on the mail admin API.

    :param count: length of the random local part (default 8)
    :param pwd: mailbox password (default Zpaily88)
    :param mail_type: domain type number; ``None`` picks a random creatable
        domain (gmail.com is never used)
    :return: the mailbox address; an "already in database" answer counts as success
    :raises Exception: with the HTTP status when it is not 201 (handled by the
        ``retry`` wrapper, which finally returns ``None``)
    """
    # Draw the name before the domain so random numbers are consumed in a fixed order.
    name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
    if mail_type is None:
        mail_end = self.domain_manager.get_random_creatable_domain()
    else:
        mail_end = self.domain_manager.get_creatable_domain_by_type(mail_type)
    address = f"{name}@{mail_end}"

    # NOTE(review): the Authorization value is a hard-coded credential.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    payload = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    response = requests.post("https://mail.qianyouduo.com/admin/api/v1/boxes", headers=headers, json=payload)
    already_exists = 'Validation errors: [user] This combination of username and domain is already in database' in response.text
    if not already_exists and response.status_code != 201:
        raise Exception(response.status_code)
    return address
# 异步创建随机邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int | None = None) -> str:
    """Async variant: create a mailbox with a random local part.

    :param count: length of the random local part (default 8)
    :param pwd: mailbox password (default Zpaily88)
    :param mail_type: domain type number; ``None`` picks a random creatable
        domain (gmail.com is never used)
    :return: the mailbox address; an "already in database" answer counts as success
    :raises Exception: with the HTTP status when it is not 201 (handled by the
        ``async_retry`` wrapper, which finally returns ``None``)
    """
    # Draw the name before the domain so random numbers are consumed in a fixed order.
    name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
    if mail_type is None:
        mail_end = self.domain_manager.get_random_creatable_domain()
    else:
        mail_end = self.domain_manager.get_creatable_domain_by_type(mail_type)
    address = f"{name}@{mail_end}"

    # NOTE(review): the Authorization value is a hard-coded credential.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    payload = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    endpoint = "https://mail.qianyouduo.com/admin/api/v1/boxes"
    async with aiohttp.ClientSession() as session:
        async with session.post(endpoint, headers=headers, json=payload) as response:
            status = response.status
            text = await response.text()
            already_exists = 'Validation errors: [user] This combination of username and domain is already in database' in text
            if not already_exists and status != 201:
                raise Exception(status)
            return address
# 创建邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
    """Create a specific mailbox on the mail admin API.

    :param account: full address, ``name@domain``
    :param pwd: mailbox password (default Zpaily88)
    :return: the mailbox address when the API answers 201 or 400; ``None``
        for gmail.com addresses, which are never created
    :raises ValueError: for a domain outside the supported list
    :raises Exception: with the HTTP status when it is neither 201 nor 400

    Both exceptions are raised inside the ``retry`` wrapper, so callers see
    ``None`` after the retries instead of the exception.
    """
    parts = account.split('@')
    name, mail_end = parts[0], parts[1]
    if mail_end == "gmail.com":
        return None
    if not self.domain_manager.is_valid_domain(mail_end):
        raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")
    address = f"{name}@{mail_end}"

    # NOTE(review): the Authorization value is a hard-coded credential.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    payload = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    response = requests.post("https://mail.qianyouduo.com/admin/api/v1/boxes", headers=headers, json=payload)
    print(f'创建邮箱响应: {response.status_code}')
    if response.status_code in [201, 400]:
        return address
    raise Exception(response.status_code)
# 异步创建邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
    """Async variant: create a specific mailbox on the mail admin API.

    :param account: full address, ``name@domain``
    :param pwd: mailbox password (default Zpaily88)
    :return: the mailbox address when the API answers 201 or 400; ``None``
        for gmail.com addresses, which are never created
    :raises ValueError: for a domain outside the supported list
    :raises Exception: when the HTTP status is neither 201 nor 400

    Both exceptions are raised inside the ``async_retry`` wrapper, so callers
    see ``None`` after the retries instead of the exception.
    """
    parts = account.split('@')
    name, mail_end = parts[0], parts[1]
    if mail_end == "gmail.com":
        return None
    if not self.domain_manager.is_valid_domain(mail_end):
        raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")
    address = f"{name}@{mail_end}"

    # NOTE(review): the Authorization value is a hard-coded credential.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    payload = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    endpoint = "https://mail.qianyouduo.com/admin/api/v1/boxes"
    async with aiohttp.ClientSession() as session:
        async with session.post(endpoint, headers=headers, json=payload) as response:
            status = response.status
            if status in [201, 400]:
                return address
            raise Exception(f'status code: {status}')
# 删除邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_delete(self, account: str) -> bool:
    """
    Delete a mailbox through the mail server's admin REST API.

    :param account: Full address of the mailbox to delete.
    :return: ``True`` when the server answers 204 or 404 (404 is treated as
        "already gone"); ``False`` when the address is a Gmail address, which
        is never deleted.
    :raises Exception: If the server answers with any other status (the status
        code is the exception argument) or the request fails/times out.
    """
    # Gmail accounts are not managed by this server; skip without a request.
    if '@gmail.com' in account:
        return False
    # NOTE(review): the admin Basic-auth credential is embedded in source;
    # consider loading it from configuration instead.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    url = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}"
    # A timeout keeps a stalled server from blocking the caller forever.
    response = requests.delete(url, headers=headers, timeout=30)
    print(f'删除邮箱响应: --> {response.status_code}')
    if response.status_code not in [204, 404]:
        raise Exception(response.status_code)
    return True
# 异步删除邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_delete(self, account: str) -> bool:
    """
    Asynchronously delete a mailbox through the mail server's admin REST API.

    :param account: Full address of the mailbox to delete.
    :return: ``True`` when the server answers 204 or 404; ``False`` when the
        address contains ``@gmail.com`` (no request is sent in that case).
    :raises Exception: If the server answers with any other status.
    """
    # Gmail addresses are skipped before any network traffic happens.
    if '@gmail.com' in account:
        return False
    endpoint = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}"
    request_headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    async with aiohttp.ClientSession() as session:
        async with session.delete(endpoint, headers=request_headers) as response:
            # 204 = deleted, 404 = nothing to delete; both count as success.
            if response.status in (204, 404):
                return True
            raise Exception(f'status code: {response.status}')
# 处理邮件正文
@staticmethod
def extract_body(msg):
"""
提取邮件正文,优先返回 HTML 文本
- 更健壮的字符集解析:优先使用 part 的 charset 信息,失败回退到 utf-8 / latin-1
- 仅处理 inline 的 text/html 与 text/plain 内容
"""
html_text = None
plain_text = None
def _decode_part(part):
payload = part.get_payload(decode=True)
if payload is None:
return None
# 优先从内容中解析 charset
charset = (part.get_content_charset() or part.get_param('charset') or 'utf-8')
try:
return payload.decode(charset, errors='replace')
except LookupError:
# 未知编码时回退
try:
return payload.decode('utf-8', errors='replace')
except Exception:
return payload.decode('latin-1', errors='replace')
if msg.is_multipart():
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = part.get_content_disposition()
if content_type == "text/html" and (not content_disposition or content_disposition == "inline"):
html_text = _decode_part(part) or html_text
elif content_type == "text/plain" and (not content_disposition or content_disposition == "inline"):
plain_text = _decode_part(part) or plain_text
else:
content_type = msg.get_content_type()
if content_type == "text/html":
html_text = _decode_part(msg)
elif content_type == "text/plain":
plain_text = _decode_part(msg)
# 优先返回 HTML 文本,如果没有 HTML 文本,则返回纯文本
return html_text or plain_text or ""
# 转换邮件日期
@staticmethod
def convert_to_china_time(date_str):
"""
将邮件日期转换为10位时间戳中国时区
- 保留原始邮件的时区信息;若无时区,则按 UTC 处理
- 异常时返回当前时间戳,避免解析失败导致崩溃
"""
try:
email_date = email.utils.parsedate_to_datetime(date_str)
if email_date is None:
return int(time.time())
if email_date.tzinfo is None:
email_date = email_date.replace(tzinfo=timezone.utc)
china_time = email_date.astimezone(timezone(timedelta(hours=8)))
return int(china_time.timestamp())
except Exception:
return int(time.time())
# 获取邮件
def email_read(self, user: str, from_: str, limit: int = 1, is_del: bool = False) -> list | None:
"""
获取最新邮件
:param user: 母账号
:param from_: 发件人匹配关键字(可为邮箱或显示名,大小写不敏感)
:param limit: 获取邮件数量(默认1封)
:param is_del: 是否删除整个邮箱账号(非 Gmail 才会执行账号删除)
:return: 返回邮件列表,每个元素格式为:
{
"title": "邮件标题",
"from": "发件人",
"date": "邮件日期(中国时区时间戳)",
"content": "邮件正文",
"code": 200
}
"""
user_li = user.split('@')
domain = user_li[1]
# 使用域名管理器获取邮箱类型
if not self.domain_manager.is_valid_domain(domain):
return None
mail_type = self.domain_manager.get_domain_type(domain)
# 仅对 Gmail 进行点号归一化,其它域名按原样处理
local_part = user_li[0]
if domain == "gmail.com":
local_part = local_part.replace('.', '')
user = local_part + '@' + user_li[1]
proxy_host = None
proxy_port = None
proxy_user = None
proxy_pwd = None
if mail_type == 0:
res = self.email_account_read(parent_account=user, status=True, level=0)
if res['code'] != 200:
return None
pwd = res['items'][0]['parent_pwd']
proxy_host = res['items'][0]['host']
proxy_port = res['items'][0]['port']
proxy_user = res['items'][0]['proxy_account']
proxy_pwd = res['items'][0]['proxy_pwd']
else:
pwd = 'Zpaily88'
items = [] # 存储邮件列表
# 保存原始socket
original_socket = None
if proxy_host is not None and proxy_port is not None:
original_socket = socket.socket
if proxy_user is not None and proxy_pwd is not None:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True, proxy_user, proxy_pwd)
else:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True)
socket.socket = socks.socksocket
imap_server = None
had_error = False
try:
# 在设置代理后创建IMAP连接
imap_server = imaplib.IMAP4_SSL(self.domain_manager.get_imap_server(mail_type))
if not imap_server:
had_error = True
else:
# pwd去除空格
pwd = pwd.replace(' ', '')
# print(f'pwd: {pwd}')
imap_server.login(user, pwd)
status, _ = imap_server.select("INBOX")
if status != 'OK':
had_error = True
else:
status, email_ids = imap_server.search(None, "ALL")
if status != 'OK':
had_error = True
else:
email_id_list = email_ids[0].split()
# 获取最近limit条邮件ID
recent_ids = email_id_list[-20:] # 仍然获取最近20封以确保有足够的邮件可以筛选
found_count = 0 # 记录找到的符合条件的邮件数量
for email_id in recent_ids[::-1]: # 从最新的邮件开始处理
if found_count >= limit: # 如果已经找到足够数量的邮件,就退出循环
break
status, msg_data = imap_server.fetch(email_id, "(RFC822)")
for response in msg_data:
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
# 兼容性发件人匹配:解析地址与显示名,大小写不敏感,支持子串匹配
from_field = msg.get("From", "")
addresses = email.utils.getaddresses([from_field])
needle = (from_ or "").lower()
candidates = []
for name, addr in addresses:
if name:
candidates.append(name.lower())
if addr:
candidates.append(addr.lower())
if any(needle in c for c in candidates):
# 标题解码,处理无标题或编码缺失的情况
raw_subject = msg.get("Subject")
subject = ""
if raw_subject is not None:
dh = decode_header(raw_subject)
if dh:
s, enc = dh[0]
if isinstance(s, bytes):
try:
subject = s.decode(enc or 'utf-8', errors='replace')
except LookupError:
subject = s.decode('utf-8', errors='replace')
else:
subject = s
item = {
"title": subject,
"from": msg["From"],
"content": self.extract_body(msg),
"code": 200
}
# 获取并转换邮件时间
date_str = msg["Date"]
if date_str:
item["date"] = self.convert_to_china_time(date_str)
items.append(item)
found_count += 1
if found_count >= limit: # 如果已经找到足够数量的邮件,就跳出内层循环
break
# 读取完成不再对单封邮件做删除标记与 expunge
except imaplib.IMAP4.error as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'连接邮箱失败: {e}', 'code': 500})
had_error = True
except Exception as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'获取邮件异常: {e}', 'code': 500})
had_error = True
finally:
try:
# 检查连接是否建立
if 'imap_server' in locals() and imap_server is not None:
try:
# 先检查是否处于已选择状态
if hasattr(imap_server, 'state') and imap_server.state == 'SELECTED':
imap_server.close()
except Exception as e:
logger.error(f"关闭IMAP文件夹时发生错误: {e}")
try:
# 无论如何尝试登出
imap_server.logout()
except Exception as e:
logger.error(f"登出IMAP服务器时发生错误: {e}")
# 在Windows上可能需要强制关闭socket
try:
if hasattr(imap_server, 'sock') and imap_server.sock is not None:
imap_server.sock.close()
except Exception as sock_err:
logger.error(f"强制关闭socket时发生错误: {sock_err}")
except Exception as outer_e:
logger.error(f"处理IMAP连接关闭时发生错误: {outer_e}")
finally:
# 重置socket设置如果使用了代理
if proxy_host is not None and original_socket is not None:
socket.socket = original_socket
# 若成功获取到至少一封匹配邮件且请求删除,则删除整个邮箱账号
if is_del and len(items) > 0:
try:
self.email_delete(user)
except Exception as del_err:
logger.error(f"删除邮箱账号失败: {del_err}")
if had_error:
return None
if len(items) == 0:
return None
return items # 返回邮件列表
async def main():
    """
    Usage example for the mailbox helper: create a random mailbox, then delete it.

    Other calls kept for reference:
        mail.email_create('0gz3vvd4@' + 'qydgs.asia')
        mail.email_read('0gz3vvd4@qydgs.asia', '@', 1, is_del=True)
    """
    client = Mail()
    # Create a mailbox with a random address.
    created_address = client.email_create_random()
    print(f"创建的随机邮箱: {created_address}")
    # Remove the mailbox that was just created.
    deleted = client.email_delete(created_address)
    print(f"删除的邮箱: {deleted}")
mail_ = Mail()
# if __name__ == '__main__':
# asyncio.run(main())

765
spider/main.py Normal file
View File

@@ -0,0 +1,765 @@
import random
import time
from datetime import datetime
from DrissionPage import Chromium
from loguru import logger
from work import generate_child_parent_names
from mail_ import mail_
from bit_browser import bit_browser
from api import api
from proxys import proxy_list
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from auto_challenge import ReCaptchaHandler
class Auto:
def __init__(self, http: str = None):
# self.browser = Chromium(http)
self.browser = Chromium()
self.tab = self.browser.latest_tab
pass
# cf打码
def solve_cloudflare(self, is_ok: bool = False):
tab = self.browser.latest_tab
for _ in range(5):
tab.wait(1)
res = tab.ele(
't:h1@text()=Sorry, you have been blocked', timeout=1)
if res:
logger.error("Cloudflare验证失败")
return False
try:
shadow1 = tab.ele(
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
iframe = shadow1.get_frame(1)
if iframe:
logger.debug("找到Cloudflare iframe")
shadow2 = iframe.ele('x:/html/body').shadow_root
if shadow2:
logger.debug("找到Cloudflare iframe body shadow root")
status = shadow2.ele(
'x://span[text()="Verifying..."]', timeout=1.5)
if status:
tab.wait(3)
status = shadow2.ele(
'x://span[text()="Success!"]', timeout=1.5)
if status:
logger.debug("Cloudflare验证成功")
return True
checkbox = shadow2.ele(
'x://input[@type="checkbox"]', timeout=1.5)
if checkbox:
checkbox.click()
logger.debug("点击Cloudflare复选框")
tab.wait(3)
logger.debug("重新获取状态")
# return False
except Exception as e:
# logger.error(f"处理Cloudflare异常: {e}")
if is_ok:
logger.debug(f"cloudflare处理通过: {e}")
return True
return self.solve_cloudflare(is_ok=True)
tab.wait(1)
return False
# 谷歌验证码
def solve_recaptcha(self):
logger.debug("开始解决谷歌验证码")
recaptcha_handler = ReCaptchaHandler(self.tab)
res = recaptcha_handler.challenge()
if res.get("status"):
logger.debug("谷歌验证码成功")
iframe = self.tab.ele('t:iframe@title=reCAPTCHA')
# print(iframe)
res = iframe.ele('t:div@class=recaptcha-checkbox-border')
if res:
logger.debug(f"html: {res.html}")
if 'display: none;' in res.html:
logger.debug("谷歌验证码成功")
return True
else:
print("No element found")
return False
logger.error("谷歌验证码失败")
return False
# 打开URL
def open_url(self, url: str):
self.tab.get(url)
def get_tab(self):
return self.tab
# 等待进入首页
def wait_home(self):
logger.debug("等待进入首页")
jc = 0
while True:
if jc > 3:
logger.error("等待进入首页超过5次未成功")
return False
self.tab.wait(1)
bol = self.tab.ele(
't:div@text():YOUTUBE PRIVACY SETTLEMENT', timeout=1)
if bol:
logger.debug("成功进入首页")
return True
jc += 1
# 随机取城市
def get_random_city(self, province: str | None = None):
cities = {
"Alberta": ["Calgary", "Edmonton"],
"British Columbia": ["Vancouver"],
# "Manitoba": ["Winnipeg", "Rochester"],
# "New Brunswick": ["Fredericton", "Moncton"],
# "Newfoundland and Labrador": ["St. John's", "Halifax"],
"Nova Scotia": ["Halifax"],
"Ontario": ["Toronto"],
# "Prince Edward Island": ["Charlottetown", "St. John's"],
# "Quebec": ["Quebec City", "Montreal"],
# "Saskatchewan": ["Saskatoon", "Regina"],
}
if province is None:
province = random.choice(list(cities.keys()))
return province, random.choice(cities.get(province, []))
def get_province_by_city(self) -> str | None:
"""
根据城市名称解析对应省份
参数:
city (str): 城市名称,例如 `Calgary`、`Edmonton` 等
返回值:
str | None: 对应的省份名称;未匹配返回 None
"""
mapping = {
"Calgary": "Alberta",
"Edmonton": "Alberta",
"Vancouver": "British Columbia",
"Halifax": "Nova Scotia",
"Toronto": "Ontario",
"Ottawa": "Ontario",
"Mississauga": "Ontario",
"Brampton": "Ontario",
"Hamilton": "Ontario",
"Kitchener": "Ontario",
"London": "Ontario",
"Markham": "Ontario",
"Vaughan": "Ontario",
"Windsor": "Ontario",
"Oshawa": "Ontario",
"Brantford": "Ontario",
"Barrie": "Ontario",
"Sudbury": "Ontario",
"Kingston": "Ontario",
"Guelph": "Ontario",
"Cambridge": "Ontario",
"Sarnia": "Ontario",
"Peterborough": "Ontario",
"Waterloo": "Ontario",
"Belleville": "Ontario",
"Brockville": "Ontario",
"Burlington": "Ontario",
"Cornwall": "Ontario",
"Kawartha Lakes": "Ontario",
"North Bay": "Ontario",
"Orillia": "Ontario",
"Pickering": "Ontario",
"Sault Ste. Marie": "Ontario",
"Stratford": "Ontario",
"Durham": "Ontario",
"Norfolk County": "Ontario",
"Prince Edward County": "Ontario",
"Quinte West": "Ontario",
"St. Catharines": "Ontario",
"Welland": "Ontario",
"Thorold": "Ontario",
"Niagara Falls": "Ontario",
"Pelham": "Ontario",
"Port Colborne": "Ontario",
}
# 随机返回一条 key 和 value
return random.choice(list(mapping.items()))
# 随机实物
def get_random_food(self, city: str, shop: str) -> list[str]:
"""
随机选择 1~2 种食物类别,并为每个类别至少选择 1 个具体产品
参数:
shop (str): 商店名称(当前未使用,占位参数)
返回值:
list[str]: 随机选取的产品名称列表
"""
categories = [
[
'Wonder Bread White',
'Villaggio White Bread',
'No Name Sliced White Bread',
"President's Choice White Sliced Bread",
],
[
"Ben's Original Whole Wheat Bread",
"POM Whole Wheat Bread",
"Silver Hills Bakery Whole Wheat Sliced Bread",
"Country Harvest Whole Wheat Bread",
],
[
"Wonder Bread Hot Dog Buns",
"Villaggio Hamburger Buns",
"Dempster's Dinner Rolls",
"No Frills Hot Dog Buns",
],
[
"Stonemill Bakehouse Bagels",
"Wonder Bagels",
"Montreal Bagels (pre-packaged, e.g., St. Lawrence brand)",
"President's Choice Bagels",
],
[
"Silver Hills Multi-Grain Sliced Bread",
"POM Multi-Grain Bread",
"Country Harvest Multi-Grain Loaf",
],
[
"President's Choice French Stick",
"Dempster's Italian Style Bread",
"Wonder Italian Bread",
"Villaggio Country Style Loaf",
],
]
# 随机选择 1~2 个类别(不重复)
category_count = random.randint(1, 2)
chosen_categories = random.sample(categories, k=category_count)
# 每个类别至少选择 1 个产品,最多选择 3 个以避免过多
selected_products: list[str] = []
for cat in chosen_categories:
max_pick = min(3, len(cat))
pick_count = random.randint(1, max_pick)
selected_products.extend(random.sample(cat, k=pick_count))
logger.debug(f"随机选择的产品: {selected_products}")
text = f'{shop}, {city} buy: '
for p in selected_products:
text += f'{p} * {random.randint(1, 3)}, '
text = text[:-2]
text = text + '.'
logger.debug(f'随机选择的产品文本: {text}')
return text
# 填写问卷
def fill_questionnaire(self):
"""
完成问卷填写
参数:
city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据
"""
try:
info = generate_child_parent_names()
child_full_name = info['child_full_name']
parent_full_name = info['parent_full_name']
child_birthday = info['child_birthday']
# 2023-04-01转为MM/DD/YYYY
child_birthday = datetime.strptime(child_birthday, '%Y-%m-%d').strftime('%m/%d/%Y')
address_str = info['child_address_str']
city_name = info['child_city_name']
postcode = info['child_postcode']
parent_phone = info['parent_phone']
province = info['parent_state']
# email = mail_.email_create_random()
email = 'zhiyu@qq.com'
logger.debug(f"child_full_name --> {child_full_name}")
logger.debug(f"parent_full_name --> {parent_full_name}")
logger.debug(f"child_birthday --> {child_birthday}")
logger.debug(f"address_str --> {address_str}")
logger.debug(f"city_name --> {city_name}")
logger.debug(f"postcode --> {postcode}")
logger.debug(f"parent_phone --> {parent_phone}")
logger.debug(f"province --> {province}")
logger.debug(f"email --> {email}")
self.tab.wait(0.1)
self.tab.ele('t:input@id=name1').input(child_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=name2').input(parent_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=dateOfBirth').input(child_birthday)
self.tab.wait(0.1)
self.tab.ele('t:input@id=street1').input(address_str)
self.tab.wait(0.1)
self.tab.ele('t:input@id=city').input(city_name)
self.tab.wait(0.1)
self.tab.ele(
't:select@formcontrolname=state').ele(f't:option@text():{province}').click()
self.tab.wait(0.1)
self.tab.ele('t:input@id=zip').input(postcode)
self.tab.wait(0.1)
self.tab.ele('t:input@id=phone1').input(parent_phone)
self.tab.wait(0.1)
self.tab.ele('t:input@id=emailAddress').input(email)
self.tab.wait(0.1)
self.tab.ele('t:input@id=confirmEmailemail').input(email)
self.tab.wait(0.1)
self.tab.ele('t:input@@formcontrolname=resideInUS@@id=Yes').click()
self.tab.wait(0.1)
self.tab.ele('t:input@@formcontrolname=watchedDuringPeriod@@id=Yes').click()
self.tab.wait(0.1)
self.tab.ele('t:input@id=signatureMinor').input(child_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=signatureParentGuardian').input(parent_full_name)
self.solve_recaptcha()
return self.submit_file(
child_full_name=child_full_name,
parent_full_name=parent_full_name,
child_birthday=child_birthday,
address_str=address_str,
city_name=city_name,
parent_phone=parent_phone,
postcode=postcode,
province=province,
email=email,
text=""
)
except Exception as e:
logger.error(f"填写问卷失败: {e}")
# 提交问卷
def submit_file(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str):
"""
提交问卷后的数据保存到后端服务(孩子与家长字段)
参数:
child_full_name (str): 孩子全名
parent_full_name (str): 家长全名
child_birthday (str): 孩子生日(字符串,已为 MM/DD/YYYY
address_str (str): 街道地址
city_name (str): 城市
parent_phone (str): 家长电话
postcode (str): 邮编
province (str): 省/州全称
email (str): 邮箱
text (str): 文本内容(如反馈地址)
"""
jc = 0
while True:
if jc >= 3:
logger.error("提交问卷失败")
return False
res = self.solve_recaptcha()
if not res:
jc += 1
continue
res = self.tab.ele('t:button@text():SUBMIT')
if res:
logger.debug(f"点击Submit按钮")
res.click()
self.tab.wait(3)
res = self.tab.ele(
't:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=1)
if res:
logger.info("提交问卷成功")
logger.info(f"反馈地址: {text}")
res = self.tab.ele('t:b')
if res:
logger.info(f"反馈地址: {res.text}")
text = res.text
status = True
else:
status=False
api.create_info(
child_full_name=child_full_name,
parent_full_name=parent_full_name,
child_birthday=child_birthday,
address_str=address_str,
city_name=city_name,
parent_phone=parent_phone,
postcode=postcode,
province=province,
email=email,
text=text,
status=status
)
return True
bol = self.tab.ele(
't:div@text():ERR_TIMED_OUT', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
bol = self.tab.ele(
't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
bol = self.tab.ele(
't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
jc += 1
def parse_proxy(proxy: str) -> tuple[str, int, str, str] | None:
"""
解析代理字符串为四元组 `(host, port, user, pwd)`
参数:
proxy: 形如 `host:port:user:pwd`
返回值:
(host, port, user, pwd) 或 None格式错误
"""
try:
host, port, user, pwd = proxy.split(":", 3)
return host, int(port), user, pwd
except Exception:
logger.error(f"代理格式错误: {proxy}")
return None
def create_fingerprint_browser(proxy: str) -> tuple[str, str] | None:
    """
    Create a fingerprint browser bound to ``proxy`` and open its window.

    Args:
        proxy: Proxy string accepted by ``parse_proxy``.

    Returns:
        ``(browser_id, http)`` where ``http`` is the value returned by
        ``bit_browser.bit_browser_open``, or ``None`` on any failure. If the
        profile was created but could not be opened, it is closed and deleted
        before returning, so no orphan profile is left behind.
    """
    info = parse_proxy(proxy)
    if info is None:
        return None
    host, port, user, pwd = info
    browser_id = None
    try:
        browser_id = bit_browser.bit_browser_create(
            remark=f"{user}",
            proxy_type="socks5",
            host=host,
            port=str(port),
            proxy_user=user,
            proxy_pwd=pwd,
        )
        if not browser_id:
            return None
        logger.info(f"创建指纹浏览器成功: {browser_id}")
        time.sleep(1)
        http = bit_browser.bit_browser_open(browser_id)
        if http:
            logger.info(f"打开指纹浏览器成功: {browser_id}")
            return browser_id, http
    except Exception as e:
        logger.error(f"创建指纹浏览器失败: {e}")
    # Reached only when opening failed or raised: the caller never learns the
    # id in that case, so the profile has to be removed here.
    if browser_id:
        close_and_delete_browser(browser_id)
    return None
def close_and_delete_browser(browser_id: str) -> None:
    """
    Close a fingerprint browser and then delete it, best effort.

    Both steps are attempted regardless of each other's outcome; failures are
    logged as warnings and never raised.

    Args:
        browser_id: Id of the fingerprint browser to remove.
    """
    # Step 1: close the window (it may already be closed).
    try:
        bit_browser.bit_browser_close(browser_id)
    except Exception as close_err:
        logger.warning(f"关闭浏览器失败或已关闭: {browser_id} - {close_err}")

    # Pause between closing and deleting.
    time.sleep(1)

    # Step 2: delete the profile (it may already be gone).
    try:
        bit_browser.bit_browser_delete(browser_id)
    except Exception as delete_err:
        logger.warning(f"删除浏览器失败或已删除: {browser_id} - {delete_err}")
def run_task_with_proxy(proxy: str, stop_event: threading.Event) -> None:
    """
    Run one full automation pass behind ``proxy`` and clean up afterwards.

    A fingerprint browser is created, the claim form is opened and filled,
    and the browser is closed and deleted whatever the outcome.

    Args:
        proxy: Proxy string accepted by ``parse_proxy``.
        stop_event: Checked between steps; when set, the pass ends early.
    """
    browser_id: str | None = None
    try:
        created = create_fingerprint_browser(proxy)
        if not created:
            return
        browser_id, http = created
        if stop_event.is_set():
            return
        auto = Auto(http=http)
        auto.open_url('https://www.claimform.youtubeprivacysettlement.com')
        if stop_event.is_set():
            return
        if not auto.wait_home():
            return
        if stop_event.is_set():
            return
        # Auto defines no click_continue method in this file, so calling it
        # unconditionally raised AttributeError and ended every pass before the
        # form was filled. The step now runs only when the method exists.
        click_continue = getattr(auto, "click_continue", None)
        if callable(click_continue) and not click_continue():
            return
        if stop_event.is_set():
            return
        auto.fill_questionnaire()
    except Exception as e:
        logger.error(f"执行任务异常: {e}")
    finally:
        if browser_id:
            try:
                close_and_delete_browser(browser_id)
            except Exception as cleanup_err:
                # Cleanup is best effort, but the failure should be visible.
                logger.warning(f"删除浏览器失败或已删除: {browser_id} - {cleanup_err}")
def proxy_loop(proxy: str, stop_event: threading.Event) -> None:
    """
    Keep one proxy busy: as soon as a task ends, start the next one.

    Args:
        proxy: Proxy string handed to ``run_task_with_proxy``.
        stop_event: Set from outside to make the loop exit.
    """
    while not stop_event.is_set():
        try:
            if not is_forbidden_time():
                run_task_with_proxy(proxy, stop_event)
            else:
                # Inside the no-run window: wait a minute, clean up, then
                # wait until 20:00 before checking again.
                if stop_event.wait(timeout=60):
                    break
                cleanup_all_browsers()
                remaining = seconds_until(20, 0)
                if stop_event.wait(timeout=remaining):
                    break
                continue
        except Exception as err:
            logger.error(f"代理循环异常: {proxy} - {err}")
        # Short pause between tasks that also reacts to a stop request.
        if stop_event.is_set() or stop_event.wait(timeout=0.1):
            break
def is_forbidden_time() -> bool:
"""
判断当前是否处于禁跑时段(每日 18:30 ~ 20:00本地时间
返回值:
bool: True 表示处于禁跑时段
"""
# 去除晚上停止功能
return False
# 禁跑时段为 18:30 ~ 20:00
now = datetime.now()
start = now.replace(hour=18, minute=30, second=0, microsecond=0)
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
return start <= now < end
def wait_until_out_of_forbidden(interval_sec: float = 5.0, stop_event: threading.Event | None = None) -> None:
    """
    Block until the no-run window is over.

    Args:
        interval_sec: Seconds between two checks of ``is_forbidden_time``.
        stop_event: Optional event; when it becomes set the wait ends early.
    """
    while is_forbidden_time():
        if stop_event is None:
            time.sleep(interval_sec)
        elif stop_event.wait(timeout=interval_sec):
            # Stop requested while waiting.
            break
        # The event wait already covered the interval. Sleeping again, as the
        # previous version did, doubled the polling period.
def seconds_until(hour: int, minute: int) -> float:
    """
    Return how many seconds remain until ``hour:minute`` today (local time).

    Args:
        hour: Target hour, 24-hour clock.
        minute: Target minute.

    Returns:
        Seconds left as a float; ``0.0`` when that time has already passed.
    """
    current = datetime.now()
    deadline = current.replace(hour=hour, minute=minute, second=0, microsecond=0)
    remaining = (deadline - current).total_seconds()
    # A deadline at or before "now" yields a non-positive difference.
    return remaining if remaining > 0 else 0.0
def count_fingerprint_browsers() -> int:
    """
    Count the fingerprint browsers that currently exist.

    The first page (100 entries) is requested from ``bit_browser``. The
    reported ``totalNum`` is preferred when it is a non-negative int;
    otherwise the length of the returned list is used.

    Returns:
        The browser count, or 0 when the query fails.
    """
    try:
        reply = bit_browser.bit_browser_get(0, 100)
        payload = reply.get("data", {}) if isinstance(reply, dict) else {}
        reported_total = payload.get("totalNum")
        if isinstance(reported_total, int) and reported_total >= 0:
            return reported_total
        return len(payload.get("list", []))
    except Exception as err:
        logger.warning(f"统计指纹浏览器数量失败: {err}")
        return 0
def cleanup_all_browsers() -> None:
    """
    Close and delete every fingerprint browser on the first page (100 entries)
    returned by ``bit_browser``. Failures are logged, never raised.
    """
    try:
        reply = bit_browser.bit_browser_get(0, 100)
        payload = reply.get("data", {}) if isinstance(reply, dict) else {}
        # Entries without an id are skipped.
        browser_ids = [entry.get("id") for entry in payload.get("list", []) if entry.get("id")]
        for browser_id in browser_ids:
            close_and_delete_browser(browser_id)
    except Exception as err:
        logger.warning(f"清理所有指纹浏览器失败: {err}")
def delete_excess_browsers(limit: int) -> None:
    """
    Remove fingerprint browsers beyond ``limit``, starting from the end of the
    list returned by ``bit_browser`` (first page of 100 entries).

    Args:
        limit: Maximum number of browsers allowed to remain.
    """
    try:
        reply = bit_browser.bit_browser_get(0, 100)
        payload = reply.get("data", {}) if isinstance(reply, dict) else {}
        browser_ids = [entry.get("id") for entry in payload.get("list", []) if entry.get("id")]
        excess = len(browser_ids) - limit
        if excess <= 0:
            return
        # Take the trailing surplus and walk it back to front.
        for browser_id in browser_ids[-excess:][::-1]:
            close_and_delete_browser(browser_id)
        logger.info(f"已删除超出数量 {excess},当前限制为 {limit}")
    except Exception as err:
        logger.warning(f"删除超额浏览器失败: {err}")
def monitor_browsers_and_restart(limit: int, stop_event: threading.Event, restart_event: threading.Event) -> None:
    """
    Every 3 seconds, count the fingerprint browsers and delete the surplus
    (from the end of the list) when the count exceeds ``limit``.

    Args:
        limit: Maximum number of browsers allowed (normally the proxy count).
        stop_event: Ends the monitor; it now interrupts the 3-second pause
            immediately instead of after a blocking sleep and one extra check.
        restart_event: Accepted for interface compatibility; not used.
    """
    # Event.wait returns True as soon as the event is set, ending the loop.
    while not stop_event.wait(timeout=3):
        count = count_fingerprint_browsers()
        if count > limit:
            logger.warning(f"指纹浏览器数量 {count} 超过限制 {limit},开始删除超出部分")
            delete_excess_browsers(limit)
def main():
    """
    Run one worker thread per proxy, forever.

    Each worker loops in ``proxy_loop``; a daemon thread watches the number of
    fingerprint browsers every 3 seconds and trims the surplus. A worker whose
    future finishes is resubmitted. The batch is torn down and rebuilt when a
    restart is requested or the no-run window begins.

    Returns immediately (after logging) when ``proxy_list`` is empty, because
    ``ThreadPoolExecutor`` rejects ``max_workers=0`` with ValueError.
    """
    proxies = list(proxy_list)
    if not proxies:
        logger.error("代理列表为空, 无法启动任务")
        return
    while True:
        stop_event = threading.Event()
        restart_event = threading.Event()
        if is_forbidden_time():
            if stop_event.wait(timeout=60):
                continue
            cleanup_all_browsers()
            logger.info("处于禁跑时段,等待至禁跑结束")
            wait_until_out_of_forbidden()
            continue
        executor = ThreadPoolExecutor(max_workers=len(proxies))
        try:
            futures_map = {executor.submit(proxy_loop, p, stop_event): p for p in proxies}
            monitor_thread = threading.Thread(
                target=monitor_browsers_and_restart,
                args=(len(proxies), stop_event, restart_event),
                daemon=True,
            )
            monitor_thread.start()
            while True:
                if restart_event.is_set():
                    stop_event.set()
                    try:
                        executor.shutdown(wait=True)
                    except Exception:
                        pass
                    break
                if is_forbidden_time():
                    logger.info("进入禁跑时段停止当前批次等待1分钟后清理指纹浏览器")
                    stop_event.set()
                    try:
                        executor.shutdown(wait=True)
                    except Exception:
                        pass
                    time.sleep(60)
                    cleanup_all_browsers()
                    wait_until_out_of_forbidden()
                    break
                # Resubmit any worker whose loop ended while the batch is live.
                for f, proxy in list(futures_map.items()):
                    if f.done() and not stop_event.is_set() and not restart_event.is_set():
                        try:
                            _ = f.exception()
                        except Exception:
                            pass
                        try:
                            new_future = executor.submit(proxy_loop, proxy, stop_event)
                            del futures_map[f]
                            futures_map[new_future] = proxy
                        except Exception as e:
                            logger.error(f"重启代理线程失败: {proxy} - {e}")
                time.sleep(0.2)
            try:
                monitor_thread.join(timeout=5)
            except Exception:
                pass
        finally:
            # Workers only exit once stop_event is set. Without this, an
            # exception escaping the loop above left shutdown(wait=True)
            # blocked forever.
            stop_event.set()
            try:
                executor.shutdown(wait=True)
            except Exception:
                pass
        continue
def main2():
    """
    Single-browser run: open the claim form in the default browser and fill it
    once. Nothing is filled when the landing page does not show up.
    """
    runner = Auto()
    runner.open_url('https://www.claimform.youtubeprivacysettlement.com')
    if runner.wait_home():
        runner.fill_questionnaire()
if __name__ == "__main__":
main2()

95
spider/proxys.py Normal file
View File

@@ -0,0 +1,95 @@
# Proxy pools. Every entry is one string with four colon-separated fields,
# "host:port:user:password".
# NOTE(review): the group names (work, ca1, cwd, ...) are presumably
# per-machine or per-operator pools — confirm with the owner.
# NOTE(review): proxy credentials are committed in plain text; consider
# moving them to configuration that is not checked in.
work = [
"us.novproxy.io:1000:qyd00056-region-CA:qyd00056",
"us.novproxy.io:1000:qyd00054-region-US:qyd00054",
"us.novproxy.io:1000:qyd00053-region-CA:qyd00053",
"us.novproxy.io:1000:qyd00052-region-US:qyd00052",
]
ca1 = [
"us.novproxy.io:1000:qyd00051-region-CA:qyd00051",
"us.novproxy.io:1000:qyd00050-region-US:qyd00050",
"us.novproxy.io:1000:qyd00049-region-CA:qyd00049",
"us.novproxy.io:1000:qyd00048-region-US:qyd00048",
"us.novproxy.io:1000:qyd00047-region-CA:qyd00047",
]
ca2 = [
"us.novproxy.io:1000:qyd00046-region-US:qyd00046",
"us.novproxy.io:1000:qyd00045-region-CA:qyd00045",
"us.novproxy.io:1000:qyd00044-region-US:qyd00044",
"us.novproxy.io:1000:qyd00043-region-CA:qyd00043",
"us.novproxy.io:1000:qyd00042-region-US:qyd00042",
]
ca3 = [
"us.novproxy.io:1000:qyd00041-region-CA:qyd00041",
"us.novproxy.io:1000:qyd00040-region-CA:qyd00040",
"us.novproxy.io:1000:qyd00039-region-US:qyd00039",
"us.novproxy.io:1000:qyd00038-region-CA:qyd00038",
"us.novproxy.io:1000:qyd00037-region-US:qyd00037",
]
cwd = [
"us.novproxy.io:1000:qyd00036-region-CA:qyd00036",
"us.novproxy.io:1000:qyd00035-region-US:qyd00035",
"us.novproxy.io:1000:qyd00034-region-CA:qyd00034",
"us.novproxy.io:1000:qyd00033-region-US:qyd00033",
]
wt = [
"us.novproxy.io:1000:qyd00032-region-CA:qyd00032",
"us.novproxy.io:1000:qyd00031-region-US:qyd00031",
"us.novproxy.io:1000:qyd00030-region-CA:qyd00030",
"us.novproxy.io:1000:qyd00029-region-US:qyd00029",
]
hc = [
"us.novproxy.io:1000:qyd00028-region-CA:qyd00028",
"us.novproxy.io:1000:qyd00027-region-US:qyd00027",
"us.novproxy.io:1000:qyd00026-region-CA:qyd00026",
"us.novproxy.io:1000:qyd00025-region-US:qyd00025",
]
zlj = [
"us.novproxy.io:1000:qyd00024-region-CA:qyd00024",
"us.novproxy.io:1000:qyd00023-region-US:qyd00023",
"us.novproxy.io:1000:qyd00022-region-CA:qyd00022",
"us.novproxy.io:1000:qyd00021-region-US:qyd00021",
]
wzq = [
"us.novproxy.io:1000:qyd00020-region-CA:qyd00020",
"us.novproxy.io:1000:qyd00019-region-US:qyd00019",
"us.novproxy.io:1000:qyd00018-region-CA:qyd00018",
"us.novproxy.io:1000:qyd00017-region-US:qyd00017",
]
xy = [
"us.novproxy.io:1000:qyd00016-region-CA:qyd00016",
"us.novproxy.io:1000:qyd00015-region-US:qyd00015",
"us.novproxy.io:1000:qyd00014-region-CA:qyd00014",
"us.novproxy.io:1000:qyd00013-region-US:qyd00013",
]
yll = [
"us.novproxy.io:1000:qyd00012-region-CA:qyd00012",
"us.novproxy.io:1000:qyd00011-region-US:qyd00011",
"us.novproxy.io:1000:qyd00010-region-CA:qyd00010",
"us.novproxy.io:1000:qyd00009-region-US:qyd00009",
]
szt = [
"us.novproxy.io:1000:qyd00008-region-CA:qyd00008",
"us.novproxy.io:1000:qyd00007-region-US:qyd00007",
"us.novproxy.io:1000:qyd00006-region-CA:qyd00006",
"us.novproxy.io:1000:qyd00005-region-US:qyd00005",
]
hz = [
"us.novproxy.io:1000:qyd00004-region-CA:qyd00004",
"us.novproxy.io:1000:qyd00003-region-US:qyd00003",
"us.novproxy.io:1000:qyd00002-region-CA:qyd00002",
"us.novproxy.io:1000:qyd00001-region-US:qyd00001",
]
# The pool this deployment uses; switch the name to select another group.
proxy_list = work

31
spider/requirements.txt Normal file
View File

@@ -0,0 +1,31 @@
aiohttp
requests
curl_cffi
aiohttp-socks
requests[socks]
fake_useragent
apscheduler
aiofiles
loguru
portalocker
aiomultiprocess
faker
eth_account
eth_utils
solders
toncli
ecdsa
base58
ddddocr
aiohttp_socks
websockets
psutil
socks
drissionpage
fastapi
uvicorn
pydantic
ultralytics
opencv-python-headless
torch
pillow

22
spider/test.py Normal file
View File

@@ -0,0 +1,22 @@
from DrissionPage import Chromium
from loguru import logger
from bit_browser import bit_browser
# Scratch script: attaches to an already running browser and selects a value
# in the "clear browsing data" dialog of its settings page.
# http = bit_browser.bit_browser_open('871851b9835d42b3911f39162b3427d5')
# print(http)
# Attach to the browser listening on this fixed local debugging address.
browser = Chromium('127.0.0.1:65480')
tab = browser.latest_tab
# tab.get('bitbrowser://settings/clearBrowserData')
# Walk down the nested shadow roots of the settings UI to the dialog.
res = tab.ele('t:settings-ui',timeout=3).sr('t:settings-main').sr('t:settings-basic-page').sr('t:settings-privacy-page').sr('t:settings-clear-browsing-data-dialog').sr('t:cr-dialog')
res = res.ele('t:cr-page-selector@id=pages')
res = res.ele('t:settings-dropdown-menu@id=clearFromBasic').shadow_root
# Pick the dropdown option whose value is 4 (presumably the widest time range — TODO confirm).
res.ele('t:select@id=dropdownMenu').ele('t:option@value=4').click()
# res = tab.ele('t:settings-dropdown-menu@id=clearFromBasic',timeout=3)
print(res)
if res:
    logger.info(f"html: {res.html}")
# res = tab.ele('t:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=3)
# if res:
# logger.info("提交问卷成功")
# res = tab.ele('t:b')
# if res:
# logger.info(f"反馈地址: {res.text}")

1051
spider/work.py Normal file

File diff suppressed because it is too large Load Diff