commit 45ff5a62e3
2025-12-12 14:40:04 +08:00
36 changed files with 5640 additions and 0 deletions

14
.gitignore vendored Normal file
View File

@@ -0,0 +1,14 @@
__pycache__
.env
.trae
.idea
.DS_Store
*.baiduyun.*
.vscode
对比
logs/sessions.json
logs/sessions.log
222.py
333.py
444.py
chain

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
# 0.0.1
- Initialize the project

30
back/Dockerfile Executable file
View File

@@ -0,0 +1,30 @@
# Runtime base image
FROM python:3.12-slim
# Set the time zone
ENV TZ=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Set the working directory and Python environment variables
WORKDIR /app
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1
# Install system dependencies
RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g' /etc/apt/sources.list.d/debian.sources \
&& sed -i 's|http://security.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
gcc \
python3-dev \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Optimization: copy the dependency file first so code changes do not re-trigger a full dependency install
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.cloud.tencent.com/pypi/simple
# Copy the project files
COPY . /app
# Set the startup command
CMD ["python", "main.py"]

5
back/apis/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
from fastapi import APIRouter
from .country import app as country_app
app = APIRouter()
app.include_router(country_app, prefix='/country')

View File

@@ -0,0 +1,9 @@
from fastapi import APIRouter
from .info.view import app as info_app
from .food.view import app as food_app
from .shop.view import app as shop_app
app = APIRouter()
app.include_router(info_app, prefix='/info', tags=['信息'])
app.include_router(food_app, prefix='/food', tags=['食物'])
app.include_router(shop_app, prefix='/shop', tags=['商店'])

View File

@@ -0,0 +1,66 @@
from datetime import datetime, timezone, timedelta
from pydantic import BaseModel, Field, computed_field
from typing import List
from uuid import UUID
from utils.time_tool import TimestampModel
CHINA_TZ = timezone(timedelta(hours=8))
class Base(BaseModel):
"""
基础食物信息模型
仅包含食物名称
"""
name: str = Field(..., description='食物名称')
class Create(Base):
"""
创建请求模型
"""
pass
class Update(BaseModel):
"""
更新请求模型,支持部分更新
"""
name: str | None = Field(None, description='食物名称')
class Out(TimestampModel, Base):
"""
输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
id: UUID = Field(..., description='ID')
create_time: datetime = Field(..., description='创建时间')
update_time: datetime = Field(..., description='更新时间')
@computed_field
@property
def create_time_cn(self) -> str:
return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
@computed_field
@property
def update_time_cn(self) -> str:
return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
class Config:
from_attributes = True
class OutList(BaseModel):
"""
列表输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
count: int = Field(0, description='总数')
num: int = Field(0, description='当前数量')
items: List[Out] = Field([], description='列表数据')
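A quick serialization sketch for the Out model above (hypothetical usage; the import path apis.country.food.schema is an assumption, since the file path is not shown here): model_validate accepts an ORM object or plain dict, the *_cn computed fields render Beijing time, and the TimestampModel base serializes datetimes as 13-digit timestamps in JSON mode.

from datetime import datetime, timezone
from uuid import uuid4
from apis.country.food.schema import Out  # assumed import path

row = {
    "name": "bread",
    "id": uuid4(),
    "create_time": datetime(2025, 1, 1, 4, 0, tzinfo=timezone.utc),
    "update_time": datetime(2025, 1, 1, 4, 0, tzinfo=timezone.utc),
}
out = Out.model_validate(row)
print(out.create_time_cn)     # "2025-01-01 12:00:00" (UTC+8)
print(out.model_dump_json())  # datetimes emitted as 13-digit millisecond timestamps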

View File

@@ -0,0 +1,122 @@
from fastapi import APIRouter, Query, Body, HTTPException
from uuid import UUID
from .schema import Create, Update, Out, OutList
from ..models import Food
from utils.decorators import handle_exceptions_unified
from utils.time_tool import parse_time
from utils.out_base import CommonOut
app = APIRouter()
# 创建食物
@app.post("", response_model=Out, description='创建食物', summary='创建食物')
@handle_exceptions_unified()
async def post(item: Create = Body(..., description='创建数据')):
"""
创建食物记录
"""
res = await Food.create(**item.model_dump())
if not res:
raise HTTPException(status_code=400, detail='创建失败')
return res
# 查询食物
@app.get("", response_model=OutList, description='获取食物', summary='获取食物')
@handle_exceptions_unified()
async def gets(
id: UUID | None = Query(None, description='主键ID'),
name: str | None = Query(None, description='食物名称'),
order_by: str | None = Query('create_time', description='排序字段',
regex='^(-)?(id|name|create_time|update_time)$'),
res_count: bool = Query(False, description='是否返回总数'),
create_time_start: str | int | None = Query(
None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
create_time_end: str | int | None = Query(
None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_start: str | int | None = Query(
None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_end: str | int | None = Query(
None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
page: int = Query(1, ge=1, description='页码'),
limit: int = Query(10, ge=1, le=1000, description='每页数量'),
):
"""
获取食物列表
"""
query = Food.all()
if id:
query = query.filter(id=id)
if name:
query = query.filter(name=name)
if create_time_start:
query = query.filter(create_time__gte=parse_time(create_time_start))
if create_time_end:
query = query.filter(create_time__lte=parse_time(
create_time_end, is_end=True))
if update_time_start:
query = query.filter(update_time__gte=parse_time(update_time_start))
if update_time_end:
query = query.filter(update_time__lte=parse_time(
update_time_end, is_end=True))
if order_by:
query = query.order_by(order_by)
if res_count:
count = await query.count()
else:
count = -1
offset = (page - 1) * limit # 计算偏移量
query = query.limit(limit).offset(offset) # 应用分页
res = await query
if not res:
raise HTTPException(status_code=404, detail='食物不存在')
num = len(res)
return OutList(count=count, num=num, items=res)
# 更新食物
@app.put("", response_model=Out, description='更新食物', summary='更新食物')
@handle_exceptions_unified()
async def put(id: UUID = Query(..., description='主键ID'),
item: Update = Body(..., description='更新数据'),
):
"""
部分更新食物,只更新传入的非空字段
"""
# 检查食物是否存在
secret = await Food.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='食物不存在')
# Collect only the fields that were explicitly provided (exclude_unset filters out omitted fields)
update_data = item.model_dump(exclude_unset=True)
# 如果没有要更新的字段
if not update_data:
raise HTTPException(status_code=400, detail='没有要更新的字段')
# 更新食物字段
await secret.update_from_dict(update_data)
await secret.save()
return secret
# 删除食物
@app.delete("", response_model=CommonOut, description='删除食物', summary='删除食物')
@handle_exceptions_unified()
async def delete(id: UUID = Query(..., description='主键ID'),
):
"""删除食物"""
secret = await Food.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='食物不存在')
await secret.delete()
# Tortoise ORM's instance delete() returns None rather than the number of deleted rows,
# so return count=1 on success; any exception is caught by the decorator
return CommonOut(count=1)
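A minimal client sketch for the list endpoint above, assuming the app from main.py is running locally on port 6060 and mounted under the /country prefix from apis/__init__.py:

import httpx

resp = httpx.get(
    "http://127.0.0.1:6060/country/food",  # assumed local deployment
    params={"page": 1, "limit": 10, "res_count": True, "order_by": "-create_time"},
)
data = resp.json()
print(data["count"], data["num"], [i["name"] for i in data["items"]])

Passing res_count=True asks the endpoint to run the extra COUNT query; otherwise count comes back as -1.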

View File

@@ -0,0 +1,88 @@
from datetime import datetime, timezone, timedelta
from pydantic import BaseModel, Field, computed_field
from typing import List
from uuid import UUID
from utils.time_tool import TimestampModel
CHINA_TZ = timezone(timedelta(hours=8))
class Base(BaseModel):
"""
基础信息模型
字段与数据库模型 Info 保持一致(孩子与家长字段)
"""
child_full_name: str = Field(..., description='孩子全名')
parent_full_name: str = Field(..., description='家长全名')
child_birthday: str = Field(..., description='孩子生日')
address_str: str = Field(..., description='街道地址')
city_name: str = Field(..., description='城市')
parent_phone: str = Field(..., description='家长电话')
postcode: str = Field(..., description='邮编')
province: str = Field(..., description='省/州全称')
status: bool = Field(False, description='状态')
email: str | None = Field(None, description='邮箱')
email_content: str | None = Field(None, description='邮件内容')
text: str | None = Field(None, description='文本内容')
class Create(Base):
"""
创建请求模型
"""
pass
class Update(BaseModel):
"""
更新请求模型,支持部分更新
"""
child_full_name: str | None = Field(None, description='孩子全名')
parent_full_name: str | None = Field(None, description='家长全名')
child_birthday: str | None = Field(None, description='孩子生日')
address_str: str | None = Field(None, description='街道地址')
city_name: str | None = Field(None, description='城市')
parent_phone: str | None = Field(None, description='家长电话')
postcode: str | None = Field(None, description='邮编')
province: str | None = Field(None, description='省/州全称')
status: bool | None = Field(None, description='状态')
email: str | None = Field(None, description='邮箱')
email_content: str | None = Field(None, description='邮件内容')
text: str | None = Field(None, description='文本内容')
class Out(TimestampModel, Base):
"""
输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
id: UUID = Field(..., description='ID')
create_time: datetime = Field(..., description='创建时间')
update_time: datetime = Field(..., description='更新时间')
@computed_field
@property
def create_time_cn(self) -> str:
return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
@computed_field
@property
def update_time_cn(self) -> str:
return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
class Config:
from_attributes = True
class OutList(BaseModel):
"""
列表输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
count: int = Field(0, description='总数')
num: int = Field(0, description='当前数量')
items: List[Out] = Field([], description='列表数据')

View File

@@ -0,0 +1,171 @@
from fastapi import APIRouter, Query, Body, HTTPException
import random
from uuid import UUID
from .schema import Create, Update, Out, OutList
from ..models import Info
from utils.decorators import handle_exceptions_unified
from utils.time_tool import parse_time
from utils.out_base import CommonOut
from tortoise.transactions import in_transaction
app = APIRouter()
# 创建信息
@app.post("", response_model=Out, description='创建信息', summary='创建信息')
@handle_exceptions_unified()
async def post(item: Create = Body(..., description='创建数据')):
"""
创建信息记录
"""
res = await Info.create(**item.model_dump())
if not res:
raise HTTPException(status_code=400, detail='创建失败')
return res
# 查询信息
@app.get("", response_model=OutList, description='获取信息', summary='获取信息')
@handle_exceptions_unified()
async def gets(
id: UUID | None = Query(None, description='主键ID'),
child_full_name: str | None = Query(None, description='孩子全名'),
parent_full_name: str | None = Query(None, description='家长全名'),
child_birthday: str | None = Query(None, description='孩子生日'),
address_str: str | None = Query(None, description='街道地址'),
city_name: str | None = Query(None, description='城市'),
parent_phone: str | None = Query(None, description='家长电话'),
postcode: str | None = Query(None, description='邮编'),
province: str | None = Query(None, description='州全称'),
status: bool | None = Query(None, description='状态'),
email: str | None = Query(None, description='邮箱'),
order_by: str | None = Query('create_time', description='排序字段',
regex='^(-)?(id|child_full_name|parent_full_name|city_name|postcode|province|create_time|update_time)$'),
res_count: bool = Query(False, description='是否返回总数'),
create_time_start: str | int | None = Query(
None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
create_time_end: str | int | None = Query(
None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_start: str | int | None = Query(
None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_end: str | int | None = Query(
None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
page: int = Query(1, ge=1, description='页码'),
limit: int = Query(10, ge=1, le=1000, description='每页数量'),
):
"""
获取信息列表
"""
query = Info.all()
if id:
query = query.filter(id=id)
if child_full_name:
query = query.filter(child_full_name=child_full_name)
if parent_full_name:
query = query.filter(parent_full_name=parent_full_name)
if child_birthday:
query = query.filter(child_birthday=child_birthday)
if address_str:
query = query.filter(address_str=address_str)
if city_name:
query = query.filter(city_name=city_name)
if parent_phone:
query = query.filter(parent_phone=parent_phone)
if postcode:
query = query.filter(postcode=postcode)
if province:
query = query.filter(province=province)
if email:
query = query.filter(email=email)
if status is not None:
query = query.filter(status=status)
if create_time_start:
query = query.filter(create_time__gte=parse_time(create_time_start))
if create_time_end:
query = query.filter(create_time__lte=parse_time(
create_time_end, is_end=True))
if update_time_start:
query = query.filter(update_time__gte=parse_time(update_time_start))
if update_time_end:
query = query.filter(update_time__lte=parse_time(
update_time_end, is_end=True))
if order_by:
query = query.order_by(order_by)
if res_count:
count = await query.count()
else:
count = -1
offset = (page - 1) * limit # 计算偏移量
query = query.limit(limit).offset(offset) # 应用分页
res = await query
if not res:
raise HTTPException(status_code=404, detail='信息不存在')
num = len(res)
return OutList(count=count, num=num, items=res)
# 更新信息
@app.put("", response_model=Out, description='更新信息', summary='更新信息')
@handle_exceptions_unified()
async def put(id: UUID = Query(..., description='主键ID'),
item: Update = Body(..., description='更新数据'),
):
"""
部分更新信息,只更新传入的非空字段
"""
# 检查信息是否存在
secret = await Info.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='信息不存在')
# Collect only the fields that were explicitly provided (exclude_unset filters out omitted fields)
update_data = item.model_dump(exclude_unset=True)
# 如果没有要更新的字段
if not update_data:
raise HTTPException(status_code=400, detail='没有要更新的字段')
# 更新信息字段
await secret.update_from_dict(update_data)
await secret.save()
return secret
# 删除信息
@app.delete("", response_model=CommonOut, description='删除信息', summary='删除信息')
@handle_exceptions_unified()
async def delete(id: UUID = Query(..., description='主键ID'),
):
"""删除信息"""
secret = await Info.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='信息不存在')
await secret.delete()
# Tortoise ORM's instance delete() returns None rather than the number of deleted rows,
# so return count=1 on success; any exception is caught by the decorator
return CommonOut(count=1)
# 随机获取一条状态修改为True的记录
@app.get("/one", response_model=Out, description='随机获取一条状态修改为True的记录', summary='随机获取一条状态修改为True的记录')
@handle_exceptions_unified()
async def random_update_status():
"""
随机获取一条状态为 False 的记录并在事务中更新为 True
"""
async with in_transaction() as conn:
q = Info.filter(status=False).using_db(conn)
current_running_count = await q.count()
if current_running_count == 0:
raise HTTPException(status_code=404, detail='没有状态为False的记录')
pick_index = random.choice(range(current_running_count))
item = await q.order_by('create_time').offset(pick_index).first()
updated = await Info.filter(id=item.id, status=False).using_db(conn).update(status=True)
if updated == 0:
raise HTTPException(status_code=400, detail='并发冲突,未更新')
return item
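The /one route above claims one pending record atomically: it picks a random status=False row inside a transaction and flips it to True, returning 400 on a concurrent conflict. A minimal polling sketch, assuming the same local deployment as the earlier example:

import httpx

resp = httpx.get("http://127.0.0.1:6060/country/info/one")
if resp.status_code == 200:
    record = resp.json()
    print("claimed", record["id"], record["child_full_name"])
else:
    # 404: nothing left to claim, 400: lost the race to another worker
    print("no record claimed:", resp.status_code)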

116
back/apis/country/models.py Normal file
View File

@@ -0,0 +1,116 @@
import uuid
from tortoise import fields
from tortoise.models import Model
class Shop(Model):
"""
店铺模型
字段:
id (UUIDField): 主键,默认使用 UUID 生成
province (CharField): 省份,最大长度 255
city (CharField): 城市,最大长度 255
street (CharField): 街道,最大长度 255
shop_name (CharField): 店铺名称,最大长度 255
shop_number (CharField): 店铺号码,最大长度 255 nullable 为 True
"""
id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID")
province = fields.CharField(max_length=255, null=True, index=True, description="省份")
city = fields.CharField(max_length=255, index=True, description="城市")
street = fields.CharField(max_length=255, index=True, description="街道")
shop_name = fields.CharField(max_length=255, index=True, description="店铺名称")
shop_number = fields.CharField(max_length=255, null=True, description="店铺号码")
create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间')
update_time = fields.DatetimeField(auto_now=True, description='更新时间')
class Meta:
table = "shop"
table_description = "店铺表"
ordering = ["create_time"]
indexes = [
("province", "city", "street"),
]
def __repr__(self):
return f"<Shop(id={self.id}, province={self.province}, city={self.city}, street={self.street}, shop_name={self.shop_name})>"
__str__ = __repr__
class Food(Model):
"""
食物模型
字段:
id (UUIDField): 主键,默认使用 UUID 生成
name (CharField): 食物名称,最大长度 255
"""
id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID")
name = fields.CharField(max_length=255, index=True, description="食物名称")
create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间')
update_time = fields.DatetimeField(auto_now=True, description='更新时间')
class Meta:
table = "food"
table_description = "食物表"
ordering = ["create_time"]
indexes = [
("name",),
]
def __repr__(self):
return f"<Food(id={self.id}, name={self.name})>"
__str__ = __repr__
class Info(Model):
"""
信息模型(孩子与家长字段)
字段:
id (UUIDField): 主键,默认使用 UUID 生成
child_full_name (CharField): 孩子全名,最大长度 255
parent_full_name (CharField): 家长全名,最大长度 255
child_birthday (CharField): 孩子生日(原始字符串),最大长度 32
address_str (CharField): 街道地址,最大长度 255
city_name (CharField): 城市,最大长度 255
parent_phone (CharField): 家长电话,最大长度 64
postcode (CharField): 邮编,最大长度 20
province (CharField): 省/州全称,最大长度 255
status (BooleanField): 状态,默认值 False
email (CharField): 邮箱,最大长度 255 nullable 为 True
text (TextField): 文本内容, nullable 为 True
"""
id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID")
child_full_name = fields.CharField(max_length=255, index=True, description="孩子全名")
parent_full_name = fields.CharField(max_length=255, index=True, description="家长全名")
child_birthday = fields.CharField(max_length=32, description="孩子生日")
address_str = fields.CharField(max_length=255, index=True, description="街道地址")
city_name = fields.CharField(max_length=255, index=True, description="城市")
parent_phone = fields.CharField(max_length=64, description="家长电话")
postcode = fields.CharField(max_length=20, index=True, description="邮编")
province = fields.CharField(max_length=255, index=True, description="省/州全称")
status = fields.BooleanField(default=False, description="状态")
# 邮件内容
email = fields.CharField(max_length=255, unique=True, index=True, description="邮箱")
email_content = fields.TextField(null=True, description="邮件内容")
text = fields.TextField(null=True, description="文本内容")
create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间')
update_time = fields.DatetimeField(auto_now=True, description='更新时间')
class Meta:
table = "info"
table_description = "信息表"
ordering = ["create_time"]
indexes = [
("city_name", "postcode", "province"),
("child_full_name", "parent_full_name"),
]
def __repr__(self):
return f"<Info(id={self.id}, child_full_name={self.child_full_name}, parent_full_name={self.parent_full_name}, child_birthday={self.child_birthday}, address_str={self.address_str}, city_name={self.city_name}, parent_phone={self.parent_phone}, postcode={self.postcode}, province={self.province})>"
__str__ = __repr__
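A small standalone sketch for exercising these models outside FastAPI (assumes the MySQL instance configured in settings.py is reachable; nothing here is wired into the service itself):

import asyncio
from tortoise import Tortoise
from settings import TORTOISE_ORM
from apis.country.models import Shop

async def demo():
    await Tortoise.init(config=TORTOISE_ORM)  # uses the connection settings from settings.py
    shop = await Shop.create(city="Toronto", street="1 Main St", shop_name="Demo Bakery")
    print(shop.id, await Shop.filter(city="Toronto").count())
    await Tortoise.close_connections()

asyncio.run(demo())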

View File

@@ -0,0 +1,74 @@
from datetime import datetime, timezone, timedelta
from pydantic import BaseModel, Field, computed_field
from typing import List
from uuid import UUID
from utils.time_tool import TimestampModel
CHINA_TZ = timezone(timedelta(hours=8))
class Base(BaseModel):
"""
基础店铺信息模型
包含店铺相关的通用字段,供创建与输出模型复用
"""
province: str | None = Field(None, description='省份')
city: str = Field(..., description='城市')
street: str = Field(..., description='街道')
shop_name: str = Field(..., description='店铺名称')
shop_number: str | None = Field(None, description='店铺号码')
class Create(Base):
"""
创建请求模型
"""
pass
class Update(BaseModel):
"""
更新请求模型,支持部分更新
"""
province: str | None = Field(None, description='省份')
city: str | None = Field(None, description='城市')
street: str | None = Field(None, description='街道')
shop_name: str | None = Field(None, description='店铺名称')
shop_number: str | None = Field(None, description='店铺号码')
class Out(TimestampModel, Base):
"""
输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
id: UUID = Field(..., description='ID')
create_time: datetime = Field(..., description='创建时间')
update_time: datetime = Field(..., description='更新时间')
@computed_field
@property
def create_time_cn(self) -> str:
return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
@computed_field
@property
def update_time_cn(self) -> str:
return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S")
class Config:
from_attributes = True
class OutList(BaseModel):
"""
列表输出模型
"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
count: int = Field(0, description='总数')
num: int = Field(0, description='当前数量')
items: List[Out] = Field([], description='列表数据')

View File

@@ -0,0 +1,155 @@
from fastapi import APIRouter, Query, Body, HTTPException
from uuid import UUID
from .schema import Create, Update, Out, OutList
from ..models import Shop
from utils.decorators import handle_exceptions_unified
from utils.time_tool import parse_time
from utils.out_base import CommonOut
from tortoise.transactions import in_transaction
import random
app = APIRouter()
# 创建店铺
@app.post("", response_model=Out, description='创建店铺', summary='创建店铺')
@handle_exceptions_unified()
async def post(item: Create = Body(..., description='创建数据')):
"""
创建店铺记录
"""
res = await Shop.filter(street=item.street).first()
if res:
raise HTTPException(status_code=400, detail='店铺已存在')
res = await Shop.create(**item.model_dump())
if not res:
raise HTTPException(status_code=400, detail='创建失败')
return res
# 查询店铺
@app.get("", response_model=OutList, description='获取店铺', summary='获取店铺')
@handle_exceptions_unified()
async def gets(
id: UUID | None = Query(None, description='主键ID'),
province: str | None = Query(None, description='省份'),
city: str | None = Query(None, description='城市'),
street: str | None = Query(None, description='街道'),
shop_name: str | None = Query(None, description='店铺名称'),
shop_number: str | None = Query(None, description='店铺号码'),
order_by: str | None = Query('create_time', description='排序字段',
regex='^(-)?(id|province|city|street|shop_name|create_time|update_time)$'),
res_count: bool = Query(False, description='是否返回总数'),
create_time_start: str | int | None = Query(
None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
create_time_end: str | int | None = Query(
None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_start: str | int | None = Query(
None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
update_time_end: str | int | None = Query(
None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'),
page: int = Query(1, ge=1, description='页码'),
limit: int = Query(10, ge=1, le=1000, description='每页数量'),
):
"""
获取店铺列表
"""
query = Shop.all()
if id:
query = query.filter(id=id)
if province:
query = query.filter(province=province)
if city:
query = query.filter(city=city)
if street:
query = query.filter(street=street)
if shop_name:
query = query.filter(shop_name=shop_name)
if shop_number:
query = query.filter(shop_number=shop_number)
if create_time_start:
query = query.filter(create_time__gte=parse_time(create_time_start))
if create_time_end:
query = query.filter(create_time__lte=parse_time(
create_time_end, is_end=True))
if update_time_start:
query = query.filter(update_time__gte=parse_time(update_time_start))
if update_time_end:
query = query.filter(update_time__lte=parse_time(
update_time_end, is_end=True))
if order_by:
query = query.order_by(order_by)
if res_count:
count = await query.count()
else:
count = -1
offset = (page - 1) * limit # 计算偏移量
query = query.limit(limit).offset(offset) # 应用分页
res = await query
if not res:
raise HTTPException(status_code=404, detail='店铺不存在')
num = len(res)
return OutList(count=count, num=num, items=res)
# 更新店铺
@app.put("", response_model=Out, description='更新店铺', summary='更新店铺')
@handle_exceptions_unified()
async def put(id: UUID = Query(..., description='主键ID'),
item: Update = Body(..., description='更新数据'),
):
"""
部分更新店铺,只更新传入的非空字段
"""
# 检查店铺是否存在
secret = await Shop.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='店铺不存在')
# Collect only the fields that were explicitly provided (exclude_unset filters out omitted fields)
update_data = item.model_dump(exclude_unset=True)
# 如果没有要更新的字段
if not update_data:
raise HTTPException(status_code=400, detail='没有要更新的字段')
# 更新店铺字段
await secret.update_from_dict(update_data)
await secret.save()
return secret
# 删除店铺
@app.delete("", response_model=CommonOut, description='删除店铺', summary='删除店铺')
@handle_exceptions_unified()
async def delete(id: UUID = Query(..., description='主键ID'),
):
"""删除店铺"""
secret = await Shop.get_or_none(id=id)
if not secret:
raise HTTPException(status_code=404, detail='店铺不存在')
await secret.delete()
# Tortoise ORM's instance delete() returns None rather than the number of deleted rows,
# so return count=1 on success; any exception is caught by the decorator
return CommonOut(count=1)
# 随机取一个店铺
@app.get("/random", response_model=Out, description='随机取一个店铺', summary='随机取一个店铺')
@handle_exceptions_unified()
async def get_random_shop():
"""
随机取一个店铺(事务内计数与偏移选择,避免数据库不稳定的随机排序)
"""
async with in_transaction() as conn:
q = Shop.all().using_db(conn)
total = await q.count()
if total == 0:
raise HTTPException(status_code=404, detail='店铺不存在')
pick_index = random.choice(range(total))
item = await q.order_by('create_time').offset(pick_index).first()
return item

29
back/compose.yml Executable file
View File

@@ -0,0 +1,29 @@
services:
  # Compose service name
  ca_auto_table:
    # Container name
    container_name: ca_auto_table
    build:
      # Build the image from the Dockerfile in the current directory
      context: .
      dockerfile: Dockerfile
    # Restart policy
    restart: always
    # Volumes: keep container data on the host
    # Mounts the current directory into /app inside the container
    volumes:
      - .:/app
    # Environment variables (can also be set in the Dockerfile and read by the app)
    environment:
      - NAME=ca_auto_table
      - TZ=Asia/Shanghai
    # Port mapping: container port to host port
    ports:
      - "6060:6060"
    # Logging: cap log size and enable rotation
    logging:
      driver: "json-file"
      options:
        max-size: "10m"   # at most 10 MB per log file
        max-file: "3"     # keep at most 3 log files
        compress: "true"  # compress rotated log files

152
back/main.py Normal file
View File

@@ -0,0 +1,152 @@
from fastapi import FastAPI
from settings import TORTOISE_ORM
from fastapi.middleware.cors import CORSMiddleware
from tortoise.contrib.fastapi import register_tortoise
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from tortoise import Tortoise
from contextlib import asynccontextmanager
from apis import app as main_router
import asyncio
import signal
import sys
@asynccontextmanager
async def lifespan(app: FastAPI):
"""
应用生命周期管理函数
- 启动:注册定时任务并启动调度器
- 关闭:优雅关闭调度器与数据库连接
"""
print('项目启动...')
# 初始化数据库连接(使用 Tortoise 直接初始化,确保路由与定时任务可用)
try:
await Tortoise.init(config=TORTOISE_ORM)
print('数据库初始化完成')
except Exception as e:
print(f'数据库初始化失败: {e}')
# 每30分钟保持一次数据库连接活跃
scheduler.add_job(
keep_db_connection_alive,
IntervalTrigger(minutes=30),
id='keep_db_alive',
name='保持数据库连接',
coalesce=True,
misfire_grace_time=30,
)
scheduler.start()
try:
yield
finally:
print('项目结束...')
# 关闭数据库连接
print('关闭数据库连接...')
try:
await asyncio.wait_for(Tortoise.close_connections(), timeout=2)
except asyncio.TimeoutError:
print('关闭数据库连接超时')
except Exception as e:
print(f'关闭数据库连接出错: {e}')
# 关闭调度器
print('关闭调度器...')
try:
if scheduler is not None and hasattr(scheduler, 'shutdown'):
scheduler.shutdown(wait=False)
except Exception as e:
print(f'关闭调度器出错: {e}')
# 创建 FastAPI 应用实例
app = FastAPI(lifespan=lifespan)
# 配置 CORS 中间件
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# 创建调度器实例
scheduler = AsyncIOScheduler()
# 包含主路由
app.include_router(main_router)
# Note: the custom lifespan above initializes the database manually at startup.
# If you switch back to the default event mechanism, re-enable register_tortoise.
async def keep_db_connection_alive():
"""
保持数据库连接活跃的函数
定期执行简单查询以防止连接超时
"""
try:
conn = Tortoise.get_connection("default")
await conn.execute_query("SELECT 1")
print("数据库连接检查成功")
except Exception as e:
print(f"数据库连接检查失败: {e}")
def signal_handler():
"""
Handle termination signals and make sure resources are released properly
"""
async def shutdown():
print("收到终止信号,开始优雅关闭...")
# Close database connections
print("关闭数据库连接...")
try:
await Tortoise.close_connections()
except Exception as e:
print(f"关闭数据库连接时出错: {e}")
# Shut down the scheduler
print("关闭调度器...")
try:
scheduler.shutdown()
except Exception as e:
print(f"关闭调度器时出错: {e}")
print("所有资源已关闭,程序退出")
sys.exit(0)
loop = asyncio.get_event_loop()
# Schedule the async shutdown on the running event loop; shutdown() calls sys.exit(0)
# itself, so avoid blocking calls such as run_until_complete() here, which would raise
# "this event loop is already running" when triggered from a signal handler
loop.create_task(shutdown())
if __name__ == '__main__':
from uvicorn import run
# Register signal handlers
for sig in (signal.SIGINT, signal.SIGTERM):
signal.signal(sig, lambda sig, frame: signal_handler())
run(
'main:app',
host='0.0.0.0',
port=6060,
reload=False,
workers=1,
# loop='uvloop',
http='httptools',
limit_concurrency=10000,
backlog=4096,
timeout_keep_alive=5
)

View File

@@ -0,0 +1,67 @@
from tortoise import BaseDBAsyncClient
async def upgrade(db: BaseDBAsyncClient) -> str:
return """
CREATE TABLE IF NOT EXISTS `food` (
`id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID',
`name` VARCHAR(255) NOT NULL COMMENT '食物名称',
`create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6),
`update_time` DATETIME(6) NOT NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6),
KEY `idx_food_name_b88f83` (`name`),
KEY `idx_food_create__2db565` (`create_time`)
) CHARACTER SET utf8mb4 COMMENT='食物表';
CREATE TABLE IF NOT EXISTS `info` (
`id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID',
`child_full_name` VARCHAR(255) NOT NULL COMMENT '孩子全名',
`parent_full_name` VARCHAR(255) NOT NULL COMMENT '家长全名',
`child_birthday` VARCHAR(32) NOT NULL COMMENT '孩子生日',
`address_str` VARCHAR(255) NOT NULL COMMENT '街道地址',
`city_name` VARCHAR(255) NOT NULL COMMENT '城市',
`parent_phone` VARCHAR(64) NOT NULL COMMENT '家长电话',
`postcode` VARCHAR(20) NOT NULL COMMENT '邮编',
`province` VARCHAR(255) NOT NULL COMMENT '省/州全称',
`status` BOOL NOT NULL COMMENT '状态' DEFAULT 0,
`email` VARCHAR(255) NOT NULL UNIQUE COMMENT '邮箱',
`email_content` LONGTEXT COMMENT '邮件内容',
`text` LONGTEXT COMMENT '文本内容',
`create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6),
`update_time` DATETIME(6) NOT NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6),
KEY `idx_info_child_f_dae7dc` (`child_full_name`),
KEY `idx_info_parent__d99e40` (`parent_full_name`),
KEY `idx_info_address_8c2b80` (`address_str`),
KEY `idx_info_city_na_ac7d8f` (`city_name`),
KEY `idx_info_postcod_9a4431` (`postcode`),
KEY `idx_info_provinc_58581b` (`province`),
KEY `idx_info_email_653be4` (`email`),
KEY `idx_info_create__3bea91` (`create_time`),
KEY `idx_info_city_na_a8ca74` (`city_name`, `postcode`, `province`),
KEY `idx_info_child_f_2cf26a` (`child_full_name`, `parent_full_name`)
) CHARACTER SET utf8mb4 COMMENT='信息表';
CREATE TABLE IF NOT EXISTS `shop` (
`id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID',
`province` VARCHAR(255) COMMENT '省份',
`city` VARCHAR(255) NOT NULL COMMENT '城市',
`street` VARCHAR(255) NOT NULL COMMENT '街道',
`shop_name` VARCHAR(255) NOT NULL COMMENT '店铺名称',
`shop_number` VARCHAR(255) COMMENT '店铺号码',
`create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6),
`update_time` DATETIME(6) NOT NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6),
KEY `idx_shop_provinc_904758` (`province`),
KEY `idx_shop_city_69d82f` (`city`),
KEY `idx_shop_street_5aaa95` (`street`),
KEY `idx_shop_shop_na_938b2f` (`shop_name`),
KEY `idx_shop_create__e13964` (`create_time`),
KEY `idx_shop_provinc_72e64a` (`province`, `city`, `street`)
) CHARACTER SET utf8mb4 COMMENT='店铺表';
CREATE TABLE IF NOT EXISTS `aerich` (
`id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
`version` VARCHAR(255) NOT NULL,
`app` VARCHAR(100) NOT NULL,
`content` JSON NOT NULL
) CHARACTER SET utf8mb4;"""
async def downgrade(db: BaseDBAsyncClient) -> str:
return """
"""

4
back/pyproject.toml Normal file
View File

@@ -0,0 +1,4 @@
[tool.aerich]
tortoise_orm = "settings.TORTOISE_ORM"
location = "./migrations"
src_folder = "./."

25
back/requirements.txt Normal file
View File

@@ -0,0 +1,25 @@
aerich
aiohttp
aiomysql
APScheduler
fastapi
# numpy
tenacity
tortoise-orm
uvicorn
pycryptodome
curl_cffi
fake_useragent
aiohttp_socks
pynacl
eth-account
base58
aioredis
redis
httpx
loguru
uvloop
cryptography
uvicorn[standard]
psutil
DrissionPage

34
back/settings.py Normal file
View File

@@ -0,0 +1,34 @@
TORTOISE_ORM = {
'connections': {
'default': {
# 'engine': 'tortoise.backends.asyncpg', PostgreSQL
'engine': 'tortoise.backends.mysql', # MySQL or Mariadb
'credentials': {
'host': '192.168.11.67',
'port': 3306,
'user': 'us',
'password': 'BkftDZfBzjBFAFwD',
'database': 'us',
'minsize': 10, # minimum pool size; keep 10 idle connections rather than opening too many
'maxsize': 30, # maximum pool size; cap at 30 to stay within the database connection limit
'charset': 'utf8mb4',
"echo": False,
'pool_recycle': 3600, # recycle connections after 3600 s (1 hour) instead of 300 s
'connect_timeout': 10, # connection timeout in seconds
}
},
},
'apps': {
'models': {
# Register only model modules that actually exist; the non-existent apis.project.models was removed so Aerich initialization does not fail
'models': [
"apis.country.models",
"aerich.models"
],
'default_connection': 'default',
}
},
'use_tz': False,
'timezone': 'Asia/Shanghai'
}

0
back/utils/__init__.py Normal file
View File

143
back/utils/browser_api.py Normal file
View File

@@ -0,0 +1,143 @@
import datetime
import asyncio
import httpx
from loguru import logger
from utils.decorators import handle_exceptions_unified
class BrowserApi:
"""
浏览器接口
"""
def __init__(self):
self.local_url = 'http://127.0.0.1:54345'
self.headers = {'Content-Type': 'application/json'}
# 使用异步 HTTP 客户端,启用连接池和超时设置
self.client = httpx.AsyncClient(
base_url=self.local_url,
headers=self.headers,
timeout=httpx.Timeout(30.0, connect=10.0), # 总超时30秒连接超时10秒
limits=httpx.Limits(max_keepalive_connections=50, max_connections=100), # 连接池配置
)
async def __aenter__(self):
"""异步上下文管理器入口"""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""异步上下文管理器出口,关闭客户端"""
await self.aclose()
async def aclose(self):
"""关闭 HTTP 客户端"""
if self.client:
await self.client.aclose()
# 打开指纹浏览器
@handle_exceptions_unified()
async def open_browser(self, id: str, jc: int = 0):
"""
打开指纹浏览器(异步优化版本)
:param jc: 计次
:param id: 浏览器id
:return:http, pid
"""
if jc > 3:
return None, None
url = '/browser/open'
data = {
'id': id
}
try:
res = await self.client.post(url, json=data)
res.raise_for_status() # 检查 HTTP 状态码
res_data = res.json()
logger.info(f'打开指纹浏览器: {res_data}')
if not res_data.get('success'):
logger.error(f'打开指纹浏览器失败: {res_data}')
return await self.open_browser(id, jc + 1)
data = res_data.get('data')
http = data.get('http')
pid = data.get('pid')
logger.info(f'打开指纹浏览器成功: {http}, {pid}')
return http, pid
except httpx.TimeoutException as e:
logger.error(f'打开指纹浏览器超时: {e}')
if jc < 3:
return await self.open_browser(id, jc + 1)
return None, None
except httpx.RequestError as e:
logger.error(f'打开指纹浏览器请求错误: {e}')
if jc < 3:
return await self.open_browser(id, jc + 1)
return None, None
except Exception as e:
logger.error(f'打开指纹浏览器异常: {e}')
if jc < 3:
return await self.open_browser(id, jc + 1)
return None, None
# 关闭指纹浏览器
@handle_exceptions_unified()
async def close_browser(self, id: str, jc: int = 0):
"""
关闭指纹浏览器(异步优化版本)
:param jc: 计次
:param id: 浏览器id
:return:
"""
if jc > 3:
return None
url = '/browser/close'
data = {
'id': id
}
try:
res = await self.client.post(url, json=data)
res.raise_for_status() # 检查 HTTP 状态码
res_data = res.json()
logger.info(f'关闭指纹浏览器: {res_data}')
if not res_data.get('success'):
msg = res_data.get('msg', '')
# 如果浏览器正在打开中,等待后重试(不是真正的错误)
if '正在打开中' in msg or 'opening' in msg.lower():
if jc < 3:
# 等待 1-3 秒后重试(根据重试次数递增等待时间)
wait_time = (jc + 1) * 1.0 # 第1次重试等1秒第2次等2秒第3次等3秒
logger.info(f'浏览器正在打开中,等待 {wait_time} 秒后重试关闭: browser_id={id}')
await asyncio.sleep(wait_time)
return await self.close_browser(id, jc + 1)
else:
# 超过重试次数,记录警告但不作为错误
logger.warning(f'关闭指纹浏览器失败浏览器正在打开中已重试3次: browser_id={id}')
return None
else:
# 其他错误,记录为错误并重试
logger.error(f'关闭指纹浏览器失败: {res_data}')
if jc < 3:
await asyncio.sleep(0.5) # 短暂等待后重试
return await self.close_browser(id, jc + 1)
return None
logger.info(f'关闭指纹浏览器成功: browser_id={id}')
return True
except httpx.TimeoutException as e:
logger.error(f'关闭指纹浏览器超时: {e}')
if jc < 3:
await asyncio.sleep(1.0)
return await self.close_browser(id, jc + 1)
return None
except httpx.RequestError as e:
logger.error(f'关闭指纹浏览器请求错误: {e}')
if jc < 3:
await asyncio.sleep(1.0)
return await self.close_browser(id, jc + 1)
return None
except Exception as e:
logger.error(f'关闭指纹浏览器异常: {e}')
if jc < 3:
await asyncio.sleep(1.0)
return await self.close_browser(id, jc + 1)
return None
browser_api = BrowserApi()
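A usage sketch for the client above (the browser id is illustrative; assumes the local fingerprint-browser API is listening on 127.0.0.1:54345):

import asyncio
from utils.browser_api import BrowserApi

async def demo():
    async with BrowserApi() as api:  # the context manager closes the httpx client on exit
        http, pid = await api.open_browser("9eac7f95ca2d47359ace4083a566e119")
        if http:
            print("devtools address:", http, "pid:", pid)
            await api.close_browser("9eac7f95ca2d47359ace4083a566e119")

asyncio.run(demo())

The module also exposes a shared browser_api instance, so constructing a new client as above is optional.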

165
back/utils/decorators.py Normal file
View File

@@ -0,0 +1,165 @@
from functools import wraps
from fastapi import HTTPException
from typing import Callable, Any, Optional
import logging
import asyncio
from tortoise.exceptions import OperationalError
# 获取日志记录器
logger = logging.getLogger(__name__)
def handle_exceptions_unified(
max_retries: int = 0,
retry_delay: float = 1.0,
status_code: int = 500,
custom_message: Optional[str] = None,
is_background_task: bool = False
):
"""
统一的异常处理装饰器
集成了所有异常处理功能:数据库重试、自定义状态码、自定义消息、后台任务处理
Args:
max_retries: 最大重试次数默认0不重试
retry_delay: 重试间隔时间默认1秒
status_code: HTTP状态码默认500
custom_message: 自定义错误消息前缀
is_background_task: 是否为后台任务不抛出HTTPException
使用方法:
# 基础异常处理
@handle_exceptions_unified()
async def basic_function(...):
pass
# 带数据库重试
@handle_exceptions_unified(max_retries=3, retry_delay=1.0)
async def db_function(...):
pass
# 自定义状态码和消息
@handle_exceptions_unified(status_code=400, custom_message="参数错误")
async def validation_function(...):
pass
# 后台任务处理
@handle_exceptions_unified(is_background_task=True)
async def background_function(...):
pass
"""
def decorator(func: Callable) -> Callable:
@wraps(func)
async def wrapper(*args, **kwargs) -> Any:
last_exception = None
for attempt in range(max_retries + 1):
try:
return await func(*args, **kwargs)
except HTTPException as e:
# HTTPException 直接抛出,不重试
if is_background_task:
logger.error(f"后台任务 {func.__name__} HTTPException: {str(e)}")
return False
raise
except OperationalError as e:
last_exception = e
error_msg = str(e).lower()
# 检查是否是连接相关的错误
if any(keyword in error_msg for keyword in [
'lost connection', 'connection', 'timeout',
'server has gone away', 'broken pipe'
]):
if attempt < max_retries:
logger.warning(
f"函数 {func.__name__} 数据库连接错误 (尝试 {attempt + 1}/{max_retries + 1}): {str(e)}"
)
# 等待一段时间后重试,使用指数退避
await asyncio.sleep(retry_delay * (2 ** attempt))
continue
else:
logger.error(
f"函数 {func.__name__} 数据库连接错误,已达到最大重试次数: {str(e)}"
)
else:
# 非连接错误,直接处理
logger.error(f"函数 {func.__name__} 发生数据库错误: {str(e)}")
if is_background_task:
return False
error_detail = f"{custom_message}: {str(e)}" if custom_message else f"数据库操作失败: {str(e)}"
raise HTTPException(status_code=status_code, detail=error_detail)
except Exception as e:
last_exception = e
if attempt < max_retries:
logger.warning(
f"函数 {func.__name__} 发生异常 (尝试 {attempt + 1}/{max_retries + 1}): {str(e)}"
)
await asyncio.sleep(retry_delay * (2 ** attempt))
continue
else:
logger.error(f"函数 {func.__name__} 发生异常: {str(e)}", exc_info=True)
if is_background_task:
return False
break
# 所有重试都失败了,处理最后一个异常
if is_background_task:
return False
if isinstance(last_exception, OperationalError):
error_detail = f"{custom_message}: 数据库连接失败: {str(last_exception)}" if custom_message else f"数据库连接失败: {str(last_exception)}"
else:
error_detail = f"{custom_message}: {str(last_exception)}" if custom_message else str(last_exception)
raise HTTPException(status_code=status_code, detail=error_detail)
return wrapper
return decorator
# 向后兼容的别名函数
def handle_exceptions_with_db_retry(max_retries: int = 3, retry_delay: float = 1.0):
"""
带数据库连接重试的异常处理装饰器(向后兼容)
这是 handle_exceptions_unified 的别名,保持向后兼容性
"""
return handle_exceptions_unified(max_retries=max_retries, retry_delay=retry_delay)
def handle_exceptions(func: Callable) -> Callable:
"""
基础异常处理装饰器(向后兼容)
这是 handle_exceptions_unified() 的别名,保持向后兼容性
"""
return handle_exceptions_unified()(func)
def handle_background_task_exceptions(func: Callable) -> Callable:
"""
后台任务异常处理装饰器(向后兼容)
这是 handle_exceptions_unified 的别名,保持向后兼容性
"""
return handle_exceptions_unified(is_background_task=True)(func)
def handle_exceptions_with_custom_message(message: str = "操作失败"):
"""
带自定义错误消息的异常处理装饰器(向后兼容)
这是 handle_exceptions_unified 的别名,保持向后兼容性
"""
return handle_exceptions_unified(custom_message=message)
def handle_exceptions_with_status_code(status_code: int = 500, message: str = None):
"""
带自定义状态码和错误消息的异常处理装饰器(向后兼容)
这是 handle_exceptions_unified 的别名,保持向后兼容性
"""
return handle_exceptions_unified(status_code=status_code, custom_message=message)
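A tiny sketch of the background-task mode described in the docstring above: with is_background_task=True the wrapper logs the failure and returns False instead of raising HTTPException (the function name here is illustrative).

import asyncio
from utils.decorators import handle_exceptions_unified

@handle_exceptions_unified(is_background_task=True)
async def flaky_job():
    raise RuntimeError("boom")  # any unhandled error is logged, not propagated

print(asyncio.run(flaky_job()))  # prints False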

47
back/utils/exceptions.py Normal file
View File

@@ -0,0 +1,47 @@
import os
from fastapi import Request, status
from fastapi.exceptions import HTTPException, RequestValidationError
from fastapi.responses import JSONResponse
from .logs import getLogger
logger = getLogger(os.environ.get('APP_NAME'))
def global_http_exception_handler(request: Request, exc):
"""
全局HTTP请求处理异常
:param request: HTTP请求对象
:param exc: 本次发生的异常对象
:return:
"""
# 使用日志记录异常
logger.error(f"发生异常:{exc.detail}")
# 直接返回JSONResponse避免重新抛出异常导致循环
return JSONResponse(
status_code=exc.status_code,
content={
'err_msg': exc.detail,
'status': False
},
headers=getattr(exc, 'headers', None)
)
def global_request_exception_handler(request: Request, exc):
"""
全局请求校验异常处理函数
:param request: HTTP请求对象
:param exc: 本次发生的异常对象
:return:
"""
# 直接返回JSONResponse避免重新抛出异常
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={
'err_msg': exc.errors()[0],
'status': False
}
)
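These handlers are not registered anywhere in this commit (main.py builds its FastAPI app without them); a typical wiring, if desired, would look like this sketch:

from fastapi import FastAPI
from fastapi.exceptions import HTTPException, RequestValidationError
from utils.exceptions import global_http_exception_handler, global_request_exception_handler

app = FastAPI()
# Route HTTPException and validation errors through the custom JSON responses above
app.add_exception_handler(HTTPException, global_http_exception_handler)
app.add_exception_handler(RequestValidationError, global_request_exception_handler)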

218
back/utils/logs.py Normal file
View File

@@ -0,0 +1,218 @@
import logging
import os
from logging import Logger
from concurrent_log_handler import ConcurrentRotatingFileHandler
from logging.handlers import TimedRotatingFileHandler
import gzip
import shutil
import glob
from datetime import datetime, timedelta
from pathlib import Path
def getLogger(name: str = 'root') -> Logger:
"""
创建一个按2小时滚动、支持多进程安全、自动压缩日志的 Logger
:param name: 日志器名称
:return: 单例 Logger 对象
"""
logger: Logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
if not logger.handlers:
# 控制台输出
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
# 日志目录
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
# 日志文件路径
log_file = os.path.join(log_dir, f"{name}.log")
# File handler: rotate every 2 hours, keep 7 days of history (84 rotated files)
file_handler = TimedRotatingFileHandler(
filename=log_file,
when='H',
interval=2, # rotate every 2 hours
backupCount=84, # keep 7 days = 7 * 24 / 2 = 84 files
encoding='utf-8',
delay=False,
utc=False # set to True to rotate on UTC boundaries
)
# 设置 Formatter - 简化格式,去掉路径信息
formatter = logging.Formatter(
fmt="{name}{levelname} {asctime} {message}",
datefmt="%Y-%m-%d %H:%M:%S",
style="{"
)
console_formatter = logging.Formatter(
fmt="{levelname} {asctime} {message}",
datefmt="%Y-%m-%d %H:%M:%S",
style="{"
)
file_handler.setFormatter(formatter)
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
logger.addHandler(file_handler)
# 添加压缩功能(在第一次创建 logger 时执行一次)
_compress_old_logs(log_dir, name)
return logger
def _compress_old_logs(log_dir: str, name: str):
"""
将旧日志压缩成 .gz 格式
"""
pattern = os.path.join(log_dir, f"{name}.log.*")
for filepath in glob.glob(pattern):
if filepath.endswith('.gz'):
continue
try:
with open(filepath, 'rb') as f_in:
with gzip.open(filepath + '.gz', 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(filepath)
except Exception as e:
print(f"日志压缩失败: {filepath}, 原因: {e}")
def compress_old_logs(log_dir: str = None, name: str = "root"):
"""
压缩旧的日志文件(公共接口)
Args:
log_dir: 日志目录,如果不指定则使用默认目录
name: 日志器名称
"""
if log_dir is None:
log_dir = "logs"
_compress_old_logs(log_dir, name)
def log_api_call(logger: Logger, user_id: str = None, endpoint: str = None, method: str = None, params: dict = None, response_status: int = None, client_ip: str = None):
"""
记录API调用信息包含用户ID、接口路径、请求方法、参数、响应状态和来源IP
Args:
logger: 日志器对象
user_id: 用户ID
endpoint: 接口路径
method: 请求方法 (GET, POST, PUT, DELETE等)
params: 请求参数
response_status: 响应状态码
client_ip: 客户端IP地址
"""
try:
# 构建日志信息
log_parts = []
if user_id:
log_parts.append(f"用户={user_id}")
if client_ip:
log_parts.append(f"IP={client_ip}")
if method and endpoint:
log_parts.append(f"{method} {endpoint}")
elif endpoint:
log_parts.append(f"接口={endpoint}")
if params:
# 过滤敏感信息
safe_params = {k: v for k, v in params.items()
if k.lower() not in ['password', 'token', 'secret', 'key']}
if safe_params:
log_parts.append(f"参数={safe_params}")
if response_status:
log_parts.append(f"状态码={response_status}")
if log_parts:
log_message = " ".join(log_parts)
logger.info(log_message)
except Exception as e:
logger.error(f"记录API调用日志失败: {e}")
def delete_old_compressed_logs(log_dir: str = None, days: int = 7):
"""
删除超过指定天数的压缩日志文件
Args:
log_dir: 日志目录,如果不指定则使用默认目录
days: 保留天数默认7天
"""
try:
if log_dir is None:
log_dir = "logs"
log_path = Path(log_dir)
if not log_path.exists():
return
# 计算截止时间
cutoff_time = datetime.now() - timedelta(days=days)
# 获取所有压缩日志文件
gz_files = [f for f in log_path.iterdir()
if f.is_file() and f.name.endswith('.log.gz')]
deleted_count = 0
for gz_file in gz_files:
# 获取文件修改时间
file_mtime = datetime.fromtimestamp(gz_file.stat().st_mtime)
# 如果文件超过保留期限,删除它
if file_mtime < cutoff_time:
gz_file.unlink()
print(f"删除旧压缩日志文件: {gz_file}")
deleted_count += 1
if deleted_count > 0:
print(f"总共删除了 {deleted_count} 个旧压缩日志文件")
except Exception as e:
print(f"删除旧压缩日志文件失败: {e}")
if __name__ == '__main__':
logger = getLogger('WebAPI')
# 基础日志测试
logger.info("系统启动")
logger.debug("调试信息")
logger.warning("警告信息")
logger.error("错误信息")
# API调用日志测试
log_api_call(
logger=logger,
user_id="user123",
endpoint="/api/users/info",
method="GET",
params={"id": 123, "fields": ["name", "email"]},
response_status=200,
client_ip="192.168.1.100"
)
log_api_call(
logger=logger,
user_id="user456",
endpoint="/api/users/login",
method="POST",
params={"username": "test", "password": "hidden"}, # password会被过滤
response_status=401,
client_ip="10.0.0.50"
)
# 单例验证
logger2 = getLogger('WebAPI')
print(f"Logger单例验证: {id(logger) == id(logger2)}")

8
back/utils/out_base.py Normal file
View File

@@ -0,0 +1,8 @@
from pydantic import BaseModel, Field
class CommonOut(BaseModel):
"""操作结果详情模型"""
code: int = Field(200, description='状态码')
message: str = Field('成功', description='提示信息')
count: int = Field(0, description='操作影响的记录数')

96
back/utils/redis_tool.py Normal file
View File

@@ -0,0 +1,96 @@
import redis
from loguru import logger
class RedisClient:
def __init__(self, host: str = 'localhost', port: int = 6379, password: str = None):
self.host = host
self.port = port
self.password = password
self.browser_client = None
self.task_client = None
self.cache_client = None
self.ok_client = None
self.init()
# 初始化
def init(self):
"""
初始化Redis客户端
:return:
"""
if self.browser_client is None:
self.browser_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=0,
decode_responses=True)
if self.task_client is None:
self.task_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=1,
decode_responses=True)
if self.cache_client is None:
self.cache_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=2,
decode_responses=True)
if self.ok_client is None:
self.ok_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=3,
decode_responses=True)
logger.info("Redis连接已初始化")
# 关闭连接
def close(self):
self.browser_client.close()
self.task_client.close()
self.cache_client.close()
self.ok_client.close()
logger.info("Redis连接已关闭")
"""browser_client"""
# 写入浏览器信息
async def set_browser(self, browser_id: str, data: dict):
try:
# 处理None值将其转换为空字符串
processed_data = {}
for key, value in data.items():
if value is None:
processed_data[key] = ""
else:
processed_data[key] = value
self.browser_client.hset(browser_id, mapping=processed_data)
logger.info(f"写入浏览器信息: {browser_id} - {processed_data}")
return True
except Exception as e:
logger.error(f"写入浏览器信息失败: {browser_id} - {e}")
return False
# Get browser info
async def get_browser(self, browser_id: str = None):
try:
if browser_id is None:
# hgetall() needs a key, so collect every hash in this db by scanning all keys
data = {key: self.browser_client.hgetall(key) for key in self.browser_client.scan_iter()}
else:
data = self.browser_client.hgetall(browser_id)
logger.info(f"获取浏览器信息: {browser_id} - {data}")
return data
except Exception as e:
logger.error(f"获取浏览器信息失败: {browser_id} - {e}")
async def main():
host = '183.66.27.14'
port = 50086
password = 'redis_AdJsBP'
redis_client = RedisClient(host, port, password)
# await redis_client.set_browser('9eac7f95ca2d47359ace4083a566e119', {'status': 'online', 'current_task_id': None})
await redis_client.get_browser('9eac7f95ca2d47359ace4083a566e119')
# 关闭连接
redis_client.close()
if __name__ == '__main__':
import asyncio
asyncio.run(main())

177
back/utils/session_store.py Normal file
View File

@@ -0,0 +1,177 @@
import os
import json
import threading
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
from loguru import logger
class SessionStore:
"""
会话持久化存储(日志文件版 + 内存缓存)
优化方案:
1. 使用日志文件记录(追加模式,性能好,不会因为文件变大而变慢)
2. 在内存中保留最近的会话记录(用于快速查询)
3. 定期清理过期的内存记录保留最近1小时或最多1000条
"""
def __init__(self, file_path: str = 'logs/sessions.log', enable_log: bool = True, max_memory_records: int = 1000):
"""
初始化会话存储。
Args:
file_path (str): 日志文件路径(默认 logs/sessions.log
enable_log (bool): 是否启用日志记录False 则不记录到文件
max_memory_records (int): 内存中保留的最大记录数默认1000
"""
self.file_path = file_path
self.enable_log = enable_log
self.max_memory_records = max_memory_records
self._lock = threading.Lock()
# 内存中的会话记录 {pid: record}
self._memory_cache: Dict[int, Dict[str, Any]] = {}
# 记录创建时间,用于清理过期记录
self._cache_timestamps: Dict[int, datetime] = {}
if enable_log:
os.makedirs(os.path.dirname(file_path), exist_ok=True)
def _write_log(self, action: str, record: Dict[str, Any]) -> None:
"""
写入日志文件(追加模式,性能好)
Args:
action (str): 操作类型CREATE/UPDATE
record (Dict[str, Any]): 会话记录
"""
if not self.enable_log:
return
try:
with self._lock:
log_line = json.dumps({
'action': action,
'timestamp': datetime.now().isoformat(),
'data': record
}, ensure_ascii=False)
with open(self.file_path, 'a', encoding='utf-8') as f:
f.write(log_line + '\n')
except Exception as e:
# 静默处理日志写入错误,避免影响主流程
logger.debug(f"写入会话日志失败: {e}")
def _cleanup_old_cache(self) -> None:
"""
清理过期的内存缓存记录
- 保留最近1小时的记录
- 最多保留 max_memory_records 条记录
"""
now = datetime.now()
expire_time = now - timedelta(hours=1)
# 清理过期记录
expired_pids = [
pid for pid, timestamp in self._cache_timestamps.items()
if timestamp < expire_time
]
for pid in expired_pids:
self._memory_cache.pop(pid, None)
self._cache_timestamps.pop(pid, None)
# 如果记录数仍然超过限制,删除最旧的记录
if len(self._memory_cache) > self.max_memory_records:
# 按时间戳排序,删除最旧的
sorted_pids = sorted(
self._cache_timestamps.items(),
key=lambda x: x[1]
)
# 计算需要删除的数量
to_remove = len(self._memory_cache) - self.max_memory_records
for pid, _ in sorted_pids[:to_remove]:
self._memory_cache.pop(pid, None)
self._cache_timestamps.pop(pid, None)
def create_session(self, record: Dict[str, Any]) -> None:
"""
创建新会话记录。
Args:
record (Dict[str, Any]): 会话信息字典
"""
record = dict(record)
record.setdefault('created_at', datetime.now().isoformat())
pid = record.get('pid')
if pid is not None:
with self._lock:
# 保存到内存缓存
self._memory_cache[pid] = record
self._cache_timestamps[pid] = datetime.now()
# 清理过期记录
self._cleanup_old_cache()
# 写入日志文件(追加模式,性能好)
self._write_log('CREATE', record)
def update_session(self, pid: int, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""
按 PID 更新会话记录。
Args:
pid (int): 进程ID
updates (Dict[str, Any]): 更新字段字典
Returns:
Optional[Dict[str, Any]]: 更新后的会话记录
"""
with self._lock:
# 从内存缓存获取
record = self._memory_cache.get(pid)
if record:
record.update(updates)
record.setdefault('updated_at', datetime.now().isoformat())
self._cache_timestamps[pid] = datetime.now()
else:
# 如果内存中没有,创建一个新记录
record = {'pid': pid}
record.update(updates)
record.setdefault('created_at', datetime.now().isoformat())
record.setdefault('updated_at', datetime.now().isoformat())
self._memory_cache[pid] = record
self._cache_timestamps[pid] = datetime.now()
if record:
# 写入日志文件
self._write_log('UPDATE', record)
return record
def get_session_by_pid(self, pid: int) -> Optional[Dict[str, Any]]:
"""
按 PID 查询会话记录(仅从内存缓存查询,性能好)
Args:
pid (int): 进程ID
Returns:
Optional[Dict[str, Any]]: 会话记录
"""
with self._lock:
return self._memory_cache.get(pid)
def list_sessions(self, status: Optional[int] = None) -> List[Dict[str, Any]]:
"""
列出会话记录,可按状态过滤(仅从内存缓存查询)
Args:
status (Optional[int]): 状态码过滤(如 100 运行中、200 已结束、500 失败)
Returns:
List[Dict[str, Any]]: 会话记录列表
"""
with self._lock:
records = list(self._memory_cache.values())
if status is None:
return records
return [r for r in records if r.get('status') == status]
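A short usage sketch for SessionStore (the pid and status values are illustrative; status codes follow the 100/200/500 convention mentioned in list_sessions):

from utils.session_store import SessionStore

store = SessionStore()  # appends to logs/sessions.log and caches recent records in memory
store.create_session({"pid": 12345, "browser_id": "demo", "status": 100})
store.update_session(12345, {"status": 200})
print(store.get_session_by_pid(12345))
print(store.list_sessions(status=200))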

56
back/utils/time_tool.py Normal file
View File

@@ -0,0 +1,56 @@
from datetime import datetime, timedelta, timezone
from pydantic import BaseModel, field_serializer
CN_TZ = timezone(timedelta(hours=8))
def now_cn() -> datetime:
"""
获取中国时区的当前时间
返回带有中国时区信息的 datetime 对象
"""
return datetime.now(CN_TZ)
def parse_time(val: str | int, is_end: bool = False) -> datetime:
"""
将传入的字符串或时间戳解析为中国时区的 datetime用于数据库查询时间比较。
支持格式:
- "YYYY-MM-DD"
- "YYYY-MM-DD HH:mm:ss"
- 10 位时间戳(秒)
- 13 位时间戳(毫秒)
"""
dt_cn: datetime
if isinstance(val, int) or (isinstance(val, str) and val.isdigit()):
ts = int(val)
# 根据量级判断是秒还是毫秒
if ts >= 10**12:
dt_cn = datetime.fromtimestamp(ts / 1000, CN_TZ)
else:
dt_cn = datetime.fromtimestamp(ts, CN_TZ)
else:
try:
dt_cn = datetime.strptime(val, "%Y-%m-%d").replace(tzinfo=CN_TZ)
if is_end:
dt_cn = dt_cn.replace(hour=23, minute=59, second=59, microsecond=999999)
except ValueError:
try:
dt_cn = datetime.strptime(val, "%Y-%m-%d %H:%M:%S").replace(tzinfo=CN_TZ)
except ValueError:
raise ValueError("时间格式错误,支持 'YYYY-MM-DD'、'YYYY-MM-DD HH:mm:ss' 或 10/13位时间戳")
# Match the ORM config (use_tz=False): return a naive datetime in China local time
return dt_cn.replace(tzinfo=None)
# 自动把 datetime 序列化为 13位时间戳的基类
class TimestampModel(BaseModel):
"""自动把 datetime 序列化为 13位时间戳的基类"""
model_config = {"arbitrary_types_allowed": True}
@field_serializer("*", when_used="json", check_fields=False) # "*" 表示作用于所有字段
def serialize_datetime(self, value):
if isinstance(value, datetime):
return int(value.timestamp()*1000) # 转成 13 位 int 时间戳
return value
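A quick sketch of the helpers above: parse_time returns naive China-local datetimes (matching use_tz=False), and any TimestampModel subclass serializes datetime fields as 13-digit timestamps in JSON mode.

from datetime import datetime
from utils.time_tool import parse_time, TimestampModel

print(parse_time("2024-01-01"))               # 2024-01-01 00:00:00 (naive, China local time)
print(parse_time("2024-01-01", is_end=True))  # 2024-01-01 23:59:59.999999
print(parse_time(1735704000000))              # 13-digit input treated as milliseconds

class Ping(TimestampModel):  # illustrative subclass
    ts: datetime

print(Ping(ts=datetime(2024, 1, 1)).model_dump_json())  # {"ts": <13-digit timestamp>}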

120
spider/api.py Normal file
View File

@@ -0,0 +1,120 @@
import requests
from loguru import logger
import csv
import os
import random
class Api:
def __init__(self) -> None:
# self.base_url = 'http://127.0.0.1:6060'
self.base_url = 'http://192.168.11.67:6060'
# 创建店铺
def create_shop(self, city: str, street: str, shop_name: str) -> dict:
url = f'{self.base_url}/country/shop'
item = {
'city': city,
'street': street,
'shop_name': shop_name,
}
response = requests.post(url, json=item).json()
logger.info(response)
return response
# 查询店铺
def get_shop(self, city: str) -> dict:
url = f'{self.base_url}/country/shop'
response = requests.get(url).json()
# logger.info(response)
return response
# 创建信息
def create_info(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str, status: bool = False, email_content: str | None = None) -> dict:
"""
创建信息记录(孩子与家长字段)
参数:
child_full_name (str): 孩子全名
parent_full_name (str): 家长全名
child_birthday (str): 孩子生日(字符串)
address_str (str): 街道地址
city_name (str): 城市
parent_phone (str): 家长电话
postcode (str): 邮编
province (str): 省/州全称
email (str): 邮箱
text (str): 文本内容(如反馈地址)
status (bool): 状态
email_content (str | None): 邮件内容
返回值:
dict: 接口返回的数据
"""
url = f'{self.base_url}/country/info'
item = {
"child_full_name": child_full_name,
"parent_full_name": parent_full_name,
"child_birthday": child_birthday,
"address_str": address_str,
"city_name": city_name,
"parent_phone": parent_phone,
"postcode": postcode,
"province": province,
"status": status,
"email": email,
"email_content": email_content,
"text": text
}
response = requests.post(url, json=item).json()
logger.info(response)
return response
# Get a random shop (the /country/shop/random endpoint picks from all shops; it does not filter by city)
def get_random_shop(self) -> dict:
url = f'{self.base_url}/country/shop/random'
response = requests.get(url).json()
# logger.info(response)
if not response.get('street'):
logger.error(f'没有店铺')
return None
return response
def main():
"""
从同目录的 `bakeries.csv` 读取面包店数据,按列映射输出或创建店铺
列顺序:`Name,Address,City`
"""
api = Api()
csv_path = os.path.join(os.path.dirname(__file__), 'data.csv')
if not os.path.exists(csv_path):
logger.error(f'CSV 文件不存在: {csv_path}')
return
with open(csv_path, 'r', encoding='utf-8') as file:
reader = csv.reader(file)
header = next(reader, None)
for row in reader:
if len(row) < 3:
logger.warning(f'行列数不足,跳过: {row}')
continue
shop_name, street, city = row[1], row[2], row[0]
if ' (city)' in city:
city = city.replace(' (city)', '')
if 'Quebec' in city:
continue
if ',' in city:
city = city.split(',')[0]
logger.info(f'city: {city}, street: {street}, shop_name: {shop_name}')
api.create_shop(city, street, shop_name)
# def main2():
# api = Api()
# city = 'Toronto'
# shop = api.get_random_shop()
# if shop:
# logger.info(shop)
# if __name__ == '__main__':
# main()
api = Api()

313
spider/auto_challenge.py Normal file
View File

@@ -0,0 +1,313 @@
import io
import time
import uuid
from typing import Optional, List
import requests
from PIL import Image
import base64
from loguru import logger
RESAMPLE_FILTER = Image.Resampling.LANCZOS
class ReCaptchaHandler:
path_map_44 = {
0: "//table/tbody/tr[1]/td[1]",
1: "//table/tbody/tr[1]/td[2]",
2: "//table/tbody/tr[1]/td[3]",
3: "//table/tbody/tr[1]/td[4]",
4: "//table/tbody/tr[2]/td[1]",
5: "//table/tbody/tr[2]/td[2]",
6: "//table/tbody/tr[2]/td[3]",
7: "//table/tbody/tr[2]/td[4]",
8: "//table/tbody/tr[3]/td[1]",
9: "//table/tbody/tr[3]/td[2]",
10: "//table/tbody/tr[3]/td[3]",
11: "//table/tbody/tr[3]/td[4]",
12: "//table/tbody/tr[4]/td[1]",
13: "//table/tbody/tr[4]/td[2]",
14: "//table/tbody/tr[4]/td[3]",
15: "//table/tbody/tr[4]/td[4]",
}
path_map_33 = {
0: "//table/tbody/tr[1]/td[1]",
1: "//table/tbody/tr[1]/td[2]",
2: "//table/tbody/tr[1]/td[3]",
3: "//table/tbody/tr[2]/td[1]",
4: "//table/tbody/tr[2]/td[2]",
5: "//table/tbody/tr[2]/td[3]",
6: "//table/tbody/tr[3]/td[1]",
7: "//table/tbody/tr[3]/td[2]",
8: "//table/tbody/tr[3]/td[3]",
}
api_host="http://192.168.11.13:7070/analyze_batch/"
def __init__(self, driver):
self.driver = driver
self.checkbox_iframe = None
self.challenge_iframe = None
self.challenge_type = None
self.challenge_question = None
self.challenge_i33_first = True
self.i11s = {}
self.challenge_44_img = None
@staticmethod
def split_image(image_bytes: bytes) -> Optional[List[str]]:
try:
image_stream = io.BytesIO(image_bytes)
img = Image.open(image_stream)
except:
return None
width, height = img.size
tile_width = width // 3
tile_height = height // 3
base64_tiles = []
for i in range(3):
for j in range(3):
left = j * tile_width
upper = i * tile_height
right = (j + 1) * tile_width if j < 2 else width
lower = (i + 1) * tile_height if i < 2 else height
tile = img.crop((left, upper, right, lower))
buf = io.BytesIO()
tile.save(buf, format="PNG")
b64 = base64.b64encode(buf.getvalue()).decode()
base64_tiles.append(b64)
return base64_tiles
def find_checkbox_iframe(self):
time.sleep(1)
try:
iframe = self.driver.ele('css: iframe[title="reCAPTCHA"]')
if iframe:
self.checkbox_iframe = iframe
self.checkbox_iframe.ele("#recaptcha-anchor").click()
return True
except:
pass
return False
def find_challenge_iframe(self):
try:
iframe = self.driver.ele("@|title=recaptcha challenge expires in two minutes@|title=reCAPTCHA 验证任务将于 2 分钟后过期")
# logger.info(f"iframe: {iframe}")
if iframe:
self.challenge_iframe = iframe
return True
except:
pass
return False
def check_11_refresh(self, check_ele):
for k, v in self.i11s.items():
if v.get("new"):
self.i11s[k]['new'] = False
check_ele = [i[0] for i in check_ele]
for idx in check_ele:
if idx not in self.i11s:
self.i11s[idx] = {'srcs': [], 'new': False}
while True:
ele = self.challenge_iframe.ele('#rc-imageselect-target').ele(
f"xpath:{self.path_map_33[idx]}")
img_ele = ele.ele('.rc-image-tile-11', timeout=0.1)
if not img_ele:
time.sleep(0.1)
continue
byte_data = img_ele.src()
b64_str = base64.b64encode(byte_data).decode()
if b64_str not in self.i11s[idx]['srcs']:
self.i11s[idx]['srcs'].append(b64_str)
self.i11s[idx]['new'] = True
break
def click_answer(self, result, challenge_type):
if challenge_type == 4:
for x in result["results"][0]['result']:
self.challenge_iframe.ele('#rc-imageselect-target').ele(
f"xpath:{self.path_map_44[x]}").click()
time.sleep(0.1)
# if not result["results"][0]['result']:
# try:
# image_bytes = base64.b64decode(self.challenge_44_img)
# name = str(uuid.uuid4())
# with open(rf"{name}.png",'wb') as f:
# f.write(image_bytes)
# except:
# pass
self.challenge_iframe.ele('#recaptcha-verify-button').click()
self.i11s.clear()
return True
if challenge_type == 3:
found_ele = []
for res in result["results"]:
if res["result"].get('target_found'):
idx = int(res["image_id"])
self.challenge_iframe.ele('#rc-imageselect-target').ele(
f"xpath:{self.path_map_33[idx]}").click()
found_ele.append((idx, self.path_map_33[idx]))
time.sleep(0.1)
if found_ele:
if len(found_ele) <= 2 and self.challenge_i33_first:
self.challenge_iframe.ele('#recaptcha-reload-button').click()
return False
cls = self.challenge_iframe.ele('#rc-imageselect-target').ele(
f"xpath:{found_ele[0][1]}").attr('class')
if 'rc-imageselect-tileselected' in cls:
self.challenge_iframe.ele('#recaptcha-verify-button').click()
self.i11s.clear()
return True
self.check_11_refresh(found_ele)
return False
self.challenge_iframe.ele('#recaptcha-verify-button').click()
self.i11s.clear()
return True
return False
def challenge_i33(self):
if len(self.challenge_iframe.eles('.rc-image-tile-33', timeout=1)) == 9:
self.challenge_i33_first = True
self.i11s.clear()
first_ele = self.challenge_iframe.eles('.rc-image-tile-33')[0]
byte_data = first_ele.src()
tiles = self.split_image(byte_data)
if tiles:
images = {i: t for i, t in enumerate(tiles)}
if res := self.identify_verification_code(images):
self.click_answer(res, 3)
else:
self.challenge_i33_first = False
data = {}
for k, v in self.i11s.items():
if v['new']:
img_b64 = v['srcs'][-1]
data[k] = img_b64
if res := self.identify_verification_code(data):
self.click_answer(res, 3)
def challenge_i44(self):
ele = self.challenge_iframe.eles('.rc-image-tile-44')[0]
byte_data = ele.src()
b64_str = base64.b64encode(byte_data).decode()
self.challenge_44_img = b64_str
if res := self.identify_verification_code({0: b64_str}):
self.click_answer(res, 4)
def identify_verification_code(self, images):
data = {"images": []}
for k, img in images.items():
if img:
data["images"].append({
"image_id": str(k),
"image_base64": img,
"target_class": self.challenge_question
})
if data['images']:
res = requests.post(self.api_host, json=data)
return res.json()
return None
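    # Request/response shape assumed by identify_verification_code and click_answer (inferred from
    # this file only, not from the recognition service's documentation):
    #   request : {"images": [{"image_id": "0", "image_base64": "<b64 png>", "target_class": "<question>"}]}
    #   response (3x3 / 1x1): {"results": [{"image_id": "0", "result": {"target_found": true}}, ...]}
    #   response (4x4)      : {"results": [{"result": [3, 7, 11]}]}   # indexes of the tiles to click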
def challenge(self):
if not self.find_checkbox_iframe():
return {"status": False, "message": "no verification code found"}
url_before = self.driver.url
# logger.info(f"url_before: {url_before}")
self.find_challenge_iframe()
if not self.challenge_iframe:
return {"status": False, "message": "no verification code found"}
while True:
time.sleep(1)
if self.driver.url != url_before:
return {"status": True, "message": "验证码自动通过1"}
if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
return {"status": True, "message": "验证码自动通过2"}
# 兼容 ChromiumFrame 无 style() 方法:优先读取 style 属性,其次使用 JS 计算样式
vis = None
try:
style_str = self.challenge_iframe.attr('style') or ''
if 'visibility' in style_str:
vis = 'hidden' if 'visibility: hidden' in style_str.replace(' ', '') else 'visible'
except Exception:
pass
if vis is None:
try:
# 通过 JS 获取 iframe 的可见性
vis = self.driver.run_js(
'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");'
'f ? getComputedStyle(f).visibility : null;'
)
except Exception:
vis = None
if vis != 'hidden':
break
# try:
# if self.driver.url != url_before:
# return {"status": True, "message": "验证码自动通过1"}
# if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
# return {"status": True, "message": "验证码自动通过2"}
# if self.challenge_iframe.style('visibility') != 'hidden':
# logger.info(222)
# break
# except:
# logger.error("challenge error")
# pass
try:
while True:
# 重复使用可见性判断,避免依赖不存在的 style()
vis = None
try:
style_str = self.challenge_iframe.attr('style') or ''
if 'visibility' in style_str:
vis = 'hidden' if 'visibility: hidden' in style_str.replace(' ', '') else 'visible'
except Exception:
pass
if vis is None:
try:
vis = self.driver.run_js(
'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");'
'f ? getComputedStyle(f).visibility : null;'
)
except Exception:
vis = None
if vis == 'hidden':
break
time.sleep(1)
if self.driver.url != url_before:
return {"status": True, "message": "captcha successfully resolved"}
if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true':
return {"status": True, "message": "captcha successfully resolved"}
# 获取题目
self.challenge_question = self.challenge_iframe.ele("tag:strong").text
# 判断 4×4
if self.challenge_iframe.ele('.rc-image-tile-44', timeout=0.1):
self.challenge_i44()
# 判断 3×3 或 1×1
elif self.challenge_iframe.ele('.rc-image-tile-33', timeout=0.1) or \
self.challenge_iframe.ele('.rc-image-tile-11', timeout=0.1):
self.challenge_i33()
except:
pass
return {"status": True, "message": "captcha successfully resolved"}

318
spider/bit_browser.py Normal file
View File

@@ -0,0 +1,318 @@
import time
import requests
from loguru import logger
from functools import wraps
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
"""
通用重试装饰器
:param max_retries: 最大重试次数
:param delay: 每次重试的初始延迟(秒)
:param backoff: 每次重试延迟的递增倍数
"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
retries = 0
current_delay = delay
while retries < max_retries:
try:
return func(*args, **kwargs)
except Exception as e:
retries += 1
if retries >= max_retries:
logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
return None # 重试次数用尽后返回 None
logger.warning(f"正在重试 {func.__name__} {retries + 1}/{max_retries} 因错误: {e}")
time.sleep(current_delay)
current_delay *= backoff
return None # 三次重试仍未成功,返回 None
return wrapper
return decorator
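# Usage sketch (assumption: `_demo_flaky_request` is illustrative only and not used elsewhere in this
# project): any exception raised inside the decorated function triggers another attempt, and the call
# finally returns None once max_retries attempts have failed.
@retry(max_retries=3, delay=1.0, backoff=2.0)
def _demo_flaky_request(url: str = "http://127.0.0.1:54345/health") -> dict | None:
    # raises on connection errors / invalid JSON, which is exactly what the retry wrapper catches
    return requests.get(url, timeout=5).json()
# _demo_flaky_request()  # -> dict on success, None after three failed attempts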
# 比特浏览器模块
class BitBrowser:
def __init__(self):
self.bit_host = "http://127.0.0.1"
pass
# Create a BitBrowser profile
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_create(self, remark: str = '指纹浏览器', ua: str = None, host: str = None, port: str = None,
proxy_user: str = None,
proxy_pwd: str = None, proxy_type: str = 'noproxy', urls: str = None,
bit_port: str = "54345") -> str:
"""
创建比特币浏览器
:param bit_port: 可选默认54345
:param ua: 可选,默认随机
:param proxy_type: 代理类型 (可选) ['noproxy', 'http', 'https', 'socks5', 'ssh']
:param urls: 额外打开的url (可选) 多个用,分割
:param host: 代理IP地址 (可选)
:param port: 代理IP端口 (可选)
:param proxy_user: 代理账号 (可选)
:param proxy_pwd: 代理密码 (可选)
:param remark: 备注 (可选)
:param bit_port: 可选默认54345
:return: 返回浏览器ID
"""
url = f"{self.bit_host}:{bit_port}/browser/update"
headers = {'Content-Type': 'application/json'}
data = {
'name': f'{remark if len(remark) < 40 else remark[:40]}', # 窗口名称
'remark': f'{remark}', # 备注
'proxyMethod': 2, # 代理方式 2自定义 3 提取IP
# 代理类型 ['noproxy', 'http', 'https', 'socks5', 'ssh']
'proxyType': f'{proxy_type}',
"browserFingerPrint": {"userAgent": ua} # 留空,随机指纹
}
if host is not None:
data['host'] = host
if port is not None:
data['port'] = port
if proxy_user is not None:
data['proxyUserName'] = proxy_user
if proxy_pwd is not None:
data['proxyPassword'] = proxy_pwd
if urls is not None:
data['url'] = urls # 额外打开的url 多个用,分割
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
browser_pk = res['data']['id']
return browser_pk
# Update a BitBrowser profile
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_update(self, pk: str, remark: str = None, proxyType: str = 'noproxy', host: str = None,
port: str = None, proxy_user: str = None, proxy_pwd: str = None, urls: str = None,
bit_port: str = "54345") -> bool:
"""
修改比特币浏览器 传入某个参数则修改某个参数
:param proxyType: 代理类型 noproxy|http|https|socks5(默认noproxy)
:param pk: # 浏览器ID
:param remark: # 备注
:param host: # 代理主机
:param port: # 代理端口
:param proxy_user: # 代理账号
:param proxy_pwd: # 代理密码
:param urls: # 额外打开的url 多个用,分割
:param bit_port: # 可选默认54345
:return: bool
"""
url = f"{self.bit_host}:{bit_port}/browser/update/partial"
headers = {'Content-Type': 'application/json'}
data = dict()
data['ids'] = [pk]
if remark is not None:
data['remark'] = remark
data['name'] = remark
if urls is not None:
data['url'] = urls
if proxyType != 'noproxy':
data['proxyType'] = proxyType
if host is not None:
data['host'] = host
if port is not None:
data['port'] = port if isinstance(port, int) else int(port)
if proxy_user is not None:
data['proxyUserName'] = proxy_user
if proxy_pwd is not None:
data['proxyPassword'] = proxy_pwd
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
return True
# Open a BitBrowser window
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_open(self, pk: str, bit_port: str = "54345") -> str:
"""
打开比特币浏览器
:param pk: 浏览器ID
:param bit_port: 可选默认54345
:return: 返回浏览器地址
"""
url = f"{self.bit_host}:{bit_port}/browser/open"
data = {"id": f'{pk}'}
headers = {'Content-Type': 'application/json'}
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
debugger_address = res['data']['http']
return debugger_address
# Close a BitBrowser window
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_close(self, pk: str, bit_port: str = "54345"):
"""
关闭比特币浏览器 - 执行后需要等待5s
:param pk: 浏览器ID
:param bit_port: 可选默认54345
:return: 无返回值
"""
url = f"{self.bit_host}:{bit_port}/browser/close"
headers = {'Content-Type': 'application/json'}
data = {'id': f'{pk}'}
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
# 等待3秒
time.sleep(3)
bol = self.bit_browser_status(pk)
if bol:
raise Exception(f'浏览器ID {pk} 未正常关闭, 等待3秒后重试')
return True
# Delete a BitBrowser profile
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_delete(self, pk: str, bit_port: str = "54345"):
"""
删除比特币浏览器
:param pk: 浏览器ID
:param bit_port: 可选默认54345
:return: 无返回值
"""
url = f"{self.bit_host}:{bit_port}/browser/delete"
headers = {'Content-Type': 'application/json'}
data = {'id': f'{pk}'}
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
return True
# List BitBrowser profiles
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
bit_port: str | None = "54345") -> dict:
"""
获取所有比特币浏览器
:param page: 页码
:param limit: 每页数量
:param group_id: 组ID(可选)
:param bit_port: 可选默认54345
:return: {'success': True, 'data': {'page': 1, 'pageSize': 10, 'totalNum': 128, 'list': [{'id': '12a3126accc14c93bd34adcccfc3083c'},{'id':'edc5d61a56214e9f8a8bbf1a2e1b405d'}]}}
"""
url = f"{self.bit_host}:{bit_port}/browser/list"
headers = {'Content-Type': 'application/json'}
data = {'page': page, 'pageSize': limit}
if group_id is not None:
data['groupId'] = group_id
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
return res
# 获取比特浏览器窗口详情
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_detail(self, pk: str, bit_port: str = "54345") -> dict:
"""
获取比特浏览器窗口详情
:param pk: 浏览器ID
:param bit_port: 可选默认54345
:return: {'success': True, 'data': {'id': '12a3126accc14c93bd34adcccfc3083c', 'name': '12a3126accc14c93bd34adcccfc3083c', 'remark': '12a3126accc14c93bd34adcccfc3083c', '
"""
url = f"{self.bit_host}:{bit_port}/browser/detail"
headers = {'Content-Type': 'application/json'}
data = {'id': f'{pk}'}
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
return res
# 获取比特浏览器的进程id
def bit_browser_pid(self, pk: str, bit_port: str = "54345") -> str:
"""
获取比特浏览器的进程id
:param pk: 浏览器ID
:param bit_port: 可选默认54345
:return: 返回进程id
"""
url = f"{self.bit_host}:{bit_port}/browser/pids/alive"
headers = {'Content-Type': 'application/json'}
data = {
"ids": [pk]
}
res = requests.post(url, json=data, headers=headers).json()
if not res.get('success'):
raise Exception(res)
return res['data'][pk]
# Get window running status
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_status(self, pk: str, bit_port: str = "54345") -> bool:
    """
    Check whether a BitBrowser window is currently running
    :param pk: browser id
    :param bit_port: local API port (optional, default 54345)
    :return: True if the window process is alive, False otherwise
    """
url = f"{self.bit_host}:{bit_port}/browser/pids"
headers = {'Content-Type': 'application/json'}
data = {'ids': [pk]}
res = requests.post(url, json=data, headers=headers).json()
# print(f'res --> {res}')
if not res.get('success'):
raise Exception(res)
if res.get('data').get(pk) is None:
return False
else:
return True
def main():
    bit = BitBrowser()
    jc = 0
    while 1:
        # use the synchronous methods defined above (the async _bit_browser_* variants do not exist)
        res = bit.bit_browser_get(
            page=jc,
            limit=100,
            group_id='4028808b9a52223a019a581bbea1275c')
        if not res:
            break
        li = res["data"]["list"]
        if len(li) == 0:
            break
        for i in li:
            id = i["id"]
            # read browser detail
            res = bit.bit_browser_detail(id)
            if not res:
                continue
            data = res["data"]
            ua = data["browserFingerPrint"]["userAgent"]
            proxy_type = data.get("proxyType")
            host = data.get("host")
            port = data.get("port")
            proxy_account = data.get("proxyUserName")
            proxy_password = data.get("proxyPassword")
            print(f'id -->{id}')
            print(f'ua -->{ua}')
            print(f'proxy_type -->{proxy_type}')
            print(f'host -->{host}')
            print(f'port -->{port}')
            print(f'proxy_account -->{proxy_account}')
            print(f'proxy_password -->{proxy_password}')
            print('=' * 50)
        jc += 1
def main2():
bit = BitBrowser()
browser_id = '5ba9eb974c7c45e2bb086585c75f70e8'
# 关闭浏览器
# res = bit.bit_browser_close(browser_id)
# res = bit.bit_browser_get()
# print(res)
# if __name__ == '__main__':
# main2()
bit_browser = BitBrowser()

851
spider/mail_.py Normal file
View File

@@ -0,0 +1,851 @@
import asyncio
import imaplib
import email
import random
import socket
import string
import time
from email.header import decode_header
from datetime import timezone, timedelta
import email.utils
import aiohttp
import socks
import requests
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from functools import wraps
from loguru import logger
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
"""
通用重试装饰器
:param max_retries: 最大重试次数
:param delay: 每次重试的初始延迟(秒)
:param backoff: 每次重试延迟的递增倍数
"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
retries = 0
current_delay = delay
while retries < max_retries:
try:
return func(*args, **kwargs)
except Exception as e:
retries += 1
if retries >= max_retries:
logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
return None # 重试次数用尽后返回 None
logger.warning(f"正在重试 {func.__name__} {retries + 1}/{max_retries} 因错误: {e}")
time.sleep(current_delay)
current_delay *= backoff
return None # 三次重试仍未成功,返回 None
return wrapper
return decorator
def async_retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
"""
支持异步函数的通用重试装饰器
:param max_retries: 最大重试次数
:param delay: 每次重试的初始延迟(秒)
:param backoff: 每次重试延迟的递增倍数
"""
def decorator(func):
@wraps(func)
async def wrapper(*args, **kwargs):
retries = 0
current_delay = delay
while retries < max_retries:
try:
return await func(*args, **kwargs) # 直接执行原始方法
except Exception as e:
retries += 1
if retries >= max_retries:
logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
return None # 重试次数用尽后返回 None
logger.warning(f"正在重试 {func.__name__} {retries + 1}/{max_retries} 因错误: {e}")
await asyncio.sleep(current_delay) # 异步延迟
current_delay *= backoff # 根据backoff递增延迟
return None # 三次重试仍未成功,返回 None
return wrapper
return decorator
# 域名管理类 - 高内聚低耦合的域名管理方案
class DomainManager:
"""
域名管理器 - 统一管理所有邮箱域名相关操作
实现高内聚低耦合的设计原则
"""
def __init__(self):
# 域名列表 - 只需要在这里添加新域名
self._domains = [
"gmail.com",
"qianyouduo.com",
"rxybb.com",
"cqrxy.vip",
"0n.lv",
"qianyouduo.com",
"ziyouzuan.com",
"emaing.online",
"emaing.fun",
"emaing.asia",
"isemaing.site",
"emaing.cyou",
"emaing.site",
"emaing.icu",
"emaing.store",
"emaing.pw",
"emaing.xyz",
"qydkjgs.asia",
"qydkj.homes",
"qydkj.baby",
"qydkj.cyou",
"qydkjgs.autos",
"qydkj.autos",
"qydkjgs.cyou",
"qydkjgs.homes",
"qydgs.asia",
"qydkj.asia",
"qydgs.cyou",
"lulanjing.asia",
"lisihan.asia",
"mmwan.asia",
"xyttan.asia",
"zpaily.asia",
"youxinzhiguo.asia",
"huijinfenmu.asia",
"linghao.asia",
"cqhc.asia",
"huacun.asia",
"huachen.asia",
"yisabeier.asia",
"xinxinr.cyou",
"lilisi.asia",
"xybbwan.cyou",
"zhongjing.cyou",
"zprxy.cyou",
"cqhuacun.cyou",
"huazong.icu",
"huacun.cyou"
]
def get_domain_by_type(self, mail_type: int) -> str:
"""
根据邮箱类型获取域名
:param mail_type: 邮箱类型编号
:return: 对应的域名
"""
if 0 <= mail_type < len(self._domains):
return self._domains[mail_type]
return self._domains[1] # 默认返回 qianyouduo.com
def get_domain_type(self, domain: str) -> int:
"""
根据域名获取类型编号
:param domain: 域名
:return: 对应的类型编号如果不存在返回1
"""
try:
return self._domains.index(domain)
except ValueError:
return 1 # 默认返回 qianyouduo.com 的类型
def get_imap_server(self, mail_type: int) -> str:
"""
根据邮箱类型获取IMAP服务器地址
:param mail_type: 邮箱类型编号
:return: IMAP服务器地址
"""
domain = self.get_domain_by_type(mail_type)
return f"imap.{domain}"
def get_imap_server_by_domain(self, domain: str) -> str:
"""
根据域名获取IMAP服务器地址
:param domain: 域名
:return: IMAP服务器地址
"""
return f"imap.{domain}"
def is_valid_domain(self, domain: str) -> bool:
"""
检查域名是否在支持列表中
:param domain: 域名
:return: 是否支持该域名
"""
return domain in self._domains
def get_all_domains(self) -> list:
"""
获取所有支持的域名列表
:return: 域名列表的副本
"""
return self._domains.copy()
def get_domain_count(self) -> int:
"""
获取支持的域名总数
:return: 域名总数
"""
return len(self._domains)
def get_creatable_domains(self) -> list:
"""
获取可用于创建邮箱的域名列表排除gmail.com
:return: 可创建邮箱的域名列表
"""
return [domain for domain in self._domains if domain != "gmail.com"]
def get_creatable_domain_by_type(self, mail_type: int) -> str:
"""
根据邮箱类型获取可创建的域名排除gmail.com
:param mail_type: 邮箱类型编号
:return: 对应的域名如果是gmail.com则返回默认域名
"""
domain = self.get_domain_by_type(mail_type)
if domain == "gmail.com":
return self._domains[1] # 返回qianyouduo.com作为默认
return domain
def get_random_creatable_domain(self) -> str:
"""
随机获取一个可创建邮箱的域名(排除 gmail.com
返回值:
str: 随机选取的域名
"""
creatable = self.get_creatable_domains()
if not creatable:
raise ValueError("无可用域名用于创建邮箱")
return random.choice(creatable)
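# Usage sketch (illustrative; results depend on the _domains list above): type numbers are simply
# indexes into _domains, and IMAP hosts are derived as "imap.<domain>".
def _demo_domain_manager() -> None:
    dm = DomainManager()
    print(dm.get_domain_by_type(1))                     # 'qianyouduo.com'
    print(dm.get_domain_type('gmail.com'))              # 0
    print(dm.get_imap_server_by_domain('qydgs.asia'))   # 'imap.qydgs.asia'
    print(dm.get_random_creatable_domain())             # any supported domain except 'gmail.com'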
# 邮箱模块
class Mail:
def __init__(self):
self.domain_manager = DomainManager()
self.api_host = 'http://111.10.175.206:5020'
def email_account_read(self, pk: int = None, account: str = None, status: bool = None, host: str = None,
proxy_account: str = None,
parent_account: str = None, order_by: str = None, level: int = None,
update_time_start: str = None, update_time_end: str = None, res_count: bool = False,
create_time_start: str = None, create_time_end: str = None, page: int = None,
limit: int = None) -> dict:
"""
读取mail账号
:param level: 邮箱等级(可选)
:param status: 状态(可选)
:param update_time_start: 更新时间起始(可选)
:param update_time_end: 更新时间结束(可选)
:param res_count: 返回总数 (可选)
:param parent_account: 母邮箱账号 (可选)
:param pk: 主键 (可选)
:param account: 账号 (可选)
:param host: 代理 (可选)
:param proxy_account: 代理账号 (可选)
:param order_by: 排序方式 (可选) id|create_time|update_time 前面加-表示倒序
:param create_time_start: 创建起始时间 (可选)
:param create_time_end: 创建结束时间 (可选)
:param page: 页码 (可选)
:param limit: 每页数量 (可选)
:return: 返回json 成功字段code=200
"""
if pk is not None:
url = f'{self.api_host}/mail/account/{pk}'
return requests.get(url).json()
url = f'{self.api_host}/mail/account'
data = dict()
if account is not None:
data['account'] = account
if status is not None:
data['status'] = status
if host is not None:
data['host'] = host
if proxy_account is not None:
data['proxy_account'] = proxy_account
if parent_account is not None:
data['parent_account'] = parent_account
if order_by is not None:
data['order_by'] = order_by
if level is not None:
data['level'] = level
if create_time_start is not None:
data['create_time_start'] = create_time_start
if create_time_end is not None:
data['create_time_end'] = create_time_end
if update_time_start is not None:
data['update_time_start'] = update_time_start
if update_time_end is not None:
data['update_time_end'] = update_time_end
if res_count:
data['res_count'] = res_count
if page is not None:
data['page'] = page
if limit is not None:
data['limit'] = limit
res = requests.get(url, params=data).json()
if res.get('code') not in [200, 400, 404]:
raise Exception(res)
return res
# 创建随机邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int | None = None) -> str:
"""
创建随机邮箱(随机域名,排除 gmail.com
:param count: 邮箱长度(默认8位)
:param pwd: 邮箱密码(默认Zpaily88)
:param mail_type: 指定邮箱类型编号;为 None 时随机选择可创建域名
:return: 邮箱账号
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = "https://mail.qianyouduo.com/admin/api/v1/boxes"
name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
# 随机选择可创建域名(排除 gmail.com如指定类型则按类型选择
mail_end = (
self.domain_manager.get_creatable_domain_by_type(mail_type)
if mail_type is not None
else self.domain_manager.get_random_creatable_domain()
)
data = {
"name": name,
"email": f"{name}@{mail_end}",
"passwordPlaintext": pwd
}
response = requests.post(url, headers=headers, json=data)
if 'Validation errors: [user] This combination of username and domain is already in database' in response.text:
return f'{name}@{mail_end}'
if response.status_code != 201:
raise Exception(response.status_code)
return f"{name}@{mail_end}"
# 异步创建随机邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int | None = None) -> str:
"""
创建随机邮箱(随机域名,排除 gmail.com
:param count: 邮箱长度(默认8位)
:param pwd: 邮箱密码(默认Zpaily88)
:param mail_type: 指定邮箱类型编号;为 None 时随机选择可创建域名
:return:邮箱账号
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = "https://mail.qianyouduo.com/admin/api/v1/boxes"
name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
# 随机选择可创建域名(排除 gmail.com如指定类型则按类型选择
mail_end = (
self.domain_manager.get_creatable_domain_by_type(mail_type)
if mail_type is not None
else self.domain_manager.get_random_creatable_domain()
)
data = {
"name": name,
"email": f"{name}@{mail_end}",
"passwordPlaintext": pwd
}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=data) as response:
status = response.status
text = await response.text()
if 'Validation errors: [user] This combination of username and domain is already in database' in text:
return f"{name}@{mail_end}"
if status != 201:
raise Exception(status)
return f"{name}@{mail_end}"
# 创建邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
"""
创建邮箱
:param account: 邮箱账号
:param pwd: 邮箱密码(默认Zpaily88)
:return:邮箱账号
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = "https://mail.qianyouduo.com/admin/api/v1/boxes"
name = account.split('@')[0]
mail_end = account.split('@')[1]
# 排除gmail.com域名
if mail_end == "gmail.com":
return None
# 验证域名是否支持
if not self.domain_manager.is_valid_domain(mail_end):
raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")
data = {
"name": name,
"email": f"{name}@{mail_end}",
"passwordPlaintext": pwd
}
response = requests.post(url, headers=headers, json=data)
print(f'创建邮箱响应: {response.status_code}')
if response.status_code not in [201, 400]:
raise Exception(response.status_code)
return f"{name}@{mail_end}"
# 异步创建邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
"""
创建邮箱
:param account: 邮箱账号
:param pwd: 邮箱密码(默认Zpaily88)
:return: 邮箱账号
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = "https://mail.qianyouduo.com/admin/api/v1/boxes"
name = account.split('@')[0]
mail_end = account.split('@')[1]
# 排除gmail.com域名
if mail_end == "gmail.com":
return None
# 验证域名是否支持
if not self.domain_manager.is_valid_domain(mail_end):
raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")
data = {
"name": name,
"email": f"{name}@{mail_end}",
"passwordPlaintext": pwd
}
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=data) as response:
status = response.status
if status not in [201, 400]:
raise Exception(f'status code: {status}')
return f"{name}@{mail_end}"
# 删除邮箱
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_delete(self, account: str) -> bool:
"""
删除邮箱
:param account: 邮箱账号
:return: True表示删除成功False表示删除失败
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}"
if '@gmail.com' in account:
return False
response = requests.delete(url, headers=headers)
print(f'删除邮箱响应: --> {response.status_code}')
if response.status_code not in [204, 404]:
raise Exception(response.status_code)
return True
# 异步删除邮箱
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_delete(self, account: str) -> bool:
"""
删除邮箱
:param account: 邮箱账号
:return: True表示删除成功False表示删除失败
"""
headers = {
"Accept-Language": "zh-CN,zh;q=0.9",
"Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/json",
"Origin": "https://mail.qianyouduo.com",
"Pragma": "no-cache",
"Referer": "https://mail.qianyouduo.com/admin/api/doc",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"accept": "*/*",
"sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"macOS\""
}
url = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}"
if '@gmail.com' in account:
return False
async with aiohttp.ClientSession() as session:
async with session.delete(url, headers=headers) as response:
status = response.status
if status not in [204, 404]:
raise Exception(f'status code: {status}')
return True
# 处理邮件正文
@staticmethod
def extract_body(msg):
"""
提取邮件正文,优先返回 HTML 文本
- 更健壮的字符集解析:优先使用 part 的 charset 信息,失败回退到 utf-8 / latin-1
- 仅处理 inline 的 text/html 与 text/plain 内容
"""
html_text = None
plain_text = None
def _decode_part(part):
payload = part.get_payload(decode=True)
if payload is None:
return None
# 优先从内容中解析 charset
charset = (part.get_content_charset() or part.get_param('charset') or 'utf-8')
try:
return payload.decode(charset, errors='replace')
except LookupError:
# 未知编码时回退
try:
return payload.decode('utf-8', errors='replace')
except Exception:
return payload.decode('latin-1', errors='replace')
if msg.is_multipart():
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = part.get_content_disposition()
if content_type == "text/html" and (not content_disposition or content_disposition == "inline"):
html_text = _decode_part(part) or html_text
elif content_type == "text/plain" and (not content_disposition or content_disposition == "inline"):
plain_text = _decode_part(part) or plain_text
else:
content_type = msg.get_content_type()
if content_type == "text/html":
html_text = _decode_part(msg)
elif content_type == "text/plain":
plain_text = _decode_part(msg)
# 优先返回 HTML 文本,如果没有 HTML 文本,则返回纯文本
return html_text or plain_text or ""
# 转换邮件日期
@staticmethod
def convert_to_china_time(date_str):
"""
将邮件日期转换为10位时间戳中国时区
- 保留原始邮件的时区信息;若无时区,则按 UTC 处理
- 异常时返回当前时间戳,避免解析失败导致崩溃
"""
try:
email_date = email.utils.parsedate_to_datetime(date_str)
if email_date is None:
return int(time.time())
if email_date.tzinfo is None:
email_date = email_date.replace(tzinfo=timezone.utc)
china_time = email_date.astimezone(timezone(timedelta(hours=8)))
return int(china_time.timestamp())
except Exception:
return int(time.time())
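    # Worked example (illustrative header value):
    #   convert_to_china_time("Mon, 01 Jan 2024 12:00:00 +0000") -> 1704110400
    # The unix timestamp itself is timezone-independent; the UTC+8 conversion only affects display.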
# 获取邮件
def email_read(self, user: str, from_: str, limit: int = 1, is_del: bool = False) -> list | None:
"""
获取最新邮件
:param user: 母账号
:param from_: 发件人匹配关键字(可为邮箱或显示名,大小写不敏感)
:param limit: 获取邮件数量(默认1封)
:param is_del: 是否删除整个邮箱账号(非 Gmail 才会执行账号删除)
:return: 返回邮件列表,每个元素格式为:
{
"title": "邮件标题",
"from": "发件人",
"date": "邮件日期(中国时区时间戳)",
"content": "邮件正文",
"code": 200
}
"""
user_li = user.split('@')
domain = user_li[1]
# 使用域名管理器获取邮箱类型
if not self.domain_manager.is_valid_domain(domain):
return None
mail_type = self.domain_manager.get_domain_type(domain)
# 仅对 Gmail 进行点号归一化,其它域名按原样处理
local_part = user_li[0]
if domain == "gmail.com":
local_part = local_part.replace('.', '')
user = local_part + '@' + user_li[1]
proxy_host = None
proxy_port = None
proxy_user = None
proxy_pwd = None
if mail_type == 0:
res = self.email_account_read(parent_account=user, status=True, level=0)
if res['code'] != 200:
return None
pwd = res['items'][0]['parent_pwd']
proxy_host = res['items'][0]['host']
proxy_port = res['items'][0]['port']
proxy_user = res['items'][0]['proxy_account']
proxy_pwd = res['items'][0]['proxy_pwd']
else:
pwd = 'Zpaily88'
items = [] # 存储邮件列表
# 保存原始socket
original_socket = None
if proxy_host is not None and proxy_port is not None:
original_socket = socket.socket
if proxy_user is not None and proxy_pwd is not None:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True, proxy_user, proxy_pwd)
else:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True)
socket.socket = socks.socksocket
imap_server = None
had_error = False
try:
# 在设置代理后创建IMAP连接
imap_server = imaplib.IMAP4_SSL(self.domain_manager.get_imap_server(mail_type))
if not imap_server:
had_error = True
else:
# pwd去除空格
pwd = pwd.replace(' ', '')
# print(f'pwd: {pwd}')
imap_server.login(user, pwd)
status, _ = imap_server.select("INBOX")
if status != 'OK':
had_error = True
else:
status, email_ids = imap_server.search(None, "ALL")
if status != 'OK':
had_error = True
else:
email_id_list = email_ids[0].split()
# 获取最近limit条邮件ID
recent_ids = email_id_list[-20:] # 仍然获取最近20封以确保有足够的邮件可以筛选
found_count = 0 # 记录找到的符合条件的邮件数量
for email_id in recent_ids[::-1]: # 从最新的邮件开始处理
if found_count >= limit: # 如果已经找到足够数量的邮件,就退出循环
break
status, msg_data = imap_server.fetch(email_id, "(RFC822)")
for response in msg_data:
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
# 兼容性发件人匹配:解析地址与显示名,大小写不敏感,支持子串匹配
from_field = msg.get("From", "")
addresses = email.utils.getaddresses([from_field])
needle = (from_ or "").lower()
candidates = []
for name, addr in addresses:
if name:
candidates.append(name.lower())
if addr:
candidates.append(addr.lower())
if any(needle in c for c in candidates):
# 标题解码,处理无标题或编码缺失的情况
raw_subject = msg.get("Subject")
subject = ""
if raw_subject is not None:
dh = decode_header(raw_subject)
if dh:
s, enc = dh[0]
if isinstance(s, bytes):
try:
subject = s.decode(enc or 'utf-8', errors='replace')
except LookupError:
subject = s.decode('utf-8', errors='replace')
else:
subject = s
item = {
"title": subject,
"from": msg["From"],
"content": self.extract_body(msg),
"code": 200
}
# 获取并转换邮件时间
date_str = msg["Date"]
if date_str:
item["date"] = self.convert_to_china_time(date_str)
items.append(item)
found_count += 1
if found_count >= limit: # 如果已经找到足够数量的邮件,就跳出内层循环
break
# 读取完成不再对单封邮件做删除标记与 expunge
except imaplib.IMAP4.error as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'连接邮箱失败: {e}', 'code': 500})
had_error = True
except Exception as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'获取邮件异常: {e}', 'code': 500})
had_error = True
finally:
try:
# 检查连接是否建立
if 'imap_server' in locals() and imap_server is not None:
try:
# 先检查是否处于已选择状态
if hasattr(imap_server, 'state') and imap_server.state == 'SELECTED':
imap_server.close()
except Exception as e:
logger.error(f"关闭IMAP文件夹时发生错误: {e}")
try:
# 无论如何尝试登出
imap_server.logout()
except Exception as e:
logger.error(f"登出IMAP服务器时发生错误: {e}")
# 在Windows上可能需要强制关闭socket
try:
if hasattr(imap_server, 'sock') and imap_server.sock is not None:
imap_server.sock.close()
except Exception as sock_err:
logger.error(f"强制关闭socket时发生错误: {sock_err}")
except Exception as outer_e:
logger.error(f"处理IMAP连接关闭时发生错误: {outer_e}")
finally:
# 重置socket设置如果使用了代理
if proxy_host is not None and original_socket is not None:
socket.socket = original_socket
# 若成功获取到至少一封匹配邮件且请求删除,则删除整个邮箱账号
if is_del and len(items) > 0:
try:
self.email_delete(user)
except Exception as del_err:
logger.error(f"删除邮箱账号失败: {del_err}")
if had_error:
return None
if len(items) == 0:
return None
return items # 返回邮件列表
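# Usage sketch (the account and sender below are placeholders): fetch the newest message from a
# given sender; each returned item carries "title", "from", "content", "date" and "code" keys.
def _demo_email_read() -> None:
    mail = Mail()
    msgs = mail.email_read('example1234@qydgs.asia', from_='noreply', limit=1, is_del=False)
    print(msgs[0]['title'] if msgs else 'no matching mail')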
async def main():
"""
使用示例:展示新的域名管理系统的使用方法
"""
mail = Mail()
# mai = '0gz3vvd4@'+'qydgs.asia'
# res = mail.email_create(mai)
# print(f"创建的邮箱: {res}")
random_email = mail.email_create_random()
print(f"创建的随机邮箱: {random_email}")
# 读取邮件
# res = mail.email_read('0gz3vvd4@qydgs.asia', '@', 1, is_del=True)
# print(f'读取的邮件: {res}')
# 删除邮箱
res = mail.email_delete(random_email)
print(f"删除的邮箱: {res}")
mail_ = Mail()
# if __name__ == '__main__':
# asyncio.run(main())

765
spider/main.py Normal file
View File

@@ -0,0 +1,765 @@
import random
import time
from datetime import datetime
from DrissionPage import Chromium
from loguru import logger
from work import generate_child_parent_names
from mail_ import mail_
from bit_browser import bit_browser
from api import api
from proxys import proxy_list
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from auto_challenge import ReCaptchaHandler
class Auto:
def __init__(self, http: str = None):
    # attach to the fingerprint browser when a debugger address is supplied, otherwise use the local default browser
    self.browser = Chromium(http) if http else Chromium()
    self.tab = self.browser.latest_tab
# cf打码
def solve_cloudflare(self, is_ok: bool = False):
tab = self.browser.latest_tab
for _ in range(5):
tab.wait(1)
res = tab.ele(
't:h1@text()=Sorry, you have been blocked', timeout=1)
if res:
logger.error("Cloudflare验证失败")
return False
try:
shadow1 = tab.ele(
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
iframe = shadow1.get_frame(1)
if iframe:
logger.debug("找到Cloudflare iframe")
shadow2 = iframe.ele('x:/html/body').shadow_root
if shadow2:
logger.debug("找到Cloudflare iframe body shadow root")
status = shadow2.ele(
'x://span[text()="Verifying..."]', timeout=1.5)
if status:
tab.wait(3)
status = shadow2.ele(
'x://span[text()="Success!"]', timeout=1.5)
if status:
logger.debug("Cloudflare验证成功")
return True
checkbox = shadow2.ele(
'x://input[@type="checkbox"]', timeout=1.5)
if checkbox:
checkbox.click()
logger.debug("点击Cloudflare复选框")
tab.wait(3)
logger.debug("重新获取状态")
# return False
except Exception as e:
# logger.error(f"处理Cloudflare异常: {e}")
if is_ok:
logger.debug(f"cloudflare处理通过: {e}")
return True
return self.solve_cloudflare(is_ok=True)
tab.wait(1)
return False
# 谷歌验证码
def solve_recaptcha(self):
logger.debug("开始解决谷歌验证码")
recaptcha_handler = ReCaptchaHandler(self.tab)
res = recaptcha_handler.challenge()
if res.get("status"):
logger.debug("谷歌验证码成功")
iframe = self.tab.ele('t:iframe@title=reCAPTCHA')
# print(iframe)
res = iframe.ele('t:div@class=recaptcha-checkbox-border')
if res:
logger.debug(f"html: {res.html}")
if 'display: none;' in res.html:
logger.debug("谷歌验证码成功")
return True
else:
print("No element found")
return False
logger.error("谷歌验证码失败")
return False
# 打开URL
def open_url(self, url: str):
self.tab.get(url)
def get_tab(self):
return self.tab
# 等待进入首页
def wait_home(self):
logger.debug("等待进入首页")
jc = 0
while True:
if jc > 3:
logger.error("等待进入首页超过5次未成功")
return False
self.tab.wait(1)
bol = self.tab.ele(
't:div@text():YOUTUBE PRIVACY SETTLEMENT', timeout=1)
if bol:
logger.debug("成功进入首页")
return True
jc += 1
# 随机取城市
def get_random_city(self, province: str | None = None):
cities = {
"Alberta": ["Calgary", "Edmonton"],
"British Columbia": ["Vancouver"],
# "Manitoba": ["Winnipeg", "Rochester"],
# "New Brunswick": ["Fredericton", "Moncton"],
# "Newfoundland and Labrador": ["St. John's", "Halifax"],
"Nova Scotia": ["Halifax"],
"Ontario": ["Toronto"],
# "Prince Edward Island": ["Charlottetown", "St. John's"],
# "Quebec": ["Quebec City", "Montreal"],
# "Saskatchewan": ["Saskatoon", "Regina"],
}
if province is None:
province = random.choice(list(cities.keys()))
return province, random.choice(cities.get(province, []))
def get_province_by_city(self) -> tuple[str, str]:
    """
    Randomly pick one supported (city, province) pair from the mapping below
    :return: a tuple such as ("Toronto", "Ontario")
    """
mapping = {
"Calgary": "Alberta",
"Edmonton": "Alberta",
"Vancouver": "British Columbia",
"Halifax": "Nova Scotia",
"Toronto": "Ontario",
"Ottawa": "Ontario",
"Mississauga": "Ontario",
"Brampton": "Ontario",
"Hamilton": "Ontario",
"Kitchener": "Ontario",
"London": "Ontario",
"Markham": "Ontario",
"Vaughan": "Ontario",
"Windsor": "Ontario",
"Oshawa": "Ontario",
"Brantford": "Ontario",
"Barrie": "Ontario",
"Sudbury": "Ontario",
"Kingston": "Ontario",
"Guelph": "Ontario",
"Cambridge": "Ontario",
"Sarnia": "Ontario",
"Peterborough": "Ontario",
"Waterloo": "Ontario",
"Belleville": "Ontario",
"Brockville": "Ontario",
"Burlington": "Ontario",
"Cornwall": "Ontario",
"Kawartha Lakes": "Ontario",
"North Bay": "Ontario",
"Orillia": "Ontario",
"Pickering": "Ontario",
"Sault Ste. Marie": "Ontario",
"Stratford": "Ontario",
"Durham": "Ontario",
"Norfolk County": "Ontario",
"Prince Edward County": "Ontario",
"Quinte West": "Ontario",
"St. Catharines": "Ontario",
"Welland": "Ontario",
"Thorold": "Ontario",
"Niagara Falls": "Ontario",
"Pelham": "Ontario",
"Port Colborne": "Ontario",
}
# 随机返回一条 key 和 value
return random.choice(list(mapping.items()))
# Random food purchase text
def get_random_food(self, city: str, shop: str) -> str:
    """
    Randomly pick 1-2 bread categories and 1-3 products from each, then build a purchase sentence
    :param city: city name used in the sentence
    :param shop: shop name used in the sentence
    :return: a text line such as "<shop>, <city> buy: <product> * 2, ...".
    """
categories = [
[
'Wonder Bread White',
'Villaggio White Bread',
'No Name Sliced White Bread',
"President's Choice White Sliced Bread",
],
[
"Ben's Original Whole Wheat Bread",
"POM Whole Wheat Bread",
"Silver Hills Bakery Whole Wheat Sliced Bread",
"Country Harvest Whole Wheat Bread",
],
[
"Wonder Bread Hot Dog Buns",
"Villaggio Hamburger Buns",
"Dempster's Dinner Rolls",
"No Frills Hot Dog Buns",
],
[
"Stonemill Bakehouse Bagels",
"Wonder Bagels",
"Montreal Bagels (pre-packaged, e.g., St. Lawrence brand)",
"President's Choice Bagels",
],
[
"Silver Hills Multi-Grain Sliced Bread",
"POM Multi-Grain Bread",
"Country Harvest Multi-Grain Loaf",
],
[
"President's Choice French Stick",
"Dempster's Italian Style Bread",
"Wonder Italian Bread",
"Villaggio Country Style Loaf",
],
]
# 随机选择 1~2 个类别(不重复)
category_count = random.randint(1, 2)
chosen_categories = random.sample(categories, k=category_count)
# 每个类别至少选择 1 个产品,最多选择 3 个以避免过多
selected_products: list[str] = []
for cat in chosen_categories:
max_pick = min(3, len(cat))
pick_count = random.randint(1, max_pick)
selected_products.extend(random.sample(cat, k=pick_count))
logger.debug(f"随机选择的产品: {selected_products}")
text = f'{shop}, {city} buy: '
for p in selected_products:
text += f'{p} * {random.randint(1, 3)}, '
text = text[:-2]
text = text + '.'
logger.debug(f'随机选择的产品文本: {text}')
return text
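    # Example output (illustrative values): "<shop>, <city> buy: Wonder Bread White * 2, POM Whole Wheat Bread * 1."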
# 填写问卷
def fill_questionnaire(self):
"""
完成问卷填写
参数:
city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据
"""
try:
info = generate_child_parent_names()
child_full_name = info['child_full_name']
parent_full_name = info['parent_full_name']
child_birthday = info['child_birthday']
# 2023-04-01转为MM/DD/YYYY
child_birthday = datetime.strptime(child_birthday, '%Y-%m-%d').strftime('%m/%d/%Y')
address_str = info['child_address_str']
city_name = info['child_city_name']
postcode = info['child_postcode']
parent_phone = info['parent_phone']
province = info['parent_state']
# email = mail_.email_create_random()
email = 'zhiyu@qq.com'
logger.debug(f"child_full_name --> {child_full_name}")
logger.debug(f"parent_full_name --> {parent_full_name}")
logger.debug(f"child_birthday --> {child_birthday}")
logger.debug(f"address_str --> {address_str}")
logger.debug(f"city_name --> {city_name}")
logger.debug(f"postcode --> {postcode}")
logger.debug(f"parent_phone --> {parent_phone}")
logger.debug(f"province --> {province}")
logger.debug(f"email --> {email}")
self.tab.wait(0.1)
self.tab.ele('t:input@id=name1').input(child_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=name2').input(parent_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=dateOfBirth').input(child_birthday)
self.tab.wait(0.1)
self.tab.ele('t:input@id=street1').input(address_str)
self.tab.wait(0.1)
self.tab.ele('t:input@id=city').input(city_name)
self.tab.wait(0.1)
self.tab.ele(
't:select@formcontrolname=state').ele(f't:option@text():{province}').click()
self.tab.wait(0.1)
self.tab.ele('t:input@id=zip').input(postcode)
self.tab.wait(0.1)
self.tab.ele('t:input@id=phone1').input(parent_phone)
self.tab.wait(0.1)
self.tab.ele('t:input@id=emailAddress').input(email)
self.tab.wait(0.1)
self.tab.ele('t:input@id=confirmEmailemail').input(email)
self.tab.wait(0.1)
self.tab.ele('t:input@@formcontrolname=resideInUS@@id=Yes').click()
self.tab.wait(0.1)
self.tab.ele('t:input@@formcontrolname=watchedDuringPeriod@@id=Yes').click()
self.tab.wait(0.1)
self.tab.ele('t:input@id=signatureMinor').input(child_full_name)
self.tab.wait(0.1)
self.tab.ele('t:input@id=signatureParentGuardian').input(parent_full_name)
self.solve_recaptcha()
return self.submit_file(
child_full_name=child_full_name,
parent_full_name=parent_full_name,
child_birthday=child_birthday,
address_str=address_str,
city_name=city_name,
parent_phone=parent_phone,
postcode=postcode,
province=province,
email=email,
text=""
)
except Exception as e:
logger.error(f"填写问卷失败: {e}")
# 提交问卷
def submit_file(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str):
"""
提交问卷后的数据保存到后端服务(孩子与家长字段)
参数:
child_full_name (str): 孩子全名
parent_full_name (str): 家长全名
child_birthday (str): 孩子生日(字符串,已为 MM/DD/YYYY
address_str (str): 街道地址
city_name (str): 城市
parent_phone (str): 家长电话
postcode (str): 邮编
province (str): 省/州全称
email (str): 邮箱
text (str): 文本内容(如反馈地址)
"""
jc = 0
while True:
if jc >= 3:
logger.error("提交问卷失败")
return False
res = self.solve_recaptcha()
if not res:
jc += 1
continue
res = self.tab.ele('t:button@text():SUBMIT')
if res:
logger.debug(f"点击Submit按钮")
res.click()
self.tab.wait(3)
res = self.tab.ele(
't:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=1)
if res:
logger.info("提交问卷成功")
logger.info(f"反馈地址: {text}")
res = self.tab.ele('t:b')
if res:
logger.info(f"反馈地址: {res.text}")
text = res.text
status = True
else:
status=False
api.create_info(
child_full_name=child_full_name,
parent_full_name=parent_full_name,
child_birthday=child_birthday,
address_str=address_str,
city_name=city_name,
parent_phone=parent_phone,
postcode=postcode,
province=province,
email=email,
text=text,
status=status
)
return True
bol = self.tab.ele(
't:div@text():ERR_TIMED_OUT', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
bol = self.tab.ele(
't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
bol = self.tab.ele(
't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1)
if bol:
logger.debug("刷新网页")
self.tab.refresh()
self.tab.wait(1.5)
jc += 1
def parse_proxy(proxy: str) -> tuple[str, int, str, str] | None:
"""
解析代理字符串为四元组 `(host, port, user, pwd)`
参数:
proxy: 形如 `host:port:user:pwd`
返回值:
(host, port, user, pwd) 或 None格式错误
"""
try:
host, port, user, pwd = proxy.split(":", 3)
return host, int(port), user, pwd
except Exception:
logger.error(f"代理格式错误: {proxy}")
return None
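# Usage sketch (credentials taken from proxys.py): entries follow "host:port:user:pwd", which
# parse_proxy splits into a (host, port, user, pwd) tuple, returning None for a malformed string.
def _demo_parse_proxy() -> None:
    print(parse_proxy("us.novproxy.io:1000:qyd00056-region-CA:qyd00056"))
    # -> ('us.novproxy.io', 1000, 'qyd00056-region-CA', 'qyd00056')
    print(parse_proxy("missing-fields"))  # -> None (format error is logged)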
def create_fingerprint_browser(proxy: str) -> tuple[str, str] | None:
"""
创建指纹浏览器并打开窗口,返回 `(browser_id, debugger_http)`
参数:
proxy: 代理字符串
返回值:
(browser_id, http) 或 None失败
"""
info = parse_proxy(proxy)
if info is None:
return None
host, port, user, pwd = info
try:
browser_id = bit_browser.bit_browser_create(
remark=f"{user}",
proxy_type="socks5",
host=host,
port=str(port),
proxy_user=user,
proxy_pwd=pwd,
)
if not browser_id:
return None
logger.info(f"创建指纹浏览器成功: {browser_id}")
time.sleep(1)
http = bit_browser.bit_browser_open(browser_id)
if not http:
return None
logger.info(f"打开指纹浏览器成功: {browser_id}")
return browser_id, http
except Exception as e:
logger.error(f"创建指纹浏览器失败: {e}")
return None
def close_and_delete_browser(browser_id: str) -> None:
"""
关闭并删除指定指纹浏览器
参数:
browser_id: 指纹浏览器ID
"""
try:
bit_browser.bit_browser_close(browser_id)
except Exception as e:
logger.warning(f"关闭浏览器失败或已关闭: {browser_id} - {e}")
time.sleep(1)
try:
bit_browser.bit_browser_delete(browser_id)
except Exception as e:
logger.warning(f"删除浏览器失败或已删除: {browser_id} - {e}")
def run_task_with_proxy(proxy: str, stop_event: threading.Event) -> None:
"""
使用代理创建指纹浏览器、执行自动化,并在结束后清理
参数:
proxy: 代理字符串
"""
browser_id: str | None = None
try:
created = create_fingerprint_browser(proxy)
if not created:
return
browser_id, http = created
if stop_event.is_set():
return
auto = Auto(http=http)
auto.open_url('https://www.claimform.youtubeprivacysettlement.com')
if stop_event.is_set():
return
if not auto.wait_home():
return
if stop_event.is_set():
return
if not auto.click_continue():
return
if stop_event.is_set():
return
auto.fill_questionnaire()
except Exception as e:
logger.error(f"执行任务异常: {e}")
finally:
if browser_id:
try:
close_and_delete_browser(browser_id)
except Exception:
pass
def proxy_loop(proxy: str, stop_event: threading.Event) -> None:
"""
为单个代理保持持续运行:任务结束后立即重建并再次执行
参数:
proxy: 代理字符串
stop_event: 停止事件,用于外部触发退出循环
"""
while not stop_event.is_set():
try:
if is_forbidden_time():
if stop_event.wait(timeout=60):
break
cleanup_all_browsers()
secs = seconds_until(20, 0)
if stop_event.wait(timeout=secs):
break
continue
run_task_with_proxy(proxy, stop_event)
except Exception as e:
logger.error(f"代理循环异常: {proxy} - {e}")
if stop_event.is_set():
break
if stop_event.wait(timeout=0.1):
break
def is_forbidden_time() -> bool:
    """
    Whether we are inside the daily no-run window (18:30 ~ 20:00 local time).
    Currently disabled: always returns False; the original check is kept below for reference.
    """
    # night-stop feature disabled
    return False
# 禁跑时段为 18:30 ~ 20:00
now = datetime.now()
start = now.replace(hour=18, minute=30, second=0, microsecond=0)
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
return start <= now < end
def wait_until_out_of_forbidden(interval_sec: float = 5.0, stop_event: threading.Event | None = None) -> None:
    """
    Poll while inside the no-run window and return once it ends
    :param interval_sec: polling interval in seconds
    :param stop_event: optional stop event; if it is set during the wait, return early
    """
    while is_forbidden_time():
        if stop_event is not None:
            if stop_event.wait(timeout=interval_sec):
                break
        else:
            time.sleep(interval_sec)
def seconds_until(hour: int, minute: int) -> float:
"""
计算到今天指定时间点的剩余秒数
参数:
hour: 目标小时24小时制
minute: 目标分钟
返回值:
float: 剩余秒数,若目标时间已过则为 0
"""
now = datetime.now()
target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
if target <= now:
return 0.0
return (target - now).total_seconds()
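# Worked example: at 19:30 local time, seconds_until(20, 0) returns 1800.0; once 20:00 has passed it returns 0.0.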
def count_fingerprint_browsers() -> int:
"""
统计当前指纹浏览器数量
返回值:
int: 当前总数量
"""
try:
res = bit_browser.bit_browser_get(0, 100)
data = res.get("data", {}) if isinstance(res, dict) else {}
total = data.get("totalNum")
lst = data.get("list", [])
if isinstance(total, int) and total >= 0:
return total
return len(lst)
except Exception as e:
logger.warning(f"统计指纹浏览器数量失败: {e}")
return 0
def cleanup_all_browsers() -> None:
"""
关闭并删除所有指纹浏览器
"""
try:
res = bit_browser.bit_browser_get(0, 100)
data = res.get("data", {}) if isinstance(res, dict) else {}
lst = data.get("list", [])
ids = [i.get("id") for i in lst if i.get("id")]
for bid in ids:
close_and_delete_browser(bid)
except Exception as e:
logger.warning(f"清理所有指纹浏览器失败: {e}")
def delete_excess_browsers(limit: int) -> None:
"""
删除超出上限的指纹浏览器,从列表末尾开始删除
参数:
limit: 允许的最大浏览器数量
"""
try:
res = bit_browser.bit_browser_get(0, 100)
data = res.get("data", {}) if isinstance(res, dict) else {}
lst = data.get("list", [])
ids = [i.get("id") for i in lst if i.get("id")]
count = len(ids)
if count <= limit:
return
excess = count - limit
to_delete = ids[-excess:]
for bid in reversed(to_delete):
close_and_delete_browser(bid)
logger.info(f"已删除超出数量 {excess},当前限制为 {limit}")
except Exception as e:
logger.warning(f"删除超额浏览器失败: {e}")
def monitor_browsers_and_restart(limit: int, stop_event: threading.Event, restart_event: threading.Event) -> None:
"""
每 3 秒检测指纹浏览器数量,超过 `limit` 则从末尾删除超出部分
参数:
limit: 允许的最大浏览器数量(通常为代理数量)
restart_event: 触发重启的事件(当前策略不使用)
"""
while not stop_event.is_set():
time.sleep(3)
count = count_fingerprint_browsers()
if count > limit:
logger.warning(f"指纹浏览器数量 {count} 超过限制 {limit},开始删除超出部分")
delete_excess_browsers(limit)
def main():
"""
多线程并发管理:按代理数量并发创建指纹浏览器并执行任务;每 3 秒监控数量,超限则从末尾删除多余浏览器。
"""
proxies = list(proxy_list)
while True:
stop_event = threading.Event()
restart_event = threading.Event()
if is_forbidden_time():
if stop_event.wait(timeout=60):
continue
cleanup_all_browsers()
logger.info("处于禁跑时段,等待至禁跑结束")
wait_until_out_of_forbidden()
continue
executor = ThreadPoolExecutor(max_workers=len(proxies))
try:
futures_map = {executor.submit(proxy_loop, p, stop_event): p for p in proxies}
monitor_thread = threading.Thread(
target=monitor_browsers_and_restart,
args=(len(proxies), stop_event, restart_event),
daemon=True,
)
monitor_thread.start()
while True:
if restart_event.is_set():
stop_event.set()
try:
executor.shutdown(wait=True)
except Exception:
pass
break
if is_forbidden_time():
logger.info("进入禁跑时段停止当前批次等待1分钟后清理指纹浏览器")
stop_event.set()
try:
executor.shutdown(wait=True)
except Exception:
pass
time.sleep(60)
cleanup_all_browsers()
wait_until_out_of_forbidden()
break
for f, proxy in list(futures_map.items()):
if f.done() and not stop_event.is_set() and not restart_event.is_set():
try:
_ = f.exception()
except Exception:
pass
try:
new_future = executor.submit(proxy_loop, proxy, stop_event)
del futures_map[f]
futures_map[new_future] = proxy
except Exception as e:
logger.error(f"重启代理线程失败: {proxy} - {e}")
time.sleep(0.2)
try:
monitor_thread.join(timeout=5)
except Exception:
pass
finally:
try:
executor.shutdown(wait=True)
except Exception:
pass
continue
def main2():
auto = Auto()
auto.open_url('https://www.claimform.youtubeprivacysettlement.com')
bol = auto.wait_home()
if not bol:
return
auto.fill_questionnaire()
# auto.solve_recaptcha()
if __name__ == "__main__":
main2()

95
spider/proxys.py Normal file
View File

@@ -0,0 +1,95 @@
work = [
"us.novproxy.io:1000:qyd00056-region-CA:qyd00056",
"us.novproxy.io:1000:qyd00054-region-US:qyd00054",
"us.novproxy.io:1000:qyd00053-region-CA:qyd00053",
"us.novproxy.io:1000:qyd00052-region-US:qyd00052",
]
ca1 = [
"us.novproxy.io:1000:qyd00051-region-CA:qyd00051",
"us.novproxy.io:1000:qyd00050-region-US:qyd00050",
"us.novproxy.io:1000:qyd00049-region-CA:qyd00049",
"us.novproxy.io:1000:qyd00048-region-US:qyd00048",
"us.novproxy.io:1000:qyd00047-region-CA:qyd00047",
]
ca2 = [
"us.novproxy.io:1000:qyd00046-region-US:qyd00046",
"us.novproxy.io:1000:qyd00045-region-CA:qyd00045",
"us.novproxy.io:1000:qyd00044-region-US:qyd00044",
"us.novproxy.io:1000:qyd00043-region-CA:qyd00043",
"us.novproxy.io:1000:qyd00042-region-US:qyd00042",
]
ca3 = [
"us.novproxy.io:1000:qyd00041-region-CA:qyd00041",
"us.novproxy.io:1000:qyd00040-region-CA:qyd00040",
"us.novproxy.io:1000:qyd00039-region-US:qyd00039",
"us.novproxy.io:1000:qyd00038-region-CA:qyd00038",
"us.novproxy.io:1000:qyd00037-region-US:qyd00037",
]
cwd = [
"us.novproxy.io:1000:qyd00036-region-CA:qyd00036",
"us.novproxy.io:1000:qyd00035-region-US:qyd00035",
"us.novproxy.io:1000:qyd00034-region-CA:qyd00034",
"us.novproxy.io:1000:qyd00033-region-US:qyd00033",
]
wt = [
"us.novproxy.io:1000:qyd00032-region-CA:qyd00032",
"us.novproxy.io:1000:qyd00031-region-US:qyd00031",
"us.novproxy.io:1000:qyd00030-region-CA:qyd00030",
"us.novproxy.io:1000:qyd00029-region-US:qyd00029",
]
hc = [
"us.novproxy.io:1000:qyd00028-region-CA:qyd00028",
"us.novproxy.io:1000:qyd00027-region-US:qyd00027",
"us.novproxy.io:1000:qyd00026-region-CA:qyd00026",
"us.novproxy.io:1000:qyd00025-region-US:qyd00025",
]
zlj = [
"us.novproxy.io:1000:qyd00024-region-CA:qyd00024",
"us.novproxy.io:1000:qyd00023-region-US:qyd00023",
"us.novproxy.io:1000:qyd00022-region-CA:qyd00022",
"us.novproxy.io:1000:qyd00021-region-US:qyd00021",
]
wzq = [
"us.novproxy.io:1000:qyd00020-region-CA:qyd00020",
"us.novproxy.io:1000:qyd00019-region-US:qyd00019",
"us.novproxy.io:1000:qyd00018-region-CA:qyd00018",
"us.novproxy.io:1000:qyd00017-region-US:qyd00017",
]
xy = [
"us.novproxy.io:1000:qyd00016-region-CA:qyd00016",
"us.novproxy.io:1000:qyd00015-region-US:qyd00015",
"us.novproxy.io:1000:qyd00014-region-CA:qyd00014",
"us.novproxy.io:1000:qyd00013-region-US:qyd00013",
]
yll = [
"us.novproxy.io:1000:qyd00012-region-CA:qyd00012",
"us.novproxy.io:1000:qyd00011-region-US:qyd00011",
"us.novproxy.io:1000:qyd00010-region-CA:qyd00010",
"us.novproxy.io:1000:qyd00009-region-US:qyd00009",
]
szt = [
"us.novproxy.io:1000:qyd00008-region-CA:qyd00008",
"us.novproxy.io:1000:qyd00007-region-US:qyd00007",
"us.novproxy.io:1000:qyd00006-region-CA:qyd00006",
"us.novproxy.io:1000:qyd00005-region-US:qyd00005",
]
hz = [
"us.novproxy.io:1000:qyd00004-region-CA:qyd00004",
"us.novproxy.io:1000:qyd00003-region-US:qyd00003",
"us.novproxy.io:1000:qyd00002-region-CA:qyd00002",
"us.novproxy.io:1000:qyd00001-region-US:qyd00001",
]
proxy_list = work
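
Each entry above appears to follow a host:port:username:password layout for authenticated proxies. A small parsing sketch under that assumption (the helper names and the socks5 scheme are illustrative, not taken from work.py):

def parse_proxy(entry: str) -> dict:
    """Split an assumed 'host:port:username:password' entry into its parts."""
    host, port, username, password = entry.split(":", 3)
    return {"host": host, "port": int(port), "username": username, "password": password}

def proxy_url(entry: str, scheme: str = "socks5") -> str:
    """Build a proxy URL such as socks5://user:pass@host:port; the scheme is an assumption."""
    p = parse_proxy(entry)
    return f"{scheme}://{p['username']}:{p['password']}@{p['host']}:{p['port']}"

# Example: proxy_url(work[0]) -> "socks5://qyd00056-region-CA:qyd00056@us.novproxy.io:1000"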

31
spider/requirements.txt Normal file
View File

@@ -0,0 +1,31 @@
aiohttp
requests
curl_cffi
aiohttp-socks
requests[socks]
fake_useragent
apscheduler
aiofiles
loguru
portalocker
aiomultiprocess
faker
eth_account
eth_utils
solders
toncli
ecdsa
base58
ddddocr
aiohttp_socks
websockets
psutil
socks
drissionpage
fastapi
uvicorn
pydantic
ultralytics
opencv-python-headless
torch
pillow

22
spider/test.py Normal file
View File

@@ -0,0 +1,22 @@
from DrissionPage import Chromium
from loguru import logger
from bit_browser import bit_browser

# http = bit_browser.bit_browser_open('871851b9835d42b3911f39162b3427d5')
# print(http)

# Attach to the locally running browser instance on its debugging port
browser = Chromium('127.0.0.1:65480')
tab = browser.latest_tab
# tab.get('bitbrowser://settings/clearBrowserData')

# Walk the nested shadow roots of the settings page down to the "clear browsing data" dialog
res = (
    tab.ele('t:settings-ui', timeout=3)
    .sr('t:settings-main')
    .sr('t:settings-basic-page')
    .sr('t:settings-privacy-page')
    .sr('t:settings-clear-browsing-data-dialog')
    .sr('t:cr-dialog')
)
res = res.ele('t:cr-page-selector@id=pages')
res = res.ele('t:settings-dropdown-menu@id=clearFromBasic').shadow_root
# Select the time-range option with value=4 in the dropdown
res.ele('t:select@id=dropdownMenu').ele('t:option@value=4').click()
# res = tab.ele('t:settings-dropdown-menu@id=clearFromBasic', timeout=3)
print(res)
if res:
    logger.info(f"html: {res.html}")
# res = tab.ele('t:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=3)
# if res:
#     logger.info("Questionnaire submitted successfully")
# res = tab.ele('t:b')
# if res:
#     logger.info(f"Feedback address: {res.text}")

1051
spider/work.py Normal file

File diff suppressed because it is too large