From 45ff5a62e3e3e14a8baed37e65f808472039137e Mon Sep 17 00:00:00 2001 From: bvwl <2201101122@qq.com> Date: Fri, 12 Dec 2025 14:40:04 +0800 Subject: [PATCH] 0.0.1 --- .gitignore | 14 + README.md | 3 + back/Dockerfile | 30 + back/apis/__init__.py | 5 + back/apis/country/__init__.py | 9 + back/apis/country/food/schema.py | 66 ++ back/apis/country/food/view.py | 122 ++ back/apis/country/info/schema.py | 88 ++ back/apis/country/info/view.py | 171 +++ back/apis/country/models.py | 116 ++ back/apis/country/shop/schema.py | 74 ++ back/apis/country/shop/view.py | 155 +++ back/compose.yml | 29 + back/main.py | 152 +++ .../models/0_20251212143904_init.py | 67 ++ back/pyproject.toml | 4 + back/requirements.txt | 25 + back/settings.py | 34 + back/utils/__init__.py | 0 back/utils/browser_api.py | 143 +++ back/utils/decorators.py | 165 +++ back/utils/exceptions.py | 47 + back/utils/logs.py | 218 ++++ back/utils/out_base.py | 8 + back/utils/redis_tool.py | 96 ++ back/utils/session_store.py | 177 +++ back/utils/time_tool.py | 56 + spider/api.py | 120 ++ spider/auto_challenge.py | 313 +++++ spider/bit_browser.py | 318 +++++ spider/mail_.py | 851 +++++++++++++ spider/main.py | 765 ++++++++++++ spider/proxys.py | 95 ++ spider/requirements.txt | 31 + spider/test.py | 22 + spider/work.py | 1051 +++++++++++++++++ 36 files changed, 5640 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 back/Dockerfile create mode 100644 back/apis/__init__.py create mode 100644 back/apis/country/__init__.py create mode 100644 back/apis/country/food/schema.py create mode 100644 back/apis/country/food/view.py create mode 100644 back/apis/country/info/schema.py create mode 100644 back/apis/country/info/view.py create mode 100644 back/apis/country/models.py create mode 100644 back/apis/country/shop/schema.py create mode 100644 back/apis/country/shop/view.py create mode 100755 back/compose.yml create mode 100644 back/main.py create mode 100644 
back/migrations/models/0_20251212143904_init.py create mode 100644 back/pyproject.toml create mode 100644 back/requirements.txt create mode 100644 back/settings.py create mode 100644 back/utils/__init__.py create mode 100644 back/utils/browser_api.py create mode 100644 back/utils/decorators.py create mode 100644 back/utils/exceptions.py create mode 100644 back/utils/logs.py create mode 100644 back/utils/out_base.py create mode 100644 back/utils/redis_tool.py create mode 100644 back/utils/session_store.py create mode 100644 back/utils/time_tool.py create mode 100644 spider/api.py create mode 100644 spider/auto_challenge.py create mode 100644 spider/bit_browser.py create mode 100644 spider/mail_.py create mode 100644 spider/main.py create mode 100644 spider/proxys.py create mode 100644 spider/requirements.txt create mode 100644 spider/test.py create mode 100644 spider/work.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cc5561c --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +__pycache__ +.env +.trae +.idea +.DS_Store +*.baiduyun.* +.vscode +对比 +logs/sessions.json +logs/sessions.log +222.py +333.py +444.py +chain diff --git a/README.md b/README.md new file mode 100644 index 0000000..a268b26 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ + +# 0.0.1 +- 初始化项目 diff --git a/back/Dockerfile b/back/Dockerfile new file mode 100755 index 0000000..beb22c7 --- /dev/null +++ b/back/Dockerfile @@ -0,0 +1,30 @@ +# 运行环境 +FROM python:3.12-slim + +# 设置时区 +ENV TZ=Asia/Shanghai +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +# 设置工作目录和Python环境变量 +WORKDIR /app +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +# 安装系统依赖 +RUN sed -i 's|http://deb.debian.org/debian|http://mirrors.aliyun.com/debian|g' /etc/apt/sources.list.d/debian.sources \ + && sed -i 's|http://security.debian.org/debian-security|http://mirrors.aliyun.com/debian-security|g' /etc/apt/sources.list.d/debian.sources \ + && apt-get update \ + && apt-get install 
-y --no-install-recommends \ + gcc \ + python3-dev \ + tzdata \ + && rm -rf /var/lib/apt/lists/* + +# 优化:先复制依赖文件,避免每次代码变更都重新安装依赖 +COPY requirements.txt /app/ +RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.cloud.tencent.com/pypi/simple +# 复制项目文件 +COPY . /app + +# 设置启动命令 +CMD ["python", "main.py"] \ No newline at end of file diff --git a/back/apis/__init__.py b/back/apis/__init__.py new file mode 100644 index 0000000..d83cbc3 --- /dev/null +++ b/back/apis/__init__.py @@ -0,0 +1,5 @@ +from fastapi import APIRouter +from .country import app as country_app + +app = APIRouter() +app.include_router(country_app, prefix='/country') \ No newline at end of file diff --git a/back/apis/country/__init__.py b/back/apis/country/__init__.py new file mode 100644 index 0000000..890e3ec --- /dev/null +++ b/back/apis/country/__init__.py @@ -0,0 +1,9 @@ +from fastapi import APIRouter +from .info.view import app as info_app +from .food.view import app as food_app +from .shop.view import app as shop_app + +app = APIRouter() +app.include_router(info_app, prefix='/info', tags=['信息']) +app.include_router(food_app, prefix='/food', tags=['食物']) +app.include_router(shop_app, prefix='/shop', tags=['商店']) diff --git a/back/apis/country/food/schema.py b/back/apis/country/food/schema.py new file mode 100644 index 0000000..91b0609 --- /dev/null +++ b/back/apis/country/food/schema.py @@ -0,0 +1,66 @@ +from datetime import datetime, timezone, timedelta +from pydantic import BaseModel, Field, computed_field +from typing import List +from uuid import UUID +from utils.time_tool import TimestampModel + +CHINA_TZ = timezone(timedelta(hours=8)) + + +class Base(BaseModel): + """ + 基础食物信息模型 + + 仅包含食物名称 + """ + name: str = Field(..., description='食物名称') + + +class Create(Base): + """ + 创建请求模型 + """ + pass + + +class Update(BaseModel): + """ + 更新请求模型,支持部分更新 + """ + name: str | None = Field(None, description='食物名称') + + +class Out(TimestampModel, Base): + """ + 输出模型 + """ + code: int = 
Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + id: UUID = Field(..., description='ID') + + create_time: datetime = Field(..., description='创建时间') + update_time: datetime = Field(..., description='更新时间') + + @computed_field + @property + def create_time_cn(self) -> str: + return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + @computed_field + @property + def update_time_cn(self) -> str: + return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + class Config: + from_attributes = True + + +class OutList(BaseModel): + """ + 列表输出模型 + """ + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + count: int = Field(0, description='总数') + num: int = Field(0, description='当前数量') + items: List[Out] = Field([], description='列表数据') diff --git a/back/apis/country/food/view.py b/back/apis/country/food/view.py new file mode 100644 index 0000000..d553b93 --- /dev/null +++ b/back/apis/country/food/view.py @@ -0,0 +1,122 @@ + +from fastapi import APIRouter, Query, Body, HTTPException +from uuid import UUID +from .schema import Create, Update, Out, OutList +from ..models import Food +from utils.decorators import handle_exceptions_unified +from utils.time_tool import parse_time +from utils.out_base import CommonOut + +app = APIRouter() + + +# 创建食物 +@app.post("", response_model=Out, description='创建食物', summary='创建食物') +@handle_exceptions_unified() +async def post(item: Create = Body(..., description='创建数据')): + """ + 创建食物记录 + """ + res = await Food.create(**item.model_dump()) + if not res: + raise HTTPException(status_code=400, detail='创建失败') + return res + + +# 查询食物 +@app.get("", response_model=OutList, description='获取食物', summary='获取食物') +@handle_exceptions_unified() +async def gets( + id: UUID | None = Query(None, description='主键ID'), + name: str | None = Query(None, description='食物名称'), + order_by: str | None = Query('create_time', description='排序字段', + 
regex='^(-)?(id|name|create_time|update_time)$'), + res_count: bool = Query(False, description='是否返回总数'), + create_time_start: str | int | None = Query( + None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + create_time_end: str | int | None = Query( + None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_start: str | int | None = Query( + None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_end: str | int | None = Query( + None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + page: int = Query(1, ge=1, description='页码'), + limit: int = Query(10, ge=1, le=1000, description='每页数量'), +): + """ + 获取食物列表 + """ + query = Food.all() + if id: + query = query.filter(id=id) + if name: + query = query.filter(name=name) + if create_time_start: + query = query.filter(create_time__gte=parse_time(create_time_start)) + if create_time_end: + query = query.filter(create_time__lte=parse_time( + create_time_end, is_end=True)) + if update_time_start: + query = query.filter(update_time__gte=parse_time(update_time_start)) + if update_time_end: + query = query.filter(update_time__lte=parse_time( + update_time_end, is_end=True)) + + if order_by: + query = query.order_by(order_by) + + if res_count: + count = await query.count() + else: + count = -1 + offset = (page - 1) * limit # 计算偏移量 + query = query.limit(limit).offset(offset) # 应用分页 + + res = await query + if not res: + raise HTTPException(status_code=404, detail='食物不存在') + num = len(res) + return OutList(count=count, num=num, items=res) + + +# 更新食物 +@app.put("", response_model=Out, description='更新食物', summary='更新食物') +@handle_exceptions_unified() +async def put(id: UUID = Query(..., description='主键ID'), + item: Update = Body(..., description='更新数据'), + ): + """ + 部分更新食物,只更新传入的非空字段 + """ + # 检查食物是否存在 + secret = await Food.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, detail='食物不存在') + + # 
获取要更新的字段(排除None值的字段) + update_data = item.model_dump(exclude_unset=True) + + # 如果没有要更新的字段 + if not update_data: + raise HTTPException(status_code=400, detail='没有要更新的字段') + + # 更新食物字段 + await secret.update_from_dict(update_data) + await secret.save() + return secret + + +# 删除食物 + +@app.delete("", response_model=CommonOut, description='删除食物', summary='删除食物') +@handle_exceptions_unified() +async def delete(id: UUID = Query(..., description='主键ID'), + ): + """删除食物""" + secret = await Food.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, detail='食物不存在') + await secret.delete() + # Tortoise ORM 单个实例的 delete() 方法返回 None,而不是删除的记录数 + # 删除成功时手动返回 1,如果有异常会被装饰器捕获 + return CommonOut(count=1) diff --git a/back/apis/country/info/schema.py b/back/apis/country/info/schema.py new file mode 100644 index 0000000..ba17e03 --- /dev/null +++ b/back/apis/country/info/schema.py @@ -0,0 +1,88 @@ +from datetime import datetime, timezone, timedelta +from pydantic import BaseModel, Field, computed_field +from typing import List +from uuid import UUID +from utils.time_tool import TimestampModel + +CHINA_TZ = timezone(timedelta(hours=8)) + + +class Base(BaseModel): + """ + 基础信息模型 + + 字段与数据库模型 Info 保持一致(孩子与家长字段) + """ + child_full_name: str = Field(..., description='孩子全名') + parent_full_name: str = Field(..., description='家长全名') + child_birthday: str = Field(..., description='孩子生日') + address_str: str = Field(..., description='街道地址') + city_name: str = Field(..., description='城市') + parent_phone: str = Field(..., description='家长电话') + postcode: str = Field(..., description='邮编') + province: str = Field(..., description='省/州全称') + status: bool = Field(False, description='状态') + email: str | None = Field(None, description='邮箱') + email_content: str | None = Field(None, description='邮件内容') + text: str | None = Field(None, description='文本内容') + + +class Create(Base): + """ + 创建请求模型 + """ + pass + + +class Update(BaseModel): + """ + 更新请求模型,支持部分更新 + """ + child_full_name: str 
| None = Field(None, description='孩子全名') + parent_full_name: str | None = Field(None, description='家长全名') + child_birthday: str | None = Field(None, description='孩子生日') + address_str: str | None = Field(None, description='街道地址') + city_name: str | None = Field(None, description='城市') + parent_phone: str | None = Field(None, description='家长电话') + postcode: str | None = Field(None, description='邮编') + province: str | None = Field(None, description='省/州全称') + status: bool | None = Field(None, description='状态') + email: str | None = Field(None, description='邮箱') + email_content: str | None = Field(None, description='邮件内容') + text: str | None = Field(None, description='文本内容') + + +class Out(TimestampModel, Base): + """ + 输出模型 + """ + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + id: UUID = Field(..., description='ID') + + create_time: datetime = Field(..., description='创建时间') + update_time: datetime = Field(..., description='更新时间') + + @computed_field + @property + def create_time_cn(self) -> str: + return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + @computed_field + @property + def update_time_cn(self) -> str: + return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + class Config: + from_attributes = True + + +class OutList(BaseModel): + """ + 列表输出模型 + """ + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + count: int = Field(0, description='总数') + num: int = Field(0, description='当前数量') + items: List[Out] = Field([], description='列表数据') diff --git a/back/apis/country/info/view.py b/back/apis/country/info/view.py new file mode 100644 index 0000000..277d58e --- /dev/null +++ b/back/apis/country/info/view.py @@ -0,0 +1,171 @@ + +from fastapi import APIRouter, Query, Body, HTTPException +import random +from uuid import UUID +from .schema import Create, Update, Out, OutList +from ..models import Info +from utils.decorators import 
handle_exceptions_unified +from utils.time_tool import parse_time +from utils.out_base import CommonOut +from tortoise.transactions import in_transaction + +app = APIRouter() + + +# 创建信息 +@app.post("", response_model=Out, description='创建信息', summary='创建信息') +@handle_exceptions_unified() +async def post(item: Create = Body(..., description='创建数据')): + """ + 创建信息记录 + """ + res = await Info.create(**item.model_dump()) + if not res: + raise HTTPException(status_code=400, detail='创建失败') + return res + + +# 查询信息 +@app.get("", response_model=OutList, description='获取信息', summary='获取信息') +@handle_exceptions_unified() +async def gets( + id: UUID | None = Query(None, description='主键ID'), + child_full_name: str | None = Query(None, description='孩子全名'), + parent_full_name: str | None = Query(None, description='家长全名'), + child_birthday: str | None = Query(None, description='孩子生日'), + address_str: str | None = Query(None, description='街道地址'), + city_name: str | None = Query(None, description='城市'), + parent_phone: str | None = Query(None, description='家长电话'), + postcode: str | None = Query(None, description='邮编'), + province: str | None = Query(None, description='州全称'), + status: bool | None = Query(None, description='状态'), + email: str | None = Query(None, description='邮箱'), + order_by: str | None = Query('create_time', description='排序字段', + regex='^(-)?(id|child_full_name|parent_full_name|city_name|postcode|province|create_time|update_time)$'), + res_count: bool = Query(False, description='是否返回总数'), + create_time_start: str | int | None = Query( + None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + create_time_end: str | int | None = Query( + None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_start: str | int | None = Query( + None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_end: str | int | None = Query( + None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 
13位时间戳)'), + page: int = Query(1, ge=1, description='页码'), + limit: int = Query(10, ge=1, le=1000, description='每页数量'), +): + """ + 获取信息列表 + """ + query = Info.all() + if id: + query = query.filter(id=id) + if child_full_name: + query = query.filter(child_full_name=child_full_name) + if parent_full_name: + query = query.filter(parent_full_name=parent_full_name) + if child_birthday: + query = query.filter(child_birthday=child_birthday) + if address_str: + query = query.filter(address_str=address_str) + if city_name: + query = query.filter(city_name=city_name) + if parent_phone: + query = query.filter(parent_phone=parent_phone) + if postcode: + query = query.filter(postcode=postcode) + if province: + query = query.filter(province=province) + if email: + query = query.filter(email=email) + if status is not None: + query = query.filter(status=status) + if create_time_start: + query = query.filter(create_time__gte=parse_time(create_time_start)) + if create_time_end: + query = query.filter(create_time__lte=parse_time( + create_time_end, is_end=True)) + if update_time_start: + query = query.filter(update_time__gte=parse_time(update_time_start)) + if update_time_end: + query = query.filter(update_time__lte=parse_time( + update_time_end, is_end=True)) + + if order_by: + query = query.order_by(order_by) + + if res_count: + count = await query.count() + else: + count = -1 + offset = (page - 1) * limit # 计算偏移量 + query = query.limit(limit).offset(offset) # 应用分页 + + res = await query + if not res: + raise HTTPException(status_code=404, detail='信息不存在') + num = len(res) + return OutList(count=count, num=num, items=res) + + +# 更新信息 +@app.put("", response_model=Out, description='更新信息', summary='更新信息') +@handle_exceptions_unified() +async def put(id: UUID = Query(..., description='主键ID'), + item: Update = Body(..., description='更新数据'), + ): + """ + 部分更新信息,只更新传入的非空字段 + """ + # 检查信息是否存在 + secret = await Info.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, 
detail='信息不存在') + + # 获取要更新的字段(排除None值的字段) + update_data = item.model_dump(exclude_unset=True) + + # 如果没有要更新的字段 + if not update_data: + raise HTTPException(status_code=400, detail='没有要更新的字段') + + # 更新信息字段 + await secret.update_from_dict(update_data) + await secret.save() + return secret + + +# 删除信息 + +@app.delete("", response_model=CommonOut, description='删除信息', summary='删除信息') +@handle_exceptions_unified() +async def delete(id: UUID = Query(..., description='主键ID'), + ): + """删除信息""" + secret = await Info.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, detail='信息不存在') + await secret.delete() + # Tortoise ORM 单个实例的 delete() 方法返回 None,而不是删除的记录数 + # 删除成功时手动返回 1,如果有异常会被装饰器捕获 + return CommonOut(count=1) + + +# 随机获取一条状态修改为True的记录 +@app.get("/one", response_model=Out, description='随机获取一条状态修改为True的记录', summary='随机获取一条状态修改为True的记录') +@handle_exceptions_unified() +async def random_update_status(): + """ + 随机获取一条状态为 False 的记录并在事务中更新为 True + """ + async with in_transaction() as conn: + q = Info.filter(status=False).using_db(conn) + current_running_count = await q.count() + if current_running_count == 0: + raise HTTPException(status_code=404, detail='没有状态为False的记录') + pick_index = random.choice(range(current_running_count)) + item = await q.order_by('create_time').offset(pick_index).first() + updated = await Info.filter(id=item.id, status=False).using_db(conn).update(status=True) + if updated == 0: + raise HTTPException(status_code=400, detail='并发冲突,未更新') + return item diff --git a/back/apis/country/models.py b/back/apis/country/models.py new file mode 100644 index 0000000..6bc997e --- /dev/null +++ b/back/apis/country/models.py @@ -0,0 +1,116 @@ +import uuid +from tortoise import fields +from tortoise.models import Model + + +class Shop(Model): + """ + 店铺模型 + + 字段: + id (UUIDField): 主键,默认使用 UUID 生成 + province (CharField): 省份,最大长度 255 + city (CharField): 城市,最大长度 255 + street (CharField): 街道,最大长度 255 + shop_name (CharField): 店铺名称,最大长度 255 + 
shop_number (CharField): 店铺号码,最大长度 255, nullable 为 True + """ + id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID") + province = fields.CharField(max_length=255, null=True, index=True, description="省份") + city = fields.CharField(max_length=255, index=True, description="城市") + street = fields.CharField(max_length=255, index=True, description="街道") + shop_name = fields.CharField(max_length=255, index=True, description="店铺名称") + shop_number = fields.CharField(max_length=255, null=True, description="店铺号码") + create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间') + update_time = fields.DatetimeField(auto_now=True, description='更新时间') + + + class Meta: + table = "shop" + table_description = "店铺表" + ordering = ["create_time"] + indexes = [ + ("province", "city", "street"), + ] + def __repr__(self): + return f"" + + __str__ = __repr__ + +class Food(Model): + """ + 食物模型 + + 字段: + id (UUIDField): 主键,默认使用 UUID 生成 + name (CharField): 食物名称,最大长度 255 + """ + id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID") + name = fields.CharField(max_length=255, index=True, description="食物名称") + create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间') + update_time = fields.DatetimeField(auto_now=True, description='更新时间') + + + class Meta: + table = "food" + table_description = "食物表" + ordering = ["create_time"] + indexes = [ + ("name",), + ] + def __repr__(self): + return f"" + + __str__ = __repr__ + + +class Info(Model): + """ + 信息模型(孩子与家长字段) + + 字段: + id (UUIDField): 主键,默认使用 UUID 生成 + child_full_name (CharField): 孩子全名,最大长度 255 + parent_full_name (CharField): 家长全名,最大长度 255 + child_birthday (CharField): 孩子生日(原始字符串),最大长度 32 + address_str (CharField): 街道地址,最大长度 255 + city_name (CharField): 城市,最大长度 255 + parent_phone (CharField): 家长电话,最大长度 64 + postcode (CharField): 邮编,最大长度 20 + province (CharField): 省/州全称,最大长度 255 + status (BooleanField): 状态,默认值 False + email (CharField): 邮箱,最大长度 255, nullable 为 True 
+ text (TextField): 文本内容, nullable 为 True + + """ + id = fields.UUIDField(pk=True, default=uuid.uuid4, description="ID") + child_full_name = fields.CharField(max_length=255, index=True, description="孩子全名") + parent_full_name = fields.CharField(max_length=255, index=True, description="家长全名") + child_birthday = fields.CharField(max_length=32, description="孩子生日") + address_str = fields.CharField(max_length=255, index=True, description="街道地址") + city_name = fields.CharField(max_length=255, index=True, description="城市") + parent_phone = fields.CharField(max_length=64, description="家长电话") + postcode = fields.CharField(max_length=20, index=True, description="邮编") + province = fields.CharField(max_length=255, index=True, description="省/州全称") + status = fields.BooleanField(default=False, description="状态") + # 邮件内容 + email = fields.CharField(max_length=255, unique=True, index=True, description="邮箱") + email_content = fields.TextField(null=True, description="邮件内容") + text = fields.TextField(null=True, description="文本内容") + create_time = fields.DatetimeField(auto_now_add=True, index=True, description='创建时间') + update_time = fields.DatetimeField(auto_now=True, description='更新时间') + + + class Meta: + table = "info" + table_description = "信息表" + ordering = ["create_time"] + indexes = [ + ("city_name", "postcode", "province"), + ("child_full_name", "parent_full_name"), + ] + + def __repr__(self): + return f"" + + __str__ = __repr__ diff --git a/back/apis/country/shop/schema.py b/back/apis/country/shop/schema.py new file mode 100644 index 0000000..58b191d --- /dev/null +++ b/back/apis/country/shop/schema.py @@ -0,0 +1,74 @@ +from datetime import datetime, timezone, timedelta +from pydantic import BaseModel, Field, computed_field +from typing import List +from uuid import UUID +from utils.time_tool import TimestampModel + +CHINA_TZ = timezone(timedelta(hours=8)) + + +class Base(BaseModel): + """ + 基础店铺信息模型 + + 包含店铺相关的通用字段,供创建与输出模型复用 + """ + province: str | None = Field(None, 
description='省份') + city: str = Field(..., description='城市') + street: str = Field(..., description='街道') + shop_name: str = Field(..., description='店铺名称') + shop_number: str | None = Field(None, description='店铺号码') + + +class Create(Base): + """ + 创建请求模型 + """ + pass + + +class Update(BaseModel): + """ + 更新请求模型,支持部分更新 + """ + province: str | None = Field(None, description='省份') + city: str | None = Field(None, description='城市') + street: str | None = Field(None, description='街道') + shop_name: str | None = Field(None, description='店铺名称') + shop_number: str | None = Field(None, description='店铺号码') + + +class Out(TimestampModel, Base): + """ + 输出模型 + """ + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + id: UUID = Field(..., description='ID') + + create_time: datetime = Field(..., description='创建时间') + update_time: datetime = Field(..., description='更新时间') + + @computed_field + @property + def create_time_cn(self) -> str: + return self.create_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + @computed_field + @property + def update_time_cn(self) -> str: + return self.update_time.astimezone(CHINA_TZ).strftime("%Y-%m-%d %H:%M:%S") + + class Config: + from_attributes = True + + +class OutList(BaseModel): + """ + 列表输出模型 + """ + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + count: int = Field(0, description='总数') + num: int = Field(0, description='当前数量') + items: List[Out] = Field([], description='列表数据') diff --git a/back/apis/country/shop/view.py b/back/apis/country/shop/view.py new file mode 100644 index 0000000..0488e22 --- /dev/null +++ b/back/apis/country/shop/view.py @@ -0,0 +1,155 @@ + +from fastapi import APIRouter, Query, Body, HTTPException +from uuid import UUID +from .schema import Create, Update, Out, OutList +from ..models import Shop +from utils.decorators import handle_exceptions_unified +from utils.time_tool import parse_time +from utils.out_base import 
CommonOut +from tortoise.transactions import in_transaction +import random + +app = APIRouter() + + +# 创建店铺 +@app.post("", response_model=Out, description='创建店铺', summary='创建店铺') +@handle_exceptions_unified() +async def post(item: Create = Body(..., description='创建数据')): + """ + 创建店铺记录 + """ + res = await Shop.filter(street=item.street).first() + if res: + raise HTTPException(status_code=400, detail='店铺已存在') + res = await Shop.create(**item.model_dump()) + if not res: + raise HTTPException(status_code=400, detail='创建失败') + return res + + +# 查询店铺 +@app.get("", response_model=OutList, description='获取店铺', summary='获取店铺') +@handle_exceptions_unified() +async def gets( + id: UUID | None = Query(None, description='主键ID'), + province: str | None = Query(None, description='省份'), + city: str | None = Query(None, description='城市'), + street: str | None = Query(None, description='街道'), + shop_name: str | None = Query(None, description='店铺名称'), + shop_number: str | None = Query(None, description='店铺号码'), + order_by: str | None = Query('create_time', description='排序字段', + regex='^(-)?(id|province|city|street|shop_name|create_time|update_time)$'), + res_count: bool = Query(False, description='是否返回总数'), + create_time_start: str | int | None = Query( + None, description='创建时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + create_time_end: str | int | None = Query( + None, description='创建时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_start: str | int | None = Query( + None, description='更新时间开始 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + update_time_end: str | int | None = Query( + None, description='更新时间结束 (支持 YYYY-MM-DD / YYYY-MM-DD HH:mm:ss / 13位时间戳)'), + page: int = Query(1, ge=1, description='页码'), + limit: int = Query(10, ge=1, le=1000, description='每页数量'), +): + """ + 获取店铺列表 + """ + query = Shop.all() + if id: + query = query.filter(id=id) + if province: + query = query.filter(province=province) + if city: + query = query.filter(city=city) 
+ if street: + query = query.filter(street=street) + if shop_name: + query = query.filter(shop_name=shop_name) + if shop_number: + query = query.filter(shop_number=shop_number) + if create_time_start: + query = query.filter(create_time__gte=parse_time(create_time_start)) + if create_time_end: + query = query.filter(create_time__lte=parse_time( + create_time_end, is_end=True)) + if update_time_start: + query = query.filter(update_time__gte=parse_time(update_time_start)) + if update_time_end: + query = query.filter(update_time__lte=parse_time( + update_time_end, is_end=True)) + + if order_by: + query = query.order_by(order_by) + + if res_count: + count = await query.count() + else: + count = -1 + offset = (page - 1) * limit # 计算偏移量 + query = query.limit(limit).offset(offset) # 应用分页 + + res = await query + if not res: + raise HTTPException(status_code=404, detail='店铺不存在') + num = len(res) + return OutList(count=count, num=num, items=res) + + +# 更新店铺 +@app.put("", response_model=Out, description='更新店铺', summary='更新店铺') +@handle_exceptions_unified() +async def put(id: UUID = Query(..., description='主键ID'), + item: Update = Body(..., description='更新数据'), + ): + """ + 部分更新店铺,只更新传入的非空字段 + """ + # 检查店铺是否存在 + secret = await Shop.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, detail='店铺不存在') + + # 获取要更新的字段(排除None值的字段) + update_data = item.model_dump(exclude_unset=True) + + # 如果没有要更新的字段 + if not update_data: + raise HTTPException(status_code=400, detail='没有要更新的字段') + + # 更新店铺字段 + await secret.update_from_dict(update_data) + await secret.save() + return secret + + +# 删除店铺 + +@app.delete("", response_model=CommonOut, description='删除店铺', summary='删除店铺') +@handle_exceptions_unified() +async def delete(id: UUID = Query(..., description='主键ID'), + ): + """删除店铺""" + secret = await Shop.get_or_none(id=id) + if not secret: + raise HTTPException(status_code=404, detail='店铺不存在') + await secret.delete() + # Tortoise ORM 单个实例的 delete() 方法返回 None,而不是删除的记录数 + # 
删除成功时手动返回 1,如果有异常会被装饰器捕获 + return CommonOut(count=1) + +# 随机取一个店铺 +@app.get("/random", response_model=Out, description='随机取一个店铺', summary='随机取一个店铺') +@handle_exceptions_unified() +async def get_random_shop(): + """ + 随机取一个店铺(事务内计数与偏移选择,避免数据库不稳定的随机排序) + """ + async with in_transaction() as conn: + q = Shop.all().using_db(conn) + total = await q.count() + if total == 0: + raise HTTPException(status_code=404, detail='店铺不存在') + pick_index = random.choice(range(total)) + item = await q.order_by('create_time').offset(pick_index).first() + return item \ No newline at end of file diff --git a/back/compose.yml b/back/compose.yml new file mode 100755 index 0000000..92fa605 --- /dev/null +++ b/back/compose.yml @@ -0,0 +1,29 @@ +services: + # 容器服务名称 + ca_auto_table: + # 容器名称 + container_name: ca_auto_table + build: + # 在当前目录下寻找Dockerfile文件并构建镜像 + context: . + dockerfile: Dockerfile + # 重启策略 + restart: always + # 挂载目录 本地化容器数据 + # 这里挂载了本地当前目录的app目录到容器的/app目录 + volumes: + - .:/app + # 环境变量 可以在Dockerfile中配置环境变量,应用中获取 + environment: + - NAME=ca_auto_table + - TZ=Asia/Shanghai + # 端口映射 容器端口映射到主机端口 + ports: + - "6060:6060" + # 日志配置 - 限制日志大小并启用日志轮转 + logging: + driver: "json-file" + options: + max-size: "10m" # 单个日志文件最大10MB + max-file: "3" # 保留最多3个日志文件 + compress: "true" # 压缩旧日志文件 diff --git a/back/main.py b/back/main.py new file mode 100644 index 0000000..33feff1 --- /dev/null +++ b/back/main.py @@ -0,0 +1,152 @@ +from fastapi import FastAPI +from settings import TORTOISE_ORM +from fastapi.middleware.cors import CORSMiddleware +from tortoise.contrib.fastapi import register_tortoise +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.interval import IntervalTrigger +from tortoise import Tortoise +from contextlib import asynccontextmanager +from apis import app as main_router +import asyncio +import signal +import sys + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + 应用生命周期管理函数 + + - 启动:注册定时任务并启动调度器 + - 关闭:优雅关闭调度器与数据库连接 + """ + 
print('项目启动...') + + # 初始化数据库连接(使用 Tortoise 直接初始化,确保路由与定时任务可用) + try: + await Tortoise.init(config=TORTOISE_ORM) + print('数据库初始化完成') + except Exception as e: + print(f'数据库初始化失败: {e}') + + # 每30分钟保持一次数据库连接活跃 + scheduler.add_job( + keep_db_connection_alive, + IntervalTrigger(minutes=30), + id='keep_db_alive', + name='保持数据库连接', + coalesce=True, + misfire_grace_time=30, + ) + + + scheduler.start() + try: + yield + finally: + print('项目结束...') + + # 关闭数据库连接 + print('关闭数据库连接...') + try: + await asyncio.wait_for(Tortoise.close_connections(), timeout=2) + except asyncio.TimeoutError: + print('关闭数据库连接超时') + except Exception as e: + print(f'关闭数据库连接出错: {e}') + + # 关闭调度器 + print('关闭调度器...') + try: + if scheduler is not None and hasattr(scheduler, 'shutdown'): + scheduler.shutdown(wait=False) + except Exception as e: + print(f'关闭调度器出错: {e}') + + + +# 创建 FastAPI 应用实例 +app = FastAPI(lifespan=lifespan) + +# 配置 CORS 中间件 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 创建调度器实例 +scheduler = AsyncIOScheduler() + +# 包含主路由 +app.include_router(main_router) + +# 注意:使用自定义 lifespan 已在启动时手动初始化数据库。 +# 若改回默认事件机制,可重新启用 register_tortoise。 + + +async def keep_db_connection_alive(): + """ + 保持数据库连接活跃的函数 + 定期执行简单查询以防止连接超时 + """ + try: + conn = Tortoise.get_connection("default") + await conn.execute_query("SELECT 1") + print("数据库连接检查成功") + except Exception as e: + print(f"数据库连接检查失败: {e}") + + +def signal_handler(): + """ + 处理终止信号,确保资源正确释放 + """ + + async def shutdown(): + print("收到终止信号,开始优雅关闭...") + + # 关闭数据库连接 + print("关闭数据库连接...") + try: + await Tortoise.close_connections() + except Exception as e: + print(f"关闭数据库连接时出错: {e}") + + # 关闭调度器 + print("关闭调度器...") + try: + scheduler.shutdown() + except Exception as e: + print(f"关闭调度器时出错: {e}") + + print("所有资源已关闭,程序退出") + sys.exit(0) + + loop = asyncio.get_event_loop() + loop.create_task(shutdown()) + # 给异步任务一些时间完成 + loop.run_until_complete(asyncio.sleep(2)) + 
sys.exit(0) + + +if __name__ == '__main__': + from uvicorn import run + + # 注册信号处理 + for sig in (signal.SIGINT, signal.SIGTERM): + signal.signal(sig, lambda sig, frame: signal_handler()) + + run( + 'main:app', + host='0.0.0.0', + port=6060, + reload=False, + workers=1, + # loop='uvloop', + http='httptools', + limit_concurrency=10000, + backlog=4096, + timeout_keep_alive=5 + ) diff --git a/back/migrations/models/0_20251212143904_init.py b/back/migrations/models/0_20251212143904_init.py new file mode 100644 index 0000000..01a242b --- /dev/null +++ b/back/migrations/models/0_20251212143904_init.py @@ -0,0 +1,67 @@ +from tortoise import BaseDBAsyncClient + + +async def upgrade(db: BaseDBAsyncClient) -> str: + return """ + CREATE TABLE IF NOT EXISTS `food` ( + `id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID', + `name` VARCHAR(255) NOT NULL COMMENT '食物名称', + `create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6), + `update_time` DATETIME(6) NOT NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6), + KEY `idx_food_name_b88f83` (`name`), + KEY `idx_food_create__2db565` (`create_time`) +) CHARACTER SET utf8mb4 COMMENT='食物表'; +CREATE TABLE IF NOT EXISTS `info` ( + `id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID', + `child_full_name` VARCHAR(255) NOT NULL COMMENT '孩子全名', + `parent_full_name` VARCHAR(255) NOT NULL COMMENT '家长全名', + `child_birthday` VARCHAR(32) NOT NULL COMMENT '孩子生日', + `address_str` VARCHAR(255) NOT NULL COMMENT '街道地址', + `city_name` VARCHAR(255) NOT NULL COMMENT '城市', + `parent_phone` VARCHAR(64) NOT NULL COMMENT '家长电话', + `postcode` VARCHAR(20) NOT NULL COMMENT '邮编', + `province` VARCHAR(255) NOT NULL COMMENT '省/州全称', + `status` BOOL NOT NULL COMMENT '状态' DEFAULT 0, + `email` VARCHAR(255) NOT NULL UNIQUE COMMENT '邮箱', + `email_content` LONGTEXT COMMENT '邮件内容', + `text` LONGTEXT COMMENT '文本内容', + `create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6), + `update_time` DATETIME(6) NOT 
NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6), + KEY `idx_info_child_f_dae7dc` (`child_full_name`), + KEY `idx_info_parent__d99e40` (`parent_full_name`), + KEY `idx_info_address_8c2b80` (`address_str`), + KEY `idx_info_city_na_ac7d8f` (`city_name`), + KEY `idx_info_postcod_9a4431` (`postcode`), + KEY `idx_info_provinc_58581b` (`province`), + KEY `idx_info_email_653be4` (`email`), + KEY `idx_info_create__3bea91` (`create_time`), + KEY `idx_info_city_na_a8ca74` (`city_name`, `postcode`, `province`), + KEY `idx_info_child_f_2cf26a` (`child_full_name`, `parent_full_name`) +) CHARACTER SET utf8mb4 COMMENT='信息表'; +CREATE TABLE IF NOT EXISTS `shop` ( + `id` CHAR(36) NOT NULL PRIMARY KEY COMMENT 'ID', + `province` VARCHAR(255) COMMENT '省份', + `city` VARCHAR(255) NOT NULL COMMENT '城市', + `street` VARCHAR(255) NOT NULL COMMENT '街道', + `shop_name` VARCHAR(255) NOT NULL COMMENT '店铺名称', + `shop_number` VARCHAR(255) COMMENT '店铺号码', + `create_time` DATETIME(6) NOT NULL COMMENT '创建时间' DEFAULT CURRENT_TIMESTAMP(6), + `update_time` DATETIME(6) NOT NULL COMMENT '更新时间' DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6), + KEY `idx_shop_provinc_904758` (`province`), + KEY `idx_shop_city_69d82f` (`city`), + KEY `idx_shop_street_5aaa95` (`street`), + KEY `idx_shop_shop_na_938b2f` (`shop_name`), + KEY `idx_shop_create__e13964` (`create_time`), + KEY `idx_shop_provinc_72e64a` (`province`, `city`, `street`) +) CHARACTER SET utf8mb4 COMMENT='店铺表'; +CREATE TABLE IF NOT EXISTS `aerich` ( + `id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + `version` VARCHAR(255) NOT NULL, + `app` VARCHAR(100) NOT NULL, + `content` JSON NOT NULL +) CHARACTER SET utf8mb4;""" + + +async def downgrade(db: BaseDBAsyncClient) -> str: + return """ + """ diff --git a/back/pyproject.toml b/back/pyproject.toml new file mode 100644 index 0000000..abf7e22 --- /dev/null +++ b/back/pyproject.toml @@ -0,0 +1,4 @@ +[tool.aerich] +tortoise_orm = "settings.TORTOISE_ORM" +location = 
"./migrations" +src_folder = "./." diff --git a/back/requirements.txt b/back/requirements.txt new file mode 100644 index 0000000..8385116 --- /dev/null +++ b/back/requirements.txt @@ -0,0 +1,25 @@ +aerich +aiohttp +aiomysql +APScheduler +fastapi +# numpy +tenacity +tortoise-orm +uvicorn +pycryptodome +curl_cffi +fake_useragent +aiohttp_socks +pynacl +eth-account +base58 +aioredis +redis +httpx +loguru +uvloop +cryptography +uvicorn[standard] +psutil +DrissionPage diff --git a/back/settings.py b/back/settings.py new file mode 100644 index 0000000..8bdcd24 --- /dev/null +++ b/back/settings.py @@ -0,0 +1,34 @@ +TORTOISE_ORM = { + 'connections': { + 'default': { + # 'engine': 'tortoise.backends.asyncpg', PostgreSQL + 'engine': 'tortoise.backends.mysql', # MySQL or Mariadb + 'credentials': { + 'host': '192.168.11.67', + 'port': 3306, + 'user': 'us', + 'password': 'BkftDZfBzjBFAFwD', + 'database': 'us', + 'minsize': 10, # 最小连接数设为10,避免连接过多 + 'maxsize': 30, # 最大连接数设为30,避免超出数据库限制 + 'charset': 'utf8mb4', + "echo": False, + 'pool_recycle': 3600, # 增加连接回收时间从300秒到3600秒(1小时) + 'connect_timeout': 10, # 连接超时时间 + } + }, + }, + 'apps': { + 'models': { + # 仅注册实际存在的模型模块,移除不存在的 apis.project.models,避免 Aerich 初始化失败 + 'models': [ + "apis.country.models", + "aerich.models" + ], + 'default_connection': 'default', + + } + }, + 'use_tz': False, + 'timezone': 'Asia/Shanghai' +} diff --git a/back/utils/__init__.py b/back/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/back/utils/browser_api.py b/back/utils/browser_api.py new file mode 100644 index 0000000..c5ac1a0 --- /dev/null +++ b/back/utils/browser_api.py @@ -0,0 +1,143 @@ +import datetime +import asyncio +import httpx +from loguru import logger +from utils.decorators import handle_exceptions_unified + + +class BrowserApi: + """ + 浏览器接口 + """ + + def __init__(self): + self.local_url = 'http://127.0.0.1:54345' + self.headers = {'Content-Type': 'application/json'} + # 使用异步 HTTP 客户端,启用连接池和超时设置 + self.client = 
httpx.AsyncClient( + base_url=self.local_url, + headers=self.headers, + timeout=httpx.Timeout(30.0, connect=10.0), # 总超时30秒,连接超时10秒 + limits=httpx.Limits(max_keepalive_connections=50, max_connections=100), # 连接池配置 + ) + + async def __aenter__(self): + """异步上下文管理器入口""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """异步上下文管理器出口,关闭客户端""" + await self.aclose() + + async def aclose(self): + """关闭 HTTP 客户端""" + if self.client: + await self.client.aclose() + + # 打开指纹浏览器 + @handle_exceptions_unified() + async def open_browser(self, id: str, jc: int = 0): + """ + 打开指纹浏览器(异步优化版本) + :param jc: 计次 + :param id: 浏览器id + :return:http, pid + """ + if jc > 3: + return None, None + url = '/browser/open' + data = { + 'id': id + } + try: + res = await self.client.post(url, json=data) + res.raise_for_status() # 检查 HTTP 状态码 + res_data = res.json() + logger.info(f'打开指纹浏览器: {res_data}') + if not res_data.get('success'): + logger.error(f'打开指纹浏览器失败: {res_data}') + return await self.open_browser(id, jc + 1) + data = res_data.get('data') + http = data.get('http') + pid = data.get('pid') + logger.info(f'打开指纹浏览器成功: {http}, {pid}') + return http, pid + except httpx.TimeoutException as e: + logger.error(f'打开指纹浏览器超时: {e}') + if jc < 3: + return await self.open_browser(id, jc + 1) + return None, None + except httpx.RequestError as e: + logger.error(f'打开指纹浏览器请求错误: {e}') + if jc < 3: + return await self.open_browser(id, jc + 1) + return None, None + except Exception as e: + logger.error(f'打开指纹浏览器异常: {e}') + if jc < 3: + return await self.open_browser(id, jc + 1) + return None, None + + # 关闭指纹浏览器 + @handle_exceptions_unified() + async def close_browser(self, id: str, jc: int = 0): + """ + 关闭指纹浏览器(异步优化版本) + :param jc: 计次 + :param id: 浏览器id + :return: + """ + if jc > 3: + return None + url = '/browser/close' + data = { + 'id': id + } + try: + res = await self.client.post(url, json=data) + res.raise_for_status() # 检查 HTTP 状态码 + res_data = res.json() + logger.info(f'关闭指纹浏览器: 
{res_data}') + if not res_data.get('success'): + msg = res_data.get('msg', '') + # 如果浏览器正在打开中,等待后重试(不是真正的错误) + if '正在打开中' in msg or 'opening' in msg.lower(): + if jc < 3: + # 等待 1-3 秒后重试(根据重试次数递增等待时间) + wait_time = (jc + 1) * 1.0 # 第1次重试等1秒,第2次等2秒,第3次等3秒 + logger.info(f'浏览器正在打开中,等待 {wait_time} 秒后重试关闭: browser_id={id}') + await asyncio.sleep(wait_time) + return await self.close_browser(id, jc + 1) + else: + # 超过重试次数,记录警告但不作为错误 + logger.warning(f'关闭指纹浏览器失败(浏览器正在打开中,已重试3次): browser_id={id}') + return None + else: + # 其他错误,记录为错误并重试 + logger.error(f'关闭指纹浏览器失败: {res_data}') + if jc < 3: + await asyncio.sleep(0.5) # 短暂等待后重试 + return await self.close_browser(id, jc + 1) + return None + logger.info(f'关闭指纹浏览器成功: browser_id={id}') + return True + except httpx.TimeoutException as e: + logger.error(f'关闭指纹浏览器超时: {e}') + if jc < 3: + await asyncio.sleep(1.0) + return await self.close_browser(id, jc + 1) + return None + except httpx.RequestError as e: + logger.error(f'关闭指纹浏览器请求错误: {e}') + if jc < 3: + await asyncio.sleep(1.0) + return await self.close_browser(id, jc + 1) + return None + except Exception as e: + logger.error(f'关闭指纹浏览器异常: {e}') + if jc < 3: + await asyncio.sleep(1.0) + return await self.close_browser(id, jc + 1) + return None + +browser_api = BrowserApi() diff --git a/back/utils/decorators.py b/back/utils/decorators.py new file mode 100644 index 0000000..0178660 --- /dev/null +++ b/back/utils/decorators.py @@ -0,0 +1,165 @@ +from functools import wraps +from fastapi import HTTPException +from typing import Callable, Any, Optional +import logging +import asyncio +from tortoise.exceptions import OperationalError + +# 获取日志记录器 +logger = logging.getLogger(__name__) + + +def handle_exceptions_unified( + max_retries: int = 0, + retry_delay: float = 1.0, + status_code: int = 500, + custom_message: Optional[str] = None, + is_background_task: bool = False +): + """ + 统一的异常处理装饰器 + + 集成了所有异常处理功能:数据库重试、自定义状态码、自定义消息、后台任务处理 + + Args: + max_retries: 最大重试次数,默认0(不重试) + retry_delay: 
重试间隔时间(秒),默认1秒 + status_code: HTTP状态码,默认500 + custom_message: 自定义错误消息前缀 + is_background_task: 是否为后台任务(不抛出HTTPException) + + 使用方法: + # 基础异常处理 + @handle_exceptions_unified() + async def basic_function(...): + pass + + # 带数据库重试 + @handle_exceptions_unified(max_retries=3, retry_delay=1.0) + async def db_function(...): + pass + + # 自定义状态码和消息 + @handle_exceptions_unified(status_code=400, custom_message="参数错误") + async def validation_function(...): + pass + + # 后台任务处理 + @handle_exceptions_unified(is_background_task=True) + async def background_function(...): + pass + """ + def decorator(func: Callable) -> Callable: + @wraps(func) + async def wrapper(*args, **kwargs) -> Any: + last_exception = None + + for attempt in range(max_retries + 1): + try: + return await func(*args, **kwargs) + except HTTPException as e: + # HTTPException 直接抛出,不重试 + if is_background_task: + logger.error(f"后台任务 {func.__name__} HTTPException: {str(e)}") + return False + raise + except OperationalError as e: + last_exception = e + error_msg = str(e).lower() + + # 检查是否是连接相关的错误 + if any(keyword in error_msg for keyword in [ + 'lost connection', 'connection', 'timeout', + 'server has gone away', 'broken pipe' + ]): + if attempt < max_retries: + logger.warning( + f"函数 {func.__name__} 数据库连接错误 (尝试 {attempt + 1}/{max_retries + 1}): {str(e)}" + ) + # 等待一段时间后重试,使用指数退避 + await asyncio.sleep(retry_delay * (2 ** attempt)) + continue + else: + logger.error( + f"函数 {func.__name__} 数据库连接错误,已达到最大重试次数: {str(e)}" + ) + else: + # 非连接错误,直接处理 + logger.error(f"函数 {func.__name__} 发生数据库错误: {str(e)}") + if is_background_task: + return False + error_detail = f"{custom_message}: {str(e)}" if custom_message else f"数据库操作失败: {str(e)}" + raise HTTPException(status_code=status_code, detail=error_detail) + except Exception as e: + last_exception = e + if attempt < max_retries: + logger.warning( + f"函数 {func.__name__} 发生异常 (尝试 {attempt + 1}/{max_retries + 1}): {str(e)}" + ) + await asyncio.sleep(retry_delay * (2 ** attempt)) + 
continue + else: + logger.error(f"函数 {func.__name__} 发生异常: {str(e)}", exc_info=True) + if is_background_task: + return False + break + + # 所有重试都失败了,处理最后一个异常 + if is_background_task: + return False + + if isinstance(last_exception, OperationalError): + error_detail = f"{custom_message}: 数据库连接失败: {str(last_exception)}" if custom_message else f"数据库连接失败: {str(last_exception)}" + else: + error_detail = f"{custom_message}: {str(last_exception)}" if custom_message else str(last_exception) + + raise HTTPException(status_code=status_code, detail=error_detail) + + return wrapper + return decorator + + +# 向后兼容的别名函数 +def handle_exceptions_with_db_retry(max_retries: int = 3, retry_delay: float = 1.0): + """ + 带数据库连接重试的异常处理装饰器(向后兼容) + + 这是 handle_exceptions_unified 的别名,保持向后兼容性 + """ + return handle_exceptions_unified(max_retries=max_retries, retry_delay=retry_delay) + + +def handle_exceptions(func: Callable) -> Callable: + """ + 基础异常处理装饰器(向后兼容) + + 这是 handle_exceptions_unified() 的别名,保持向后兼容性 + """ + return handle_exceptions_unified()(func) + + +def handle_background_task_exceptions(func: Callable) -> Callable: + """ + 后台任务异常处理装饰器(向后兼容) + + 这是 handle_exceptions_unified 的别名,保持向后兼容性 + """ + return handle_exceptions_unified(is_background_task=True)(func) + + +def handle_exceptions_with_custom_message(message: str = "操作失败"): + """ + 带自定义错误消息的异常处理装饰器(向后兼容) + + 这是 handle_exceptions_unified 的别名,保持向后兼容性 + """ + return handle_exceptions_unified(custom_message=message) + + +def handle_exceptions_with_status_code(status_code: int = 500, message: str = None): + """ + 带自定义状态码和错误消息的异常处理装饰器(向后兼容) + + 这是 handle_exceptions_unified 的别名,保持向后兼容性 + """ + return handle_exceptions_unified(status_code=status_code, custom_message=message) \ No newline at end of file diff --git a/back/utils/exceptions.py b/back/utils/exceptions.py new file mode 100644 index 0000000..28c35c2 --- /dev/null +++ b/back/utils/exceptions.py @@ -0,0 +1,47 @@ +import os +from fastapi import Request, status +from fastapi.exceptions 
import HTTPException, RequestValidationError +from fastapi.responses import JSONResponse +from .logs import getLogger + +logger = getLogger(os.environ.get('APP_NAME')) + + +def global_http_exception_handler(request: Request, exc): + """ + 全局HTTP请求处理异常 + :param request: HTTP请求对象 + :param exc: 本次发生的异常对象 + :return: + """ + + # 使用日志记录异常 + logger.error(f"发生异常:{exc.detail}") + + # 直接返回JSONResponse,避免重新抛出异常导致循环 + return JSONResponse( + status_code=exc.status_code, + content={ + 'err_msg': exc.detail, + 'status': False + }, + headers=getattr(exc, 'headers', None) + ) + + +def global_request_exception_handler(request: Request, exc): + """ + 全局请求校验异常处理函数 + :param request: HTTP请求对象 + :param exc: 本次发生的异常对象 + :return: + """ + + # 直接返回JSONResponse,避免重新抛出异常 + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content={ + 'err_msg': exc.errors()[0], + 'status': False + } + ) diff --git a/back/utils/logs.py b/back/utils/logs.py new file mode 100644 index 0000000..465891a --- /dev/null +++ b/back/utils/logs.py @@ -0,0 +1,218 @@ +import logging +import os +from logging import Logger +from concurrent_log_handler import ConcurrentRotatingFileHandler +from logging.handlers import TimedRotatingFileHandler +import gzip +import shutil +import glob +from datetime import datetime, timedelta +from pathlib import Path + + +def getLogger(name: str = 'root') -> Logger: + """ + 创建一个按2小时滚动、支持多进程安全、自动压缩日志的 Logger + :param name: 日志器名称 + :return: 单例 Logger 对象 + """ + logger: Logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + + if not logger.handlers: + # 控制台输出 + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + + # 日志目录 + log_dir = "logs" + os.makedirs(log_dir, exist_ok=True) + + # 日志文件路径 + log_file = os.path.join(log_dir, f"{name}.log") + + # 文件处理器:每2小时滚动一次,保留7天,共84个文件,支持多进程写入 + file_handler = TimedRotatingFileHandler( + filename=log_file, + when='H', + interval=2, # 每2小时切一次 + backupCount=84, # 保留7天 = 7 * 24 / 2 = 84个文件 + 
encoding='utf-8', + delay=False, + utc=False # 你也可以改成 True 表示按 UTC 时间切 + ) + + # 设置 Formatter - 简化格式,去掉路径信息 + formatter = logging.Formatter( + fmt="【{name}】{levelname} {asctime} {message}", + datefmt="%Y-%m-%d %H:%M:%S", + style="{" + ) + console_formatter = logging.Formatter( + fmt="{levelname} {asctime} {message}", + datefmt="%Y-%m-%d %H:%M:%S", + style="{" + ) + + file_handler.setFormatter(formatter) + console_handler.setFormatter(console_formatter) + + logger.addHandler(console_handler) + logger.addHandler(file_handler) + + # 添加压缩功能(在第一次创建 logger 时执行一次) + _compress_old_logs(log_dir, name) + + return logger + + +def _compress_old_logs(log_dir: str, name: str): + """ + 将旧日志压缩成 .gz 格式 + """ + pattern = os.path.join(log_dir, f"{name}.log.*") + for filepath in glob.glob(pattern): + if filepath.endswith('.gz'): + continue + try: + with open(filepath, 'rb') as f_in: + with gzip.open(filepath + '.gz', 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + os.remove(filepath) + except Exception as e: + print(f"日志压缩失败: {filepath}, 原因: {e}") + + +def compress_old_logs(log_dir: str = None, name: str = "root"): + """ + 压缩旧的日志文件(公共接口) + + Args: + log_dir: 日志目录,如果不指定则使用默认目录 + name: 日志器名称 + """ + if log_dir is None: + log_dir = "logs" + + _compress_old_logs(log_dir, name) + + +def log_api_call(logger: Logger, user_id: str = None, endpoint: str = None, method: str = None, params: dict = None, response_status: int = None, client_ip: str = None): + """ + 记录API调用信息,包含用户ID、接口路径、请求方法、参数、响应状态和来源IP + + Args: + logger: 日志器对象 + user_id: 用户ID + endpoint: 接口路径 + method: 请求方法 (GET, POST, PUT, DELETE等) + params: 请求参数 + response_status: 响应状态码 + client_ip: 客户端IP地址 + """ + try: + # 构建日志信息 + log_parts = [] + + if user_id: + log_parts.append(f"用户={user_id}") + + if client_ip: + log_parts.append(f"IP={client_ip}") + + if method and endpoint: + log_parts.append(f"{method} {endpoint}") + elif endpoint: + log_parts.append(f"接口={endpoint}") + + if params: + # 过滤敏感信息 + safe_params = {k: v for k, v in 
params.items() + if k.lower() not in ['password', 'token', 'secret', 'key']} + if safe_params: + log_parts.append(f"参数={safe_params}") + + if response_status: + log_parts.append(f"状态码={response_status}") + + if log_parts: + log_message = " ".join(log_parts) + logger.info(log_message) + + except Exception as e: + logger.error(f"记录API调用日志失败: {e}") + + +def delete_old_compressed_logs(log_dir: str = None, days: int = 7): + """ + 删除超过指定天数的压缩日志文件 + + Args: + log_dir: 日志目录,如果不指定则使用默认目录 + days: 保留天数,默认7天 + """ + try: + if log_dir is None: + log_dir = "logs" + + log_path = Path(log_dir) + if not log_path.exists(): + return + + # 计算截止时间 + cutoff_time = datetime.now() - timedelta(days=days) + + # 获取所有压缩日志文件 + gz_files = [f for f in log_path.iterdir() + if f.is_file() and f.name.endswith('.log.gz')] + + deleted_count = 0 + for gz_file in gz_files: + # 获取文件修改时间 + file_mtime = datetime.fromtimestamp(gz_file.stat().st_mtime) + + # 如果文件超过保留期限,删除它 + if file_mtime < cutoff_time: + gz_file.unlink() + print(f"删除旧压缩日志文件: {gz_file}") + deleted_count += 1 + + if deleted_count > 0: + print(f"总共删除了 {deleted_count} 个旧压缩日志文件") + + except Exception as e: + print(f"删除旧压缩日志文件失败: {e}") + +if __name__ == '__main__': + logger = getLogger('WebAPI') + + # 基础日志测试 + logger.info("系统启动") + logger.debug("调试信息") + logger.warning("警告信息") + logger.error("错误信息") + + # API调用日志测试 + log_api_call( + logger=logger, + user_id="user123", + endpoint="/api/users/info", + method="GET", + params={"id": 123, "fields": ["name", "email"]}, + response_status=200, + client_ip="192.168.1.100" + ) + + log_api_call( + logger=logger, + user_id="user456", + endpoint="/api/users/login", + method="POST", + params={"username": "test", "password": "hidden"}, # password会被过滤 + response_status=401, + client_ip="10.0.0.50" + ) + + # 单例验证 + logger2 = getLogger('WebAPI') + print(f"Logger单例验证: {id(logger) == id(logger2)}") diff --git a/back/utils/out_base.py b/back/utils/out_base.py new file mode 100644 index 0000000..2360deb --- /dev/null 
+++ b/back/utils/out_base.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel, Field + + +class CommonOut(BaseModel): + """操作结果详情模型""" + code: int = Field(200, description='状态码') + message: str = Field('成功', description='提示信息') + count: int = Field(0, description='操作影响的记录数') diff --git a/back/utils/redis_tool.py b/back/utils/redis_tool.py new file mode 100644 index 0000000..f378728 --- /dev/null +++ b/back/utils/redis_tool.py @@ -0,0 +1,96 @@ +import redis +from loguru import logger + + +class RedisClient: + def __init__(self, host: str = 'localhost', port: int = 6379, password: str = None): + self.host = host + self.port = port + self.password = password + self.browser_client = None + self.task_client = None + self.cache_client = None + self.ok_client = None + self.init() + + # 初始化 + def init(self): + """ + 初始化Redis客户端 + :return: + """ + if self.browser_client is None: + self.browser_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=0, + decode_responses=True) + + if self.task_client is None: + self.task_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=1, + decode_responses=True) + + if self.cache_client is None: + self.cache_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=2, + decode_responses=True) + + if self.ok_client is None: + self.ok_client = redis.Redis(host=self.host, port=self.port, password=self.password, db=3, + decode_responses=True) + + logger.info("Redis连接已初始化") + + # 关闭连接 + def close(self): + self.browser_client.close() + self.task_client.close() + self.cache_client.close() + self.ok_client.close() + logger.info("Redis连接已关闭") + + """browser_client""" + + # 写入浏览器信息 + async def set_browser(self, browser_id: str, data: dict): + try: + # 处理None值,将其转换为空字符串 + processed_data = {} + for key, value in data.items(): + if value is None: + processed_data[key] = "" + else: + processed_data[key] = value + + self.browser_client.hset(browser_id, mapping=processed_data) + 
logger.info(f"写入浏览器信息: {browser_id} - {processed_data}") + return True + except Exception as e: + logger.error(f"写入浏览器信息失败: {browser_id} - {e}") + return False + + # 获取浏览器信息 + async def get_browser(self, browser_id: str = None): + try: + if browser_id is None: + # 获取全部数据 + data = self.browser_client.hgetall() + else: + data = self.browser_client.hgetall(browser_id) + logger.info(f"获取浏览器信息: {browser_id} - {data}") + return data + except Exception as e: + logger.error(f"获取浏览器信息失败: {browser_id} - {e}") + + +async def main(): + host = '183.66.27.14' + port = 50086 + password = 'redis_AdJsBP' + redis_client = RedisClient(host, port, password) + # await redis_client.set_browser('9eac7f95ca2d47359ace4083a566e119', {'status': 'online', 'current_task_id': None}) + await redis_client.get_browser('9eac7f95ca2d47359ace4083a566e119') + # 关闭连接 + redis_client.close() + + +if __name__ == '__main__': + import asyncio + + asyncio.run(main()) diff --git a/back/utils/session_store.py b/back/utils/session_store.py new file mode 100644 index 0000000..ce9743f --- /dev/null +++ b/back/utils/session_store.py @@ -0,0 +1,177 @@ +import os +import json +import threading +from datetime import datetime, timedelta +from typing import Optional, Dict, Any, List +from loguru import logger + + +class SessionStore: + """ + 会话持久化存储(日志文件版 + 内存缓存) + + 优化方案: + 1. 使用日志文件记录(追加模式,性能好,不会因为文件变大而变慢) + 2. 在内存中保留最近的会话记录(用于快速查询) + 3. 
定期清理过期的内存记录(保留最近1小时或最多1000条) + """ + + def __init__(self, file_path: str = 'logs/sessions.log', enable_log: bool = True, max_memory_records: int = 1000): + """ + 初始化会话存储。 + + Args: + file_path (str): 日志文件路径(默认 logs/sessions.log) + enable_log (bool): 是否启用日志记录,False 则不记录到文件 + max_memory_records (int): 内存中保留的最大记录数,默认1000 + """ + self.file_path = file_path + self.enable_log = enable_log + self.max_memory_records = max_memory_records + self._lock = threading.Lock() + # 内存中的会话记录 {pid: record} + self._memory_cache: Dict[int, Dict[str, Any]] = {} + # 记录创建时间,用于清理过期记录 + self._cache_timestamps: Dict[int, datetime] = {} + + if enable_log: + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + def _write_log(self, action: str, record: Dict[str, Any]) -> None: + """ + 写入日志文件(追加模式,性能好) + + Args: + action (str): 操作类型(CREATE/UPDATE) + record (Dict[str, Any]): 会话记录 + """ + if not self.enable_log: + return + + try: + with self._lock: + log_line = json.dumps({ + 'action': action, + 'timestamp': datetime.now().isoformat(), + 'data': record + }, ensure_ascii=False) + with open(self.file_path, 'a', encoding='utf-8') as f: + f.write(log_line + '\n') + except Exception as e: + # 静默处理日志写入错误,避免影响主流程 + logger.debug(f"写入会话日志失败: {e}") + + def _cleanup_old_cache(self) -> None: + """ + 清理过期的内存缓存记录 + - 保留最近1小时的记录 + - 最多保留 max_memory_records 条记录 + """ + now = datetime.now() + expire_time = now - timedelta(hours=1) + + # 清理过期记录 + expired_pids = [ + pid for pid, timestamp in self._cache_timestamps.items() + if timestamp < expire_time + ] + for pid in expired_pids: + self._memory_cache.pop(pid, None) + self._cache_timestamps.pop(pid, None) + + # 如果记录数仍然超过限制,删除最旧的记录 + if len(self._memory_cache) > self.max_memory_records: + # 按时间戳排序,删除最旧的 + sorted_pids = sorted( + self._cache_timestamps.items(), + key=lambda x: x[1] + ) + # 计算需要删除的数量 + to_remove = len(self._memory_cache) - self.max_memory_records + for pid, _ in sorted_pids[:to_remove]: + self._memory_cache.pop(pid, None) + 
self._cache_timestamps.pop(pid, None) + + def create_session(self, record: Dict[str, Any]) -> None: + """ + 创建新会话记录。 + + Args: + record (Dict[str, Any]): 会话信息字典 + """ + record = dict(record) + record.setdefault('created_at', datetime.now().isoformat()) + pid = record.get('pid') + + if pid is not None: + with self._lock: + # 保存到内存缓存 + self._memory_cache[pid] = record + self._cache_timestamps[pid] = datetime.now() + # 清理过期记录 + self._cleanup_old_cache() + + # 写入日志文件(追加模式,性能好) + self._write_log('CREATE', record) + + def update_session(self, pid: int, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """ + 按 PID 更新会话记录。 + + Args: + pid (int): 进程ID + updates (Dict[str, Any]): 更新字段字典 + + Returns: + Optional[Dict[str, Any]]: 更新后的会话记录 + """ + with self._lock: + # 从内存缓存获取 + record = self._memory_cache.get(pid) + if record: + record.update(updates) + record.setdefault('updated_at', datetime.now().isoformat()) + self._cache_timestamps[pid] = datetime.now() + else: + # 如果内存中没有,创建一个新记录 + record = {'pid': pid} + record.update(updates) + record.setdefault('created_at', datetime.now().isoformat()) + record.setdefault('updated_at', datetime.now().isoformat()) + self._memory_cache[pid] = record + self._cache_timestamps[pid] = datetime.now() + + if record: + # 写入日志文件 + self._write_log('UPDATE', record) + + return record + + def get_session_by_pid(self, pid: int) -> Optional[Dict[str, Any]]: + """ + 按 PID 查询会话记录(仅从内存缓存查询,性能好) + + Args: + pid (int): 进程ID + + Returns: + Optional[Dict[str, Any]]: 会话记录 + """ + with self._lock: + return self._memory_cache.get(pid) + + def list_sessions(self, status: Optional[int] = None) -> List[Dict[str, Any]]: + """ + 列出会话记录,可按状态过滤(仅从内存缓存查询) + + Args: + status (Optional[int]): 状态码过滤(如 100 运行中、200 已结束、500 失败) + + Returns: + List[Dict[str, Any]]: 会话记录列表 + """ + with self._lock: + records = list(self._memory_cache.values()) + if status is None: + return records + return [r for r in records if r.get('status') == status] diff --git 
a/back/utils/time_tool.py b/back/utils/time_tool.py new file mode 100644 index 0000000..9642e97 --- /dev/null +++ b/back/utils/time_tool.py @@ -0,0 +1,56 @@ +from datetime import datetime, timedelta, timezone +from pydantic import BaseModel, field_serializer +CN_TZ = timezone(timedelta(hours=8)) + + +def now_cn() -> datetime: + """ + 获取中国时区的当前时间 + 返回带有中国时区信息的 datetime 对象 + """ + return datetime.now(CN_TZ) + +def parse_time(val: str | int, is_end: bool = False) -> datetime: + """ + 将传入的字符串或时间戳解析为中国时区的 datetime,用于数据库查询时间比较。 + 支持格式: + - "YYYY-MM-DD" + - "YYYY-MM-DD HH:mm:ss" + - 10 位时间戳(秒) + - 13 位时间戳(毫秒) + """ + dt_cn: datetime + + if isinstance(val, int) or (isinstance(val, str) and val.isdigit()): + ts = int(val) + # 根据量级判断是秒还是毫秒 + if ts >= 10**12: + dt_cn = datetime.fromtimestamp(ts / 1000, CN_TZ) + else: + dt_cn = datetime.fromtimestamp(ts, CN_TZ) + else: + try: + dt_cn = datetime.strptime(val, "%Y-%m-%d").replace(tzinfo=CN_TZ) + if is_end: + dt_cn = dt_cn.replace(hour=23, minute=59, second=59, microsecond=999999) + except ValueError: + try: + dt_cn = datetime.strptime(val, "%Y-%m-%d %H:%M:%S").replace(tzinfo=CN_TZ) + except ValueError: + raise ValueError("时间格式错误,支持 'YYYY-MM-DD' 或 'YYYY-MM-DD HH:mm:ss' 或 10/13位时间戳") + + # 与 ORM 配置保持一致(use_tz=False),返回本地时区的“朴素”时间 + return dt_cn.replace(tzinfo=None) + + +# 自动把 datetime 序列化为 13位时间戳的基类 +class TimestampModel(BaseModel): + """自动把 datetime 序列化为 13位时间戳的基类""" + + model_config = {"arbitrary_types_allowed": True} + + @field_serializer("*", when_used="json", check_fields=False) # "*" 表示作用于所有字段 + def serialize_datetime(self, value): + if isinstance(value, datetime): + return int(value.timestamp()*1000) # 转成 13 位 int 时间戳 + return value diff --git a/spider/api.py b/spider/api.py new file mode 100644 index 0000000..4776596 --- /dev/null +++ b/spider/api.py @@ -0,0 +1,120 @@ +import requests +from loguru import logger +import csv +import os +import random +class Api: + def __init__(self) -> None: + # self.base_url = 
'http://127.0.0.1:6060' + self.base_url = 'http://192.168.11.67:6060' + + # 创建店铺 + def create_shop(self, city: str, street: str, shop_name: str) -> dict: + url = f'{self.base_url}/country/shop' + item = { + 'city': city, + 'street': street, + 'shop_name': shop_name, + } + response = requests.post(url, json=item).json() + logger.info(response) + return response + + # 查询店铺 + def get_shop(self, city: str) -> dict: + url = f'{self.base_url}/country/shop' + response = requests.get(url).json() + # logger.info(response) + return response + + # 创建信息 + def create_info(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str, status: bool = False, email_content: str | None = None) -> dict: + """ + 创建信息记录(孩子与家长字段) + + 参数: + child_full_name (str): 孩子全名 + parent_full_name (str): 家长全名 + child_birthday (str): 孩子生日(字符串) + address_str (str): 街道地址 + city_name (str): 城市 + parent_phone (str): 家长电话 + postcode (str): 邮编 + province (str): 省/州全称 + email (str): 邮箱 + text (str): 文本内容(如反馈地址) + status (bool): 状态 + email_content (str | None): 邮件内容 + + 返回值: + dict: 接口返回的数据 + """ + url = f'{self.base_url}/country/info' + item = { + "child_full_name": child_full_name, + "parent_full_name": parent_full_name, + "child_birthday": child_birthday, + "address_str": address_str, + "city_name": city_name, + "parent_phone": parent_phone, + "postcode": postcode, + "province": province, + "status": status, + "email": email, + "email_content": email_content, + "text": text + } + response = requests.post(url, json=item).json() + logger.info(response) + return response + + # 根据城市 随机获取一个店铺 + def get_random_shop(self) -> dict: + url = f'{self.base_url}/country/shop/random' + response = requests.get(url).json() + # logger.info(response) + if not response.get('street'): + logger.error(f'没有店铺') + return None + return response + +def main(): + """ + 从同目录的 `bakeries.csv` 读取面包店数据,按列映射输出或创建店铺 + + 
列顺序:`Name,Address,City` + """ + api = Api() + csv_path = os.path.join(os.path.dirname(__file__), 'data.csv') + if not os.path.exists(csv_path): + logger.error(f'CSV 文件不存在: {csv_path}') + return + + with open(csv_path, 'r', encoding='utf-8') as file: + reader = csv.reader(file) + header = next(reader, None) + for row in reader: + if len(row) < 3: + logger.warning(f'行列数不足,跳过: {row}') + continue + shop_name, street, city = row[1], row[2], row[0] + if ' (city)' in city: + city = city.replace(' (city)', '') + if 'Quebec' in city: + continue + if ',' in city: + city = city.split(',')[0] + logger.info(f'city: {city}, street: {street}, shop_name: {shop_name}') + api.create_shop(city, street, shop_name) + +# def main2(): +# api = Api() +# city = 'Toronto' +# shop = api.get_random_shop() +# if shop: +# logger.info(shop) + +# if __name__ == '__main__': +# main() + +api = Api() diff --git a/spider/auto_challenge.py b/spider/auto_challenge.py new file mode 100644 index 0000000..a8fb327 --- /dev/null +++ b/spider/auto_challenge.py @@ -0,0 +1,313 @@ +import io +import time +import uuid +from typing import Optional, List +import requests +from PIL import Image +import base64 +from loguru import logger +RESAMPLE_FILTER = Image.Resampling.LANCZOS +class ReCaptchaHandler: + + path_map_44 = { + 0: "//table/tbody/tr[1]/td[1]", + 1: "//table/tbody/tr[1]/td[2]", + 2: "//table/tbody/tr[1]/td[3]", + 3: "//table/tbody/tr[1]/td[4]", + 4: "//table/tbody/tr[2]/td[1]", + 5: "//table/tbody/tr[2]/td[2]", + 6: "//table/tbody/tr[2]/td[3]", + 7: "//table/tbody/tr[2]/td[4]", + 8: "//table/tbody/tr[3]/td[1]", + 9: "//table/tbody/tr[3]/td[2]", + 10: "//table/tbody/tr[3]/td[3]", + 11: "//table/tbody/tr[3]/td[4]", + 12: "//table/tbody/tr[4]/td[1]", + 13: "//table/tbody/tr[4]/td[2]", + 14: "//table/tbody/tr[4]/td[3]", + 15: "//table/tbody/tr[4]/td[4]", + } + + path_map_33 = { + 0: "//table/tbody/tr[1]/td[1]", + 1: "//table/tbody/tr[1]/td[2]", + 2: "//table/tbody/tr[1]/td[3]", + 3: 
@staticmethod
def split_image(image_bytes: bytes) -> Optional[List[str]]:
    """
    Cut an image into a 3x3 grid and return each tile as base64-encoded PNG.

    Tiles are produced row by row, left to right. The last column and last
    row absorb any remainder pixels, so the nine tiles always cover the whole
    image.

    :param image_bytes: Raw encoded image data.
    :return: Nine base64 strings, or ``None`` when the bytes cannot be opened
        as an image.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit; only ordinary decoding failures should map to None.
        return None

    width, height = img.size
    tile_width = width // 3
    tile_height = height // 3

    base64_tiles = []
    for row in range(3):
        for col in range(3):
            left = col * tile_width
            upper = row * tile_height
            # Stretch the last column/row to the image edge.
            right = (col + 1) * tile_width if col < 2 else width
            lower = (row + 1) * tile_height if row < 2 else height

            tile = img.crop((left, upper, right, lower))
            buf = io.BytesIO()
            tile.save(buf, format="PNG")
            base64_tiles.append(base64.b64encode(buf.getvalue()).decode())

    return base64_tiles
def click_answer(self, result, challenge_type):
    """
    Click the tiles named in a solver response, then submit, reload, or wait.

    :param result: Decoded solver response, read as ``result["results"]``.
        For a 4x4 challenge ``results[0]['result']`` is iterated and each
        item is used as a key of ``path_map_44``. For a 3x3 challenge each
        entry supplies ``image_id`` and a ``result`` mapping whose
        ``target_found`` flag decides whether the tile is clicked.
    :param challenge_type: ``4`` for the 4x4 grid, ``3`` for the 3x3 grid.
    :return: ``True`` when the verify button was clicked; ``False`` when the
        challenge was reloaded, when replaced tiles still need to be solved,
        or when ``challenge_type`` is neither 3 nor 4.
    """
    if challenge_type == 4:
        # 4x4: click every tile index returned by the solver, then submit.
        for x in result["results"][0]['result']:
            self.challenge_iframe.ele('#rc-imageselect-target').ele(
                f"xpath:{self.path_map_44[x]}").click()
            time.sleep(0.1)

        # (A disabled debug hook here used to dump ``challenge_44_img`` to a
        # PNG file when the solver returned no tiles.)

        self.challenge_iframe.ele('#recaptcha-verify-button').click()
        self.i11s.clear()
        return True

    if challenge_type == 3:
        # (tile index, xpath) for every tile clicked in this pass.
        found_ele = []

        for res in result["results"]:
            if res["result"].get('target_found'):
                idx = int(res["image_id"])
                self.challenge_iframe.ele('#rc-imageselect-target').ele(
                    f"xpath:{self.path_map_33[idx]}").click()
                found_ele.append((idx, self.path_map_33[idx]))
                time.sleep(0.1)

        if found_ele:
            # On the first pass of a fresh 3x3 grid, two or fewer hits is
            # treated as unreliable: request a new challenge instead.
            if len(found_ele) <= 2 and self.challenge_i33_first:
                self.challenge_iframe.ele('#recaptcha-reload-button').click()
                return False

            # If the first clicked tile now carries the "selected" class the
            # tiles are not being replaced, so the answer can be submitted.
            cls = self.challenge_iframe.ele('#rc-imageselect-target').ele(
                f"xpath:{found_ele[0][1]}").attr('class')
            if 'rc-imageselect-tileselected' in cls:
                self.challenge_iframe.ele('#recaptcha-verify-button').click()
                self.i11s.clear()
                return True

            # Otherwise wait for the replacement tiles and report "not done".
            self.check_11_refresh(found_ele)
            return False

        # Nothing matched: submit as-is.
        self.challenge_iframe.ele('#recaptcha-verify-button').click()
        self.i11s.clear()
        return True

    return False
def identify_verification_code(self, images, timeout: float = 30):
    """
    Send captcha tiles to the recognition service and return its verdict.

    :param images: Mapping of tile id -> base64 image string. Entries with a
        falsy image are skipped.
    :param timeout: Seconds to wait for the recognition service. The original
        call had no timeout, so a stalled service would hang the surrounding
        polling loop indefinitely.
    :return: The decoded JSON response, or ``None`` when there is no usable
        image to send (no request is made in that case).
    :raises requests.RequestException: On connection failure or timeout.
    """
    payload = {
        "images": [
            {
                "image_id": str(key),
                "image_base64": img,
                "target_class": self.challenge_question,
            }
            for key, img in images.items()
            if img
        ]
    }
    if not payload["images"]:
        return None
    res = requests.post(self.api_host, json=payload, timeout=timeout)
    return res.json()
style_str.replace(' ', '') else 'visible' + except Exception: + pass + if vis is None: + try: + # 通过 JS 获取 iframe 的可见性 + vis = self.driver.run_js( + 'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");' + 'f ? getComputedStyle(f).visibility : null;' + ) + except Exception: + vis = None + if vis != 'hidden': + break + # try: + # if self.driver.url != url_before: + # return {"status": True, "message": "验证码自动通过1"} + # if self.checkbox_iframe.ele("#recaptcha-anchor").attr('aria-checked') == 'true': + # return {"status": True, "message": "验证码自动通过2"} + # if self.challenge_iframe.style('visibility') != 'hidden': + # logger.info(222) + # break + # except: + # logger.error("challenge error") + # pass + try: + while True: + # 重复使用可见性判断,避免依赖不存在的 style() + vis = None + try: + style_str = self.challenge_iframe.attr('style') or '' + if 'visibility' in style_str: + vis = 'hidden' if 'visibility: hidden' in style_str.replace(' ', '') else 'visible' + except Exception: + pass + if vis is None: + try: + vis = self.driver.run_js( + 'var f = document.querySelector("iframe[title=\\"recaptcha challenge expires in two minutes\\"]") || document.querySelector("iframe[title=\\"reCAPTCHA 验证任务将于 2 分钟后过期\\"]");' + 'f ? 
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Build a decorator that re-invokes a function when it raises.

    The wrapped function is called up to ``max_retries`` times. Any
    ``Exception`` is logged; between attempts the wrapper sleeps, starting at
    ``delay`` seconds and multiplying by ``backoff`` after each sleep. When
    every attempt fails (or ``max_retries`` is not positive) the wrapper
    returns ``None`` instead of raising.

    :param max_retries: Maximum number of attempts.
    :param delay: Sleep before the second attempt, in seconds.
    :param backoff: Multiplier applied to the sleep after each retry.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempt = 0
            wait = delay
            while attempt < max_retries:
                attempt += 1
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                        time.sleep(wait)
                        wait *= backoff
                        continue
                    # Attempts exhausted: report and give up quietly.
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None

            return None

        return wrapper

    return decorator
def bit_browser_create(self, remark: str = '指纹浏览器', ua: str | None = None, host: str | None = None,
                       port: str | None = None, proxy_user: str | None = None,
                       proxy_pwd: str | None = None, proxy_type: str = 'noproxy', urls: str | None = None,
                       bit_port: str = "54345") -> str:
    """
    Create a BitBrowser profile through the local API and return its id.

    :param remark: Profile remark; its first 40 characters are also used as
        the window name.
    :param ua: User-Agent placed in ``browserFingerPrint``; sent as ``null``
        when omitted.
    :param host: Proxy host (sent only when given).
    :param port: Proxy port (sent only when given).
    :param proxy_user: Proxy user name (sent only when given).
    :param proxy_pwd: Proxy password (sent only when given).
    :param proxy_type: One of 'noproxy', 'http', 'https', 'socks5', 'ssh'.
    :param urls: Extra URLs to open, comma separated (sent only when given).
    :param bit_port: Port of the local BitBrowser API, default 54345.
    :return: Id of the created profile.
    :raises Exception: When the API response does not report ``success``.
    """
    url = f"{self.bit_host}:{bit_port}/browser/update"
    headers = {'Content-Type': 'application/json'}
    data = {
        'name': remark[:40],  # window name
        'remark': f'{remark}',
        'proxyMethod': 2,  # 2 = custom proxy, 3 = extracted IP
        'proxyType': f'{proxy_type}',
        "browserFingerPrint": {"userAgent": ua},
    }
    optional_fields = {
        'host': host,
        'port': port,
        'proxyUserName': proxy_user,
        'proxyPassword': proxy_pwd,
        'url': urls,
    }
    # Only forward the optional settings the caller actually supplied.
    data.update({key: value for key, value in optional_fields.items() if value is not None})
    # Bound the wait so a hung local API surfaces as an error instead of
    # blocking forever.
    res = requests.post(url, json=data, headers=headers, timeout=30).json()
    if not res.get('success'):
        raise Exception(res)
    return res['data']['id']
# Close a BitBrowser window
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_close(self, pk: str, bit_port: str = "54345"):
    """
    Close a BitBrowser window and confirm that it is no longer running.

    After the close request succeeds the method waits 3 seconds and then
    queries the window status; a window that is still alive raises so the
    surrounding ``@retry`` issues the close again.

    :param pk: Browser profile id.
    :param bit_port: Port of the local BitBrowser API, default 54345.
    :return: ``True`` once the window is reported closed.
    :raises Exception: When the API reports failure or the window is still
        running after the wait.
    """
    url = f"{self.bit_host}:{bit_port}/browser/close"
    headers = {'Content-Type': 'application/json'}
    data = {'id': f'{pk}'}
    res = requests.post(url, json=data, headers=headers, timeout=30).json()
    if not res.get('success'):
        raise Exception(res)
    # Give the browser process time to exit before checking on it.
    time.sleep(3)
    # Query the same API port the close request used; the original fell back
    # to the default port here, so a non-default ``bit_port`` was never
    # actually verified.
    # NOTE: a ``None`` result (status lookup exhausted its retries) is treated
    # as "closed", as before.
    if self.bit_browser_status(pk, bit_port=bit_port):
        raise Exception(f'浏览器ID {pk} 未正常关闭, 等待3秒后重试')
    return True
# List BitBrowser profiles
@retry(max_retries=3, delay=1.0, backoff=1.0)
def bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
                    bit_port: str | None = "54345") -> dict:
    """
    Fetch one page of browser profiles from the local BitBrowser API.

    :param page: Page number.
    :param limit: Page size.
    :param group_id: Restrict the listing to this group (optional).
    :param bit_port: Port of the local BitBrowser API, default 54345.
    :return: The full decoded response, e.g.
        ``{'success': True, 'data': {'page': 1, 'pageSize': 10, 'totalNum': 128, 'list': [{'id': '...'}]}}``
    :raises Exception: When the response does not report ``success``.
    """
    payload = {'page': page, 'pageSize': limit}
    if group_id is not None:
        payload['groupId'] = group_id

    reply = requests.post(
        f"{self.bit_host}:{bit_port}/browser/list",
        json=payload,
        headers={'Content-Type': 'application/json'},
    ).json()

    if reply.get('success'):
        return reply
    raise Exception(reply)
async def main():
    """
    Debug helper: page through one browser group and print each profile's
    user agent and proxy settings.

    The original awaited ``bit._bit_browser_get`` / ``bit._bit_browser_detail``,
    which ``BitBrowser`` does not define; the synchronous ``bit_browser_get``
    and ``bit_browser_detail`` are used instead. Both are wrapped by ``@retry``
    and return ``None`` once their retries are exhausted, which is handled
    here. The calls block the event loop, which is acceptable for this
    stand-alone script.
    """
    bit = BitBrowser()
    page = 0
    while True:
        res = bit.bit_browser_get(
            page=page,
            limit=100,
            group_id='4028808b9a52223a019a581bbea1275c')
        if not res:
            # Listing failed even after retries; nothing more to page through.
            break
        browsers = res["data"]["list"]
        if not browsers:
            break

        for item in browsers:
            browser_id = item["id"]
            # Read the profile details
            detail = bit.bit_browser_detail(browser_id)
            if not detail:
                logger.warning(f'failed to read detail for browser {browser_id}')
                continue

            data = detail["data"]
            ua = data["browserFingerPrint"]["userAgent"]
            proxy_type = data.get("proxyType")
            host = data.get("host")
            port = data.get("port")
            proxy_account = data.get("proxyUserName")
            proxy_password = data.get("proxyPassword")
            print(f'id -->{browser_id}')
            print(f'ua -->{ua}')
            print(f'proxy_type -->{proxy_type}')
            print(f'host -->{host}')
            print(f'port -->{port}')
            print(f'proxy_account -->{proxy_account}')
            print(f'proxy_password -->{proxy_password}')
            print('=' * 50)
        page += 1
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Generic retry decorator for synchronous callables.

    Calls the decorated function until it returns without raising, at most
    ``max_retries`` times. Failures are logged, the wrapper sleeps ``delay``
    seconds before the next attempt, and the sleep is scaled by ``backoff``
    each time. Once the attempts are used up the wrapper returns ``None``
    rather than propagating the error.

    :param max_retries: Maximum number of attempts.
    :param delay: Initial pause between attempts, in seconds.
    :param backoff: Factor applied to the pause after every retry.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            failures = 0
            pause = delay
            while failures < max_retries:
                try:
                    outcome = func(*args, **kwargs)
                except Exception as e:
                    failures += 1
                    out_of_attempts = failures >= max_retries
                    if out_of_attempts:
                        logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                        return None
                    logger.warning(f"正在重试 {func.__name__} {failures + 1}/{max_retries} 因错误: {e}")
                    time.sleep(pause)
                    pause *= backoff
                else:
                    return outcome

            # Only reached when no attempt was allowed at all.
            return None

        return wrapper

    return decorator
# Domain registry for the mail helpers
class DomainManager:
    """
    Central registry of the mailbox domains this module knows about.

    A domain's position in the internal list is its "mail type" number.
    Index 1 (``qianyouduo.com``) is the fallback used whenever a type number
    or domain is not recognised.
    """

    def __init__(self):
        # New domains only need to be appended here.
        self._domains = [
            "gmail.com",
            "qianyouduo.com",
            "rxybb.com",
            "cqrxy.vip",
            "0n.lv",
            "qianyouduo.com",
            "ziyouzuan.com",
            "emaing.online",
            "emaing.fun",
            "emaing.asia",
            "isemaing.site",
            "emaing.cyou",
            "emaing.site",
            "emaing.icu",
            "emaing.store",
            "emaing.pw",
            "emaing.xyz",
            "qydkjgs.asia",
            "qydkj.homes",
            "qydkj.baby",
            "qydkj.cyou",
            "qydkjgs.autos",
            "qydkj.autos",
            "qydkjgs.cyou",
            "qydkjgs.homes",
            "qydgs.asia",
            "qydkj.asia",
            "qydgs.cyou",
            "lulanjing.asia",
            "lisihan.asia",
            "mmwan.asia",
            "xyttan.asia",
            "zpaily.asia",
            "youxinzhiguo.asia",
            "huijinfenmu.asia",
            "linghao.asia",
            "cqhc.asia",
            "huacun.asia",
            "huachen.asia",
            "yisabeier.asia",
            "xinxinr.cyou",
            "lilisi.asia",
            "xybbwan.cyou",
            "zhongjing.cyou",
            "zprxy.cyou",
            "cqhuacun.cyou",
            "huazong.icu",
            "huacun.cyou"
        ]

    def get_domain_by_type(self, mail_type: int) -> str:
        """
        Map a mail type number to its domain.

        :param mail_type: Index into the domain list.
        :return: The matching domain, or the fallback domain when the index
            is out of range.
        """
        known = 0 <= mail_type < len(self._domains)
        return self._domains[mail_type if known else 1]

    def get_domain_type(self, domain: str) -> int:
        """
        Map a domain to its mail type number.

        :param domain: Domain name.
        :return: Index of the first occurrence, or 1 when the domain is
            unknown.
        """
        if domain in self._domains:
            return self._domains.index(domain)
        return 1

    def get_imap_server(self, mail_type: int) -> str:
        """
        Build the IMAP host name for a mail type number.

        :param mail_type: Index into the domain list.
        :return: ``imap.<domain>``
        """
        return "imap." + self.get_domain_by_type(mail_type)

    def get_imap_server_by_domain(self, domain: str) -> str:
        """
        Build the IMAP host name for a domain.

        :param domain: Domain name.
        :return: ``imap.<domain>``
        """
        return f"imap.{domain}"

    def is_valid_domain(self, domain: str) -> bool:
        """
        Tell whether a domain is in the supported list.

        :param domain: Domain name.
        :return: ``True`` when supported.
        """
        return domain in self._domains

    def get_all_domains(self) -> list:
        """
        Return every supported domain.

        :return: A copy of the internal list, safe for the caller to mutate.
        """
        return list(self._domains)

    def get_domain_count(self) -> int:
        """
        Count the supported domains (duplicates included).

        :return: Length of the domain list.
        """
        return len(self._domains)

    def get_creatable_domains(self) -> list:
        """
        Return the domains on which mailboxes can be created.

        :return: All listed domains except ``gmail.com``, in list order.
        """
        return list(filter(lambda name: name != "gmail.com", self._domains))

    def get_creatable_domain_by_type(self, mail_type: int) -> str:
        """
        Map a mail type number to a domain that allows mailbox creation.

        :param mail_type: Index into the domain list.
        :return: The matching domain, with ``gmail.com`` replaced by the
            fallback domain.
        """
        domain = self.get_domain_by_type(mail_type)
        return self._domains[1] if domain == "gmail.com" else domain

    def get_random_creatable_domain(self) -> str:
        """
        Pick a random domain that allows mailbox creation.

        :return: One of the creatable domains.
        :raises ValueError: When no creatable domain exists.
        """
        candidates = self.get_creatable_domains()
        if candidates:
            return random.choice(candidates)
        raise ValueError("无可用域名用于创建邮箱")
# Create a mailbox with a random local part
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int | None = None) -> str:
    """
    Create a mailbox with a random name on a creatable (non-Gmail) domain.

    :param count: Length of the random local part (default 8).
    :param pwd: Mailbox password.
    :param mail_type: Mail type number selecting the domain; when ``None`` a
        random creatable domain is used.
    :return: The mailbox address. The address is also returned when the
        server reports that the name/domain pair already exists.
    :raises Exception: When the server answers with a status other than 201
        (the surrounding ``@retry`` then retries and finally yields ``None``).
    """
    # SECURITY NOTE(review): the Authorization header embeds admin Basic
    # credentials in source control; move them to configuration/secrets.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    url = "https://mail.qianyouduo.com/admin/api/v1/boxes"
    name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()

    # Honour an explicit mail type, otherwise pick any creatable domain.
    if mail_type is not None:
        mail_end = self.domain_manager.get_creatable_domain_by_type(mail_type)
    else:
        mail_end = self.domain_manager.get_random_creatable_domain()

    address = f"{name}@{mail_end}"
    data = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    # Bound the wait so a stalled mail server cannot hang the caller (and the
    # retry loop) forever.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    if 'Validation errors: [user] This combination of username and domain is already in database' in response.text:
        # NOTE(review): an existing mailbox is reported as success; its
        # password may differ from ``pwd`` — confirm this is intended.
        return address
    if response.status_code != 201:
        raise Exception(response.status_code)
    return address
# Create a mailbox for an explicit address
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
    """
    Create the mailbox ``account`` on the mail server.

    :param account: Full address, ``name@domain``.
    :param pwd: Mailbox password.
    :return: The address on success, or ``None`` for ``gmail.com`` addresses
        (which cannot be created here).
    :raises ValueError: When ``account`` has no ``@`` or its domain is not
        supported.
    :raises Exception: When the server answers with a status other than 201
        or 400.

    Any exception raised here is caught by the surrounding ``@retry``, which
    retries and finally returns ``None``.
    """
    # SECURITY NOTE(review): the Authorization header embeds admin Basic
    # credentials in source control; move them to configuration/secrets.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    url = "https://mail.qianyouduo.com/admin/api/v1/boxes"

    parts = account.split('@')
    if len(parts) < 2:
        # Previously surfaced as a bare IndexError with no context.
        raise ValueError(f"无效的邮箱账号(缺少 @): {account}")
    name, mail_end = parts[0], parts[1]

    # gmail.com mailboxes cannot be created through this server.
    if mail_end == "gmail.com":
        return None
    # Reject domains the registry does not know.
    if not self.domain_manager.is_valid_domain(mail_end):
        raise ValueError(f"不支持的域名: {mail_end},支持的域名列表: {self.domain_manager.get_all_domains()}")

    address = f"{name}@{mail_end}"
    data = {
        "name": name,
        "email": address,
        "passwordPlaintext": pwd
    }
    # Bound the wait so a stalled mail server cannot hang the caller.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    logger.info(f'创建邮箱响应: {response.status_code}')
    # NOTE(review): 400 is accepted as success — presumably "already exists";
    # confirm it cannot also mean a rejected request.
    if response.status_code not in [201, 400]:
        raise Exception(response.status_code)
    return address
# Delete a mailbox
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_delete(self, account: str) -> bool:
    """
    Delete the mailbox ``account`` from the mail server.

    :param account: Full mailbox address.
    :return: ``True`` when the server answers 204 (deleted) or 404 (already
        absent); ``False`` when the address is a Gmail address, which is
        never sent to the server.
    :raises Exception: When the server answers with any other status; the
        surrounding ``@retry`` catches it, retries, and finally returns
        ``None``.
    """
    # Gmail accounts are not hosted on this server: skip before any I/O.
    if '@gmail.com' in account:
        return False

    # SECURITY NOTE(review): the Authorization header embeds admin Basic
    # credentials in source control; move them to configuration/secrets.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }
    url = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}"
    # Bound the wait so a stalled mail server cannot hang the caller.
    response = requests.delete(url, headers=headers, timeout=30)
    logger.info(f'删除邮箱响应: --> {response.status_code}')
    if response.status_code not in [204, 404]:
        raise Exception(response.status_code)
    return True
@async_retry(max_retries=3, delay=1.0, backoff=1.0) + async def _email_delete(self, account: str) -> bool: + """ + 删除邮箱 + :param account: 邮箱账号 + :return: True表示删除成功,False表示删除失败 + """ + headers = { + "Accept-Language": "zh-CN,zh;q=0.9", + "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "Content-Type": "application/json", + "Origin": "https://mail.qianyouduo.com", + "Pragma": "no-cache", + "Referer": "https://mail.qianyouduo.com/admin/api/doc", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "accept": "*/*", + "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"", + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": "\"macOS\"" + } + url = f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}" + if '@gmail.com' in account: + return False + async with aiohttp.ClientSession() as session: + async with session.delete(url, headers=headers) as response: + status = response.status + if status not in [204, 404]: + raise Exception(f'status code: {status}') + return True + + # 处理邮件正文 + @staticmethod + def extract_body(msg): + """ + 提取邮件正文,优先返回 HTML 文本 + - 更健壮的字符集解析:优先使用 part 的 charset 信息,失败回退到 utf-8 / latin-1 + - 仅处理 inline 的 text/html 与 text/plain 内容 + """ + html_text = None + plain_text = None + + def _decode_part(part): + payload = part.get_payload(decode=True) + if payload is None: + return None + # 优先从内容中解析 charset + charset = (part.get_content_charset() or part.get_param('charset') or 'utf-8') + try: + return payload.decode(charset, errors='replace') + except LookupError: + # 未知编码时回退 + try: + return payload.decode('utf-8', errors='replace') + except Exception: + return payload.decode('latin-1', errors='replace') + + if msg.is_multipart(): + for part in msg.walk(): + 
content_type = part.get_content_type() + content_disposition = part.get_content_disposition() + + if content_type == "text/html" and (not content_disposition or content_disposition == "inline"): + html_text = _decode_part(part) or html_text + elif content_type == "text/plain" and (not content_disposition or content_disposition == "inline"): + plain_text = _decode_part(part) or plain_text + else: + content_type = msg.get_content_type() + if content_type == "text/html": + html_text = _decode_part(msg) + elif content_type == "text/plain": + plain_text = _decode_part(msg) + + # 优先返回 HTML 文本,如果没有 HTML 文本,则返回纯文本 + return html_text or plain_text or "" + + # 转换邮件日期 + @staticmethod + def convert_to_china_time(date_str): + """ + 将邮件日期转换为10位时间戳(中国时区) + - 保留原始邮件的时区信息;若无时区,则按 UTC 处理 + - 异常时返回当前时间戳,避免解析失败导致崩溃 + """ + try: + email_date = email.utils.parsedate_to_datetime(date_str) + if email_date is None: + return int(time.time()) + if email_date.tzinfo is None: + email_date = email_date.replace(tzinfo=timezone.utc) + china_time = email_date.astimezone(timezone(timedelta(hours=8))) + return int(china_time.timestamp()) + except Exception: + return int(time.time()) + + # 获取邮件 + def email_read(self, user: str, from_: str, limit: int = 1, is_del: bool = False) -> list | None: + """ + 获取最新邮件 + :param user: 母账号 + :param from_: 发件人匹配关键字(可为邮箱或显示名,大小写不敏感) + :param limit: 获取邮件数量(默认1封) + :param is_del: 是否删除整个邮箱账号(非 Gmail 才会执行账号删除) + :return: 返回邮件列表,每个元素格式为: + { + "title": "邮件标题", + "from": "发件人", + "date": "邮件日期(中国时区时间戳)", + "content": "邮件正文", + "code": 200 + } + """ + user_li = user.split('@') + domain = user_li[1] + + # 使用域名管理器获取邮箱类型 + if not self.domain_manager.is_valid_domain(domain): + return None + + mail_type = self.domain_manager.get_domain_type(domain) + # 仅对 Gmail 进行点号归一化,其它域名按原样处理 + local_part = user_li[0] + if domain == "gmail.com": + local_part = local_part.replace('.', '') + user = local_part + '@' + user_li[1] + proxy_host = None + proxy_port = None + proxy_user = None + 
proxy_pwd = None + if mail_type == 0: + res = self.email_account_read(parent_account=user, status=True, level=0) + if res['code'] != 200: + return None + pwd = res['items'][0]['parent_pwd'] + proxy_host = res['items'][0]['host'] + proxy_port = res['items'][0]['port'] + proxy_user = res['items'][0]['proxy_account'] + proxy_pwd = res['items'][0]['proxy_pwd'] + else: + pwd = 'Zpaily88' + + items = [] # 存储邮件列表 + + # 保存原始socket + original_socket = None + if proxy_host is not None and proxy_port is not None: + original_socket = socket.socket + if proxy_user is not None and proxy_pwd is not None: + socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True, proxy_user, proxy_pwd) + else: + socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True) + socket.socket = socks.socksocket + + imap_server = None + had_error = False + try: + # 在设置代理后创建IMAP连接 + imap_server = imaplib.IMAP4_SSL(self.domain_manager.get_imap_server(mail_type)) + if not imap_server: + had_error = True + else: + + # pwd去除空格 + pwd = pwd.replace(' ', '') + # print(f'pwd: {pwd}') + imap_server.login(user, pwd) + status, _ = imap_server.select("INBOX") + if status != 'OK': + had_error = True + else: + status, email_ids = imap_server.search(None, "ALL") + if status != 'OK': + had_error = True + else: + email_id_list = email_ids[0].split() + + # 获取最近limit条邮件ID + recent_ids = email_id_list[-20:] # 仍然获取最近20封以确保有足够的邮件可以筛选 + found_count = 0 # 记录找到的符合条件的邮件数量 + + for email_id in recent_ids[::-1]: # 从最新的邮件开始处理 + if found_count >= limit: # 如果已经找到足够数量的邮件,就退出循环 + break + + status, msg_data = imap_server.fetch(email_id, "(RFC822)") + for response in msg_data: + if isinstance(response, tuple): + msg = email.message_from_bytes(response[1]) + # 兼容性发件人匹配:解析地址与显示名,大小写不敏感,支持子串匹配 + from_field = msg.get("From", "") + addresses = email.utils.getaddresses([from_field]) + needle = (from_ or "").lower() + candidates = [] + for name, addr in addresses: + if name: + candidates.append(name.lower()) + if addr: + 
candidates.append(addr.lower()) + if any(needle in c for c in candidates): + # 标题解码,处理无标题或编码缺失的情况 + raw_subject = msg.get("Subject") + subject = "" + if raw_subject is not None: + dh = decode_header(raw_subject) + if dh: + s, enc = dh[0] + if isinstance(s, bytes): + try: + subject = s.decode(enc or 'utf-8', errors='replace') + except LookupError: + subject = s.decode('utf-8', errors='replace') + else: + subject = s + + item = { + "title": subject, + "from": msg["From"], + "content": self.extract_body(msg), + "code": 200 + } + + # 获取并转换邮件时间 + date_str = msg["Date"] + if date_str: + item["date"] = self.convert_to_china_time(date_str) + + items.append(item) + found_count += 1 + + if found_count >= limit: # 如果已经找到足够数量的邮件,就跳出内层循环 + break + + # 读取完成不再对单封邮件做删除标记与 expunge + + except imaplib.IMAP4.error as e: + # items.append({'title': 'error', 'from': 'error', 'content': f'连接邮箱失败: {e}', 'code': 500}) + had_error = True + except Exception as e: + # items.append({'title': 'error', 'from': 'error', 'content': f'获取邮件异常: {e}', 'code': 500}) + had_error = True + finally: + try: + # 检查连接是否建立 + if 'imap_server' in locals() and imap_server is not None: + try: + # 先检查是否处于已选择状态 + if hasattr(imap_server, 'state') and imap_server.state == 'SELECTED': + imap_server.close() + except Exception as e: + logger.error(f"关闭IMAP文件夹时发生错误: {e}") + try: + # 无论如何尝试登出 + imap_server.logout() + except Exception as e: + logger.error(f"登出IMAP服务器时发生错误: {e}") + # 在Windows上可能需要强制关闭socket + try: + if hasattr(imap_server, 'sock') and imap_server.sock is not None: + imap_server.sock.close() + except Exception as sock_err: + logger.error(f"强制关闭socket时发生错误: {sock_err}") + except Exception as outer_e: + logger.error(f"处理IMAP连接关闭时发生错误: {outer_e}") + finally: + # 重置socket设置(如果使用了代理) + if proxy_host is not None and original_socket is not None: + socket.socket = original_socket + + # 若成功获取到至少一封匹配邮件且请求删除,则删除整个邮箱账号 + if is_del and len(items) > 0: + try: + self.email_delete(user) + except Exception as del_err: + 
logger.error(f"删除邮箱账号失败: {del_err}") + + if had_error: + return None + if len(items) == 0: + return None + return items # 返回邮件列表 + + +async def main(): + """ + 使用示例:展示新的域名管理系统的使用方法 + """ + mail = Mail() + # mai = '0gz3vvd4@'+'qydgs.asia' + # res = mail.email_create(mai) + # print(f"创建的邮箱: {res}") + random_email = mail.email_create_random() + print(f"创建的随机邮箱: {random_email}") + + # 读取邮件 + # res = mail.email_read('0gz3vvd4@qydgs.asia', '@', 1, is_del=True) + # print(f'读取的邮件: {res}') + + # 删除邮箱 + res = mail.email_delete(random_email) + print(f"删除的邮箱: {res}") + +mail_ = Mail() + +# if __name__ == '__main__': + # asyncio.run(main()) diff --git a/spider/main.py b/spider/main.py new file mode 100644 index 0000000..7f911f5 --- /dev/null +++ b/spider/main.py @@ -0,0 +1,765 @@ +import random +import time +from datetime import datetime +from DrissionPage import Chromium +from loguru import logger +from work import generate_child_parent_names +from mail_ import mail_ +from bit_browser import bit_browser +from api import api +from proxys import proxy_list +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed +from auto_challenge import ReCaptchaHandler + + +class Auto: + def __init__(self, http: str = None): + # self.browser = Chromium(http) + self.browser = Chromium() + self.tab = self.browser.latest_tab + pass + + # cf打码 + def solve_cloudflare(self, is_ok: bool = False): + tab = self.browser.latest_tab + for _ in range(5): + tab.wait(1) + res = tab.ele( + 't:h1@text()=Sorry, you have been blocked', timeout=1) + if res: + logger.error("Cloudflare验证失败") + return False + + try: + shadow1 = tab.ele( + 'x://*[@name="cf-turnstile-response"]').parent().shadow_root + iframe = shadow1.get_frame(1) + if iframe: + logger.debug("找到Cloudflare iframe") + shadow2 = iframe.ele('x:/html/body').shadow_root + if shadow2: + logger.debug("找到Cloudflare iframe body shadow root") + status = shadow2.ele( + 'x://span[text()="Verifying..."]', timeout=1.5) + if status: + 
tab.wait(3) + status = shadow2.ele( + 'x://span[text()="Success!"]', timeout=1.5) + if status: + logger.debug("Cloudflare验证成功") + return True + checkbox = shadow2.ele( + 'x://input[@type="checkbox"]', timeout=1.5) + if checkbox: + checkbox.click() + logger.debug("点击Cloudflare复选框") + tab.wait(3) + logger.debug("重新获取状态") + # return False + except Exception as e: + # logger.error(f"处理Cloudflare异常: {e}") + if is_ok: + logger.debug(f"cloudflare处理通过: {e}") + return True + return self.solve_cloudflare(is_ok=True) + tab.wait(1) + return False + + # 谷歌验证码 + def solve_recaptcha(self): + logger.debug("开始解决谷歌验证码") + recaptcha_handler = ReCaptchaHandler(self.tab) + res = recaptcha_handler.challenge() + if res.get("status"): + logger.debug("谷歌验证码成功") + iframe = self.tab.ele('t:iframe@title=reCAPTCHA') + # print(iframe) + res = iframe.ele('t:div@class=recaptcha-checkbox-border') + if res: + logger.debug(f"html: {res.html}") + if 'display: none;' in res.html: + logger.debug("谷歌验证码成功") + return True + else: + print("No element found") + return False + logger.error("谷歌验证码失败") + + return False + + # 打开URL + def open_url(self, url: str): + self.tab.get(url) + + def get_tab(self): + return self.tab + + # 等待进入首页 + def wait_home(self): + logger.debug("等待进入首页") + jc = 0 + while True: + if jc > 3: + logger.error("等待进入首页超过5次,未成功") + return False + self.tab.wait(1) + bol = self.tab.ele( + 't:div@text():YOUTUBE PRIVACY SETTLEMENT', timeout=1) + if bol: + logger.debug("成功进入首页") + return True + + jc += 1 + + + # 随机取城市 + def get_random_city(self, province: str | None = None): + cities = { + "Alberta": ["Calgary", "Edmonton"], + "British Columbia": ["Vancouver"], + # "Manitoba": ["Winnipeg", "Rochester"], + # "New Brunswick": ["Fredericton", "Moncton"], + # "Newfoundland and Labrador": ["St. John's", "Halifax"], + "Nova Scotia": ["Halifax"], + "Ontario": ["Toronto"], + # "Prince Edward Island": ["Charlottetown", "St. 
John's"], + # "Quebec": ["Quebec City", "Montreal"], + # "Saskatchewan": ["Saskatoon", "Regina"], + } + if province is None: + province = random.choice(list(cities.keys())) + return province, random.choice(cities.get(province, [])) + + def get_province_by_city(self) -> str | None: + """ + 根据城市名称解析对应省份 + + 参数: + city (str): 城市名称,例如 `Calgary`、`Edmonton` 等 + + 返回值: + str | None: 对应的省份名称;未匹配返回 None + """ + mapping = { + "Calgary": "Alberta", + "Edmonton": "Alberta", + "Vancouver": "British Columbia", + "Halifax": "Nova Scotia", + "Toronto": "Ontario", + "Ottawa": "Ontario", + "Mississauga": "Ontario", + "Brampton": "Ontario", + "Hamilton": "Ontario", + "Kitchener": "Ontario", + "London": "Ontario", + "Markham": "Ontario", + "Vaughan": "Ontario", + "Windsor": "Ontario", + "Oshawa": "Ontario", + "Brantford": "Ontario", + "Barrie": "Ontario", + "Sudbury": "Ontario", + "Kingston": "Ontario", + "Guelph": "Ontario", + "Cambridge": "Ontario", + "Sarnia": "Ontario", + "Peterborough": "Ontario", + "Waterloo": "Ontario", + "Belleville": "Ontario", + "Brockville": "Ontario", + "Burlington": "Ontario", + "Cornwall": "Ontario", + "Kawartha Lakes": "Ontario", + "North Bay": "Ontario", + "Orillia": "Ontario", + "Pickering": "Ontario", + "Sault Ste. Marie": "Ontario", + "Stratford": "Ontario", + "Durham": "Ontario", + "Norfolk County": "Ontario", + "Prince Edward County": "Ontario", + "Quinte West": "Ontario", + "St. 
Catharines": "Ontario", + "Welland": "Ontario", + "Thorold": "Ontario", + "Niagara Falls": "Ontario", + "Pelham": "Ontario", + "Port Colborne": "Ontario", + } + # 随机返回一条 key 和 value + return random.choice(list(mapping.items())) + + # 随机实物 + + def get_random_food(self, city: str, shop: str) -> list[str]: + """ + 随机选择 1~2 种食物类别,并为每个类别至少选择 1 个具体产品 + + 参数: + shop (str): 商店名称(当前未使用,占位参数) + + 返回值: + list[str]: 随机选取的产品名称列表 + """ + categories = [ + [ + 'Wonder Bread White', + 'Villaggio White Bread', + 'No Name Sliced White Bread', + "President's Choice White Sliced Bread", + ], + [ + "Ben's Original Whole Wheat Bread", + "POM Whole Wheat Bread", + "Silver Hills Bakery Whole Wheat Sliced Bread", + "Country Harvest Whole Wheat Bread", + ], + [ + "Wonder Bread Hot Dog Buns", + "Villaggio Hamburger Buns", + "Dempster's Dinner Rolls", + "No Frills Hot Dog Buns", + ], + [ + "Stonemill Bakehouse Bagels", + "Wonder Bagels", + "Montreal Bagels (pre-packaged, e.g., St. Lawrence brand)", + "President's Choice Bagels", + ], + [ + "Silver Hills Multi-Grain Sliced Bread", + "POM Multi-Grain Bread", + "Country Harvest Multi-Grain Loaf", + ], + [ + "President's Choice French Stick", + "Dempster's Italian Style Bread", + "Wonder Italian Bread", + "Villaggio Country Style Loaf", + ], + ] + + # 随机选择 1~2 个类别(不重复) + category_count = random.randint(1, 2) + chosen_categories = random.sample(categories, k=category_count) + + # 每个类别至少选择 1 个产品,最多选择 3 个以避免过多 + selected_products: list[str] = [] + for cat in chosen_categories: + max_pick = min(3, len(cat)) + pick_count = random.randint(1, max_pick) + selected_products.extend(random.sample(cat, k=pick_count)) + logger.debug(f"随机选择的产品: {selected_products}") + text = f'{shop}, {city} buy: ' + for p in selected_products: + text += f'{p} * {random.randint(1, 3)}, ' + text = text[:-2] + text = text + '.' 
+ logger.debug(f'随机选择的产品文本: {text}') + return text + + # 填写问卷 + def fill_questionnaire(self): + """ + 完成问卷填写 + + 参数: + city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据 + """ + try: + info = generate_child_parent_names() + child_full_name = info['child_full_name'] + parent_full_name = info['parent_full_name'] + child_birthday = info['child_birthday'] + # 2023-04-01转为MM/DD/YYYY + child_birthday = datetime.strptime(child_birthday, '%Y-%m-%d').strftime('%m/%d/%Y') + address_str = info['child_address_str'] + city_name = info['child_city_name'] + postcode = info['child_postcode'] + parent_phone = info['parent_phone'] + province = info['parent_state'] + # email = mail_.email_create_random() + email = 'zhiyu@qq.com' + logger.debug(f"child_full_name --> {child_full_name}") + logger.debug(f"parent_full_name --> {parent_full_name}") + logger.debug(f"child_birthday --> {child_birthday}") + logger.debug(f"address_str --> {address_str}") + logger.debug(f"city_name --> {city_name}") + logger.debug(f"postcode --> {postcode}") + logger.debug(f"parent_phone --> {parent_phone}") + logger.debug(f"province --> {province}") + logger.debug(f"email --> {email}") + self.tab.wait(0.1) + self.tab.ele('t:input@id=name1').input(child_full_name) + self.tab.wait(0.1) + self.tab.ele('t:input@id=name2').input(parent_full_name) + self.tab.wait(0.1) + self.tab.ele('t:input@id=dateOfBirth').input(child_birthday) + self.tab.wait(0.1) + self.tab.ele('t:input@id=street1').input(address_str) + self.tab.wait(0.1) + self.tab.ele('t:input@id=city').input(city_name) + self.tab.wait(0.1) + self.tab.ele( + 't:select@formcontrolname=state').ele(f't:option@text():{province}').click() + self.tab.wait(0.1) + self.tab.ele('t:input@id=zip').input(postcode) + self.tab.wait(0.1) + self.tab.ele('t:input@id=phone1').input(parent_phone) + self.tab.wait(0.1) + self.tab.ele('t:input@id=emailAddress').input(email) + self.tab.wait(0.1) + self.tab.ele('t:input@id=confirmEmailemail').input(email) + self.tab.wait(0.1) + 
self.tab.ele('t:input@@formcontrolname=resideInUS@@id=Yes').click() + self.tab.wait(0.1) + self.tab.ele('t:input@@formcontrolname=watchedDuringPeriod@@id=Yes').click() + self.tab.wait(0.1) + self.tab.ele('t:input@id=signatureMinor').input(child_full_name) + self.tab.wait(0.1) + self.tab.ele('t:input@id=signatureParentGuardian').input(parent_full_name) + self.solve_recaptcha() + + return self.submit_file( + child_full_name=child_full_name, + parent_full_name=parent_full_name, + child_birthday=child_birthday, + address_str=address_str, + city_name=city_name, + parent_phone=parent_phone, + postcode=postcode, + province=province, + email=email, + text="" + ) + + except Exception as e: + logger.error(f"填写问卷失败: {e}") + + # 提交问卷 + def submit_file(self, child_full_name: str, parent_full_name: str, child_birthday: str, address_str: str, city_name: str, parent_phone: str, postcode: str, province: str, email: str, text: str): + """ + 提交问卷后的数据保存到后端服务(孩子与家长字段) + + 参数: + child_full_name (str): 孩子全名 + parent_full_name (str): 家长全名 + child_birthday (str): 孩子生日(字符串,已为 MM/DD/YYYY) + address_str (str): 街道地址 + city_name (str): 城市 + parent_phone (str): 家长电话 + postcode (str): 邮编 + province (str): 省/州全称 + email (str): 邮箱 + text (str): 文本内容(如反馈地址) + """ + jc = 0 + while True: + if jc >= 3: + logger.error("提交问卷失败") + return False + res = self.solve_recaptcha() + if not res: + jc += 1 + continue + res = self.tab.ele('t:button@text():SUBMIT') + if res: + logger.debug(f"点击Submit按钮") + res.click() + self.tab.wait(3) + res = self.tab.ele( + 't:h2@text()=THANK YOU FOR SUBMITTING YOUR INFORMATION', timeout=1) + if res: + logger.info("提交问卷成功") + logger.info(f"反馈地址: {text}") + + res = self.tab.ele('t:b') + if res: + logger.info(f"反馈地址: {res.text}") + text = res.text + status = True + + else: + status=False + + api.create_info( + child_full_name=child_full_name, + parent_full_name=parent_full_name, + child_birthday=child_birthday, + address_str=address_str, + city_name=city_name, + 
parent_phone=parent_phone, + postcode=postcode, + province=province, + email=email, + text=text, + status=status + ) + return True + + bol = self.tab.ele( + 't:div@text():ERR_TIMED_OUT', timeout=1) + if bol: + logger.debug("刷新网页") + self.tab.refresh() + self.tab.wait(1.5) + bol = self.tab.ele( + 't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1) + if bol: + logger.debug("刷新网页") + self.tab.refresh() + self.tab.wait(1.5) + bol = self.tab.ele( + 't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1) + if bol: + logger.debug("刷新网页") + self.tab.refresh() + self.tab.wait(1.5) + jc += 1 + + +def parse_proxy(proxy: str) -> tuple[str, int, str, str] | None: + """ + 解析代理字符串为四元组 `(host, port, user, pwd)` + + 参数: + proxy: 形如 `host:port:user:pwd` + + 返回值: + (host, port, user, pwd) 或 None(格式错误) + """ + try: + host, port, user, pwd = proxy.split(":", 3) + return host, int(port), user, pwd + except Exception: + logger.error(f"代理格式错误: {proxy}") + return None + + +def create_fingerprint_browser(proxy: str) -> tuple[str, str] | None: + """ + 创建指纹浏览器并打开窗口,返回 `(browser_id, debugger_http)` + + 参数: + proxy: 代理字符串 + + 返回值: + (browser_id, http) 或 None(失败) + """ + info = parse_proxy(proxy) + if info is None: + return None + host, port, user, pwd = info + try: + browser_id = bit_browser.bit_browser_create( + remark=f"{user}", + proxy_type="socks5", + host=host, + port=str(port), + proxy_user=user, + proxy_pwd=pwd, + ) + if not browser_id: + return None + logger.info(f"创建指纹浏览器成功: {browser_id}") + time.sleep(1) + http = bit_browser.bit_browser_open(browser_id) + if not http: + return None + logger.info(f"打开指纹浏览器成功: {browser_id}") + return browser_id, http + except Exception as e: + logger.error(f"创建指纹浏览器失败: {e}") + return None + + +def close_and_delete_browser(browser_id: str) -> None: + """ + 关闭并删除指定指纹浏览器 + + 参数: + browser_id: 指纹浏览器ID + """ + try: + bit_browser.bit_browser_close(browser_id) + except Exception as e: + logger.warning(f"关闭浏览器失败或已关闭: {browser_id} - {e}") + time.sleep(1) + try: + 
bit_browser.bit_browser_delete(browser_id) + except Exception as e: + logger.warning(f"删除浏览器失败或已删除: {browser_id} - {e}") + + +def run_task_with_proxy(proxy: str, stop_event: threading.Event) -> None: + """ + 使用代理创建指纹浏览器、执行自动化,并在结束后清理 + + 参数: + proxy: 代理字符串 + """ + browser_id: str | None = None + try: + created = create_fingerprint_browser(proxy) + if not created: + return + browser_id, http = created + if stop_event.is_set(): + return + auto = Auto(http=http) + auto.open_url('https://www.claimform.youtubeprivacysettlement.com') + if stop_event.is_set(): + return + if not auto.wait_home(): + return + if stop_event.is_set(): + return + if not auto.click_continue(): + return + if stop_event.is_set(): + return + auto.fill_questionnaire() + except Exception as e: + logger.error(f"执行任务异常: {e}") + finally: + if browser_id: + try: + close_and_delete_browser(browser_id) + except Exception: + pass + + +def proxy_loop(proxy: str, stop_event: threading.Event) -> None: + """ + 为单个代理保持持续运行:任务结束后立即重建并再次执行 + + 参数: + proxy: 代理字符串 + stop_event: 停止事件,用于外部触发退出循环 + """ + while not stop_event.is_set(): + try: + if is_forbidden_time(): + if stop_event.wait(timeout=60): + break + cleanup_all_browsers() + secs = seconds_until(20, 0) + if stop_event.wait(timeout=secs): + break + continue + run_task_with_proxy(proxy, stop_event) + except Exception as e: + logger.error(f"代理循环异常: {proxy} - {e}") + if stop_event.is_set(): + break + if stop_event.wait(timeout=0.1): + break + + +def is_forbidden_time() -> bool: + """ + 判断当前是否处于禁跑时段(每日 18:30 ~ 20:00,本地时间) + + 返回值: + bool: True 表示处于禁跑时段 + """ + # 去除晚上停止功能 + return False + # 禁跑时段为 18:30 ~ 20:00 + now = datetime.now() + start = now.replace(hour=18, minute=30, second=0, microsecond=0) + end = now.replace(hour=20, minute=0, second=0, microsecond=0) + return start <= now < end + +def wait_until_out_of_forbidden(interval_sec: float = 5.0, stop_event: threading.Event | None = None) -> None: + """ + 在禁跑时段内循环等待,直到禁跑时段结束 + + 参数: + interval_sec: 轮询间隔秒数 + 
stop_event: 可选停止事件,若设置则在等待期间可提前结束 + """ + while is_forbidden_time(): + if stop_event is not None and stop_event.wait(timeout=interval_sec): + break + time.sleep(interval_sec) + + +def seconds_until(hour: int, minute: int) -> float: + """ + 计算到今天指定时间点的剩余秒数 + + 参数: + hour: 目标小时(24小时制) + minute: 目标分钟 + + 返回值: + float: 剩余秒数,若目标时间已过则为 0 + """ + now = datetime.now() + target = now.replace(hour=hour, minute=minute, second=0, microsecond=0) + if target <= now: + return 0.0 + return (target - now).total_seconds() + + +def count_fingerprint_browsers() -> int: + """ + 统计当前指纹浏览器数量 + + 返回值: + int: 当前总数量 + """ + try: + res = bit_browser.bit_browser_get(0, 100) + data = res.get("data", {}) if isinstance(res, dict) else {} + total = data.get("totalNum") + lst = data.get("list", []) + if isinstance(total, int) and total >= 0: + return total + return len(lst) + except Exception as e: + logger.warning(f"统计指纹浏览器数量失败: {e}") + return 0 + + +def cleanup_all_browsers() -> None: + """ + 关闭并删除所有指纹浏览器 + """ + try: + res = bit_browser.bit_browser_get(0, 100) + data = res.get("data", {}) if isinstance(res, dict) else {} + lst = data.get("list", []) + ids = [i.get("id") for i in lst if i.get("id")] + for bid in ids: + close_and_delete_browser(bid) + except Exception as e: + logger.warning(f"清理所有指纹浏览器失败: {e}") + + +def delete_excess_browsers(limit: int) -> None: + """ + 删除超出上限的指纹浏览器,从列表末尾开始删除 + + 参数: + limit: 允许的最大浏览器数量 + """ + try: + res = bit_browser.bit_browser_get(0, 100) + data = res.get("data", {}) if isinstance(res, dict) else {} + lst = data.get("list", []) + ids = [i.get("id") for i in lst if i.get("id")] + count = len(ids) + if count <= limit: + return + excess = count - limit + to_delete = ids[-excess:] + for bid in reversed(to_delete): + close_and_delete_browser(bid) + logger.info(f"已删除超出数量 {excess},当前限制为 {limit}") + except Exception as e: + logger.warning(f"删除超额浏览器失败: {e}") + + +def monitor_browsers_and_restart(limit: int, stop_event: threading.Event, restart_event: threading.Event) 
-> None: + """ + 每 3 秒检测指纹浏览器数量,超过 `limit` 则从末尾删除超出部分 + + 参数: + limit: 允许的最大浏览器数量(通常为代理数量) + restart_event: 触发重启的事件(当前策略不使用) + """ + while not stop_event.is_set(): + time.sleep(3) + count = count_fingerprint_browsers() + if count > limit: + logger.warning(f"指纹浏览器数量 {count} 超过限制 {limit},开始删除超出部分") + delete_excess_browsers(limit) + + +def main(): + """ + 多线程并发管理:按代理数量并发创建指纹浏览器并执行任务;每 3 秒监控数量,超限则从末尾删除多余浏览器。 + """ + proxies = list(proxy_list) + while True: + stop_event = threading.Event() + restart_event = threading.Event() + + if is_forbidden_time(): + if stop_event.wait(timeout=60): + continue + cleanup_all_browsers() + logger.info("处于禁跑时段,等待至禁跑结束") + wait_until_out_of_forbidden() + continue + + executor = ThreadPoolExecutor(max_workers=len(proxies)) + try: + futures_map = {executor.submit(proxy_loop, p, stop_event): p for p in proxies} + + monitor_thread = threading.Thread( + target=monitor_browsers_and_restart, + args=(len(proxies), stop_event, restart_event), + daemon=True, + ) + monitor_thread.start() + + while True: + if restart_event.is_set(): + stop_event.set() + try: + executor.shutdown(wait=True) + except Exception: + pass + break + if is_forbidden_time(): + logger.info("进入禁跑时段,停止当前批次,等待1分钟后清理指纹浏览器") + stop_event.set() + try: + executor.shutdown(wait=True) + except Exception: + pass + time.sleep(60) + cleanup_all_browsers() + wait_until_out_of_forbidden() + break + for f, proxy in list(futures_map.items()): + if f.done() and not stop_event.is_set() and not restart_event.is_set(): + try: + _ = f.exception() + except Exception: + pass + try: + new_future = executor.submit(proxy_loop, proxy, stop_event) + del futures_map[f] + futures_map[new_future] = proxy + except Exception as e: + logger.error(f"重启代理线程失败: {proxy} - {e}") + time.sleep(0.2) + + try: + monitor_thread.join(timeout=5) + except Exception: + pass + finally: + try: + executor.shutdown(wait=True) + except Exception: + pass + continue + +def main2(): + auto = Auto() + 
auto.open_url('https://www.claimform.youtubeprivacysettlement.com') + bol = auto.wait_home() + if not bol: + return + auto.fill_questionnaire() + # auto.solve_recaptcha() + + +if __name__ == "__main__": + main2() diff --git a/spider/proxys.py b/spider/proxys.py new file mode 100644 index 0000000..67844f0 --- /dev/null +++ b/spider/proxys.py @@ -0,0 +1,95 @@ +work = [ + "us.novproxy.io:1000:qyd00056-region-CA:qyd00056", + "us.novproxy.io:1000:qyd00054-region-US:qyd00054", + "us.novproxy.io:1000:qyd00053-region-CA:qyd00053", + "us.novproxy.io:1000:qyd00052-region-US:qyd00052", + ] + +ca1 = [ + "us.novproxy.io:1000:qyd00051-region-CA:qyd00051", + "us.novproxy.io:1000:qyd00050-region-US:qyd00050", + "us.novproxy.io:1000:qyd00049-region-CA:qyd00049", + "us.novproxy.io:1000:qyd00048-region-US:qyd00048", + "us.novproxy.io:1000:qyd00047-region-CA:qyd00047", +] +ca2 = [ + "us.novproxy.io:1000:qyd00046-region-US:qyd00046", + "us.novproxy.io:1000:qyd00045-region-CA:qyd00045", + "us.novproxy.io:1000:qyd00044-region-US:qyd00044", + "us.novproxy.io:1000:qyd00043-region-CA:qyd00043", + "us.novproxy.io:1000:qyd00042-region-US:qyd00042", + ] + +ca3 = [ + "us.novproxy.io:1000:qyd00041-region-CA:qyd00041", + "us.novproxy.io:1000:qyd00040-region-CA:qyd00040", + "us.novproxy.io:1000:qyd00039-region-US:qyd00039", + "us.novproxy.io:1000:qyd00038-region-CA:qyd00038", + "us.novproxy.io:1000:qyd00037-region-US:qyd00037", + ] + +cwd = [ + "us.novproxy.io:1000:qyd00036-region-CA:qyd00036", + "us.novproxy.io:1000:qyd00035-region-US:qyd00035", + "us.novproxy.io:1000:qyd00034-region-CA:qyd00034", + "us.novproxy.io:1000:qyd00033-region-US:qyd00033", + ] + +wt = [ + "us.novproxy.io:1000:qyd00032-region-CA:qyd00032", + "us.novproxy.io:1000:qyd00031-region-US:qyd00031", + "us.novproxy.io:1000:qyd00030-region-CA:qyd00030", + "us.novproxy.io:1000:qyd00029-region-US:qyd00029", +] + +hc = [ + "us.novproxy.io:1000:qyd00028-region-CA:qyd00028", + "us.novproxy.io:1000:qyd00027-region-US:qyd00027", + 
"us.novproxy.io:1000:qyd00026-region-CA:qyd00026", + "us.novproxy.io:1000:qyd00025-region-US:qyd00025", +] + +zlj = [ + "us.novproxy.io:1000:qyd00024-region-CA:qyd00024", + "us.novproxy.io:1000:qyd00023-region-US:qyd00023", + "us.novproxy.io:1000:qyd00022-region-CA:qyd00022", + "us.novproxy.io:1000:qyd00021-region-US:qyd00021", +] + +wzq = [ + "us.novproxy.io:1000:qyd00020-region-CA:qyd00020", + "us.novproxy.io:1000:qyd00019-region-US:qyd00019", + "us.novproxy.io:1000:qyd00018-region-CA:qyd00018", + "us.novproxy.io:1000:qyd00017-region-US:qyd00017", +] + +xy = [ + "us.novproxy.io:1000:qyd00016-region-CA:qyd00016", + "us.novproxy.io:1000:qyd00015-region-US:qyd00015", + "us.novproxy.io:1000:qyd00014-region-CA:qyd00014", + "us.novproxy.io:1000:qyd00013-region-US:qyd00013", +] + +yll = [ + "us.novproxy.io:1000:qyd00012-region-CA:qyd00012", + "us.novproxy.io:1000:qyd00011-region-US:qyd00011", + "us.novproxy.io:1000:qyd00010-region-CA:qyd00010", + "us.novproxy.io:1000:qyd00009-region-US:qyd00009", +] + +szt = [ + "us.novproxy.io:1000:qyd00008-region-CA:qyd00008", + "us.novproxy.io:1000:qyd00007-region-US:qyd00007", + "us.novproxy.io:1000:qyd00006-region-CA:qyd00006", + "us.novproxy.io:1000:qyd00005-region-US:qyd00005", +] + +hz = [ + "us.novproxy.io:1000:qyd00004-region-CA:qyd00004", + "us.novproxy.io:1000:qyd00003-region-US:qyd00003", + "us.novproxy.io:1000:qyd00002-region-CA:qyd00002", + "us.novproxy.io:1000:qyd00001-region-US:qyd00001", +] + + +proxy_list = work \ No newline at end of file diff --git a/spider/requirements.txt b/spider/requirements.txt new file mode 100644 index 0000000..727f30b --- /dev/null +++ b/spider/requirements.txt @@ -0,0 +1,31 @@ +aiohttp +requests +curl_cffi +aiohttp-socks +requests[socks] +fake_useragent +apscheduler +aiofiles +loguru +portalocker +aiomultiprocess +faker +eth_account +eth_utils +solders +toncli +ecdsa +base58 +ddddocr +aiohttp_socks +websockets +psutil +socks +drissionpage +fastapi +uvicorn +pydantic +ultralytics 
try:
    from bit_browser import retry
except ImportError:
    # Local import: only the fallback needs functools, and it keeps this
    # block self-contained when bit_browser is unavailable.
    import functools

    def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
        """
        Minimal retry decorator used as a fallback when bit_browser is missing.

        Args:
            max_retries (int): Total number of attempts before the last
                exception is re-raised.
            delay (float): Seconds to sleep before the second attempt.
            backoff (float): Factor applied to the delay after every failure.

        Returns:
            Callable: A decorator that wraps the target function with retries.
        """
        def _decorator(func):
            # wraps() keeps __name__/__doc__ of the decorated function so
            # logs and introspection still point at the real callable.
            @functools.wraps(func)
            def _wrapper(*args, **kwargs):
                tries = 0
                cur_delay = delay
                while True:
                    try:
                        return func(*args, **kwargs)
                    except Exception:
                        tries += 1
                        if tries >= max_retries:
                            # Attempts exhausted: surface the original error.
                            raise
                        time.sleep(cur_delay)
                        cur_delay *= backoff
            return _wrapper
        return _decorator
["867"], + "NU": ["867"], + "YT": ["867"], +} + + +# 主要城市的区号(更精确的城市级约束) +CITY_AREA_CODES = { + "Calgary": ["403", "587", "825"], + "Edmonton": ["780", "587", "825"], + "Vancouver": ["604", "778", "236", "672"], + "Halifax": ["902", "782"], + "Toronto": ["416", "647", "437"], +} + + +# 邮编首字母合法性映射(按省份缩写) +POSTAL_PREFIXES = { + "AB": {"T"}, + "BC": {"V"}, + "MB": {"R"}, + "NB": {"E"}, + "NL": {"A"}, + "NS": {"B"}, + "ON": {"K", "L", "M"}, + "PE": {"C"}, + "QC": {"G", "H", "J"}, + "SK": {"S"}, + "NT": {"X"}, + "NU": {"X"}, + "YT": {"Y"}, +} + + +REMOTE_PROVINCES = {"NL", "NT", "NU", "YT"} + + +def _normalize_province(province: str) -> str: + """ + 省份入参规范化,支持全称或缩写,返回缩写 + + 参数: + province (str): 省份,可为全称或缩写(如 "Alberta" 或 "AB") + + 返回值: + str: 省份缩写(如 "AB") + """ + if not province: + raise ValueError("province 不能为空") + p = province.strip() + if len(p) == 2: + return p.upper() + return CA_PROVINCE_ABBR.get(p, p) + + +def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]: + """ + 按省份与可选城市选择一个坐标点 + + 参数: + province_abbr (str): 省份缩写 + city (Optional[str]): 城市名(如 "Calgary"),可为空 + + 返回值: + (lat, lon, city_name): 选中的基础坐标及城市名 + """ + coords = CA_COORDS.get(province_abbr) + if not coords: + # 默认回退至 Calgary + return 51.044733, -114.071883, "Calgary" + if city: + c = city.strip().lower() + for lat, lon, cname in coords: + if cname.lower() == c: + return lat, lon, cname + return random.choice(coords) + + +def _random_near(lat: float, lon: float) -> tuple[float, float]: + """ + 在给定坐标附近生成一个随机偏移坐标 + + 参数: + lat (float): 基准纬度 + lon (float): 基准经度 + + 返回值: + (new_lat, new_lon): 随机偏移后的坐标 + """ + return lat + (random.random() - 0.5) * 0.1, lon + (random.random() - 0.5) * 0.1 + + +@retry(max_retries=3, delay=1.0, backoff=1.0) +def _reverse_geocode(lat: float, lon: float) -> Dict: + """ + 使用 Nominatim 反向地理编码,返回地址字典 + + 参数: + lat (float): 纬度 + lon (float): 经度 + + 返回值: + dict: 包含 address 字段的响应数据 + """ + url = 
f"https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}&zoom=18&addressdetails=1" + headers = {"User-Agent": "ca_auto_table/1.0"} + r = requests.get(url, headers=headers, timeout=15) + r.raise_for_status() + return r.json() + + +def _format_address(address: Dict, province_abbr: str) -> str: + """ + 将 Nominatim 的 address 格式化为完整地址字符串 + + 参数: + address (dict): Nominatim 返回的 address 字段 + province_abbr (str): 省份缩写(如 "AB") + + 返回值: + str: 格式化后的地址字符串 + """ + house = address.get("house_number") + road = address.get("road") or address.get("residential") or address.get("footway") + city = address.get("city") or address.get("town") or address.get("village") + postcode = address.get("postcode") or "" + if house and road and city: + return f"{house} {road}, {city}, {province_abbr} {postcode}, Canada" + # 远端省份允许部分地址 + return f"{city or ''}, {province_abbr} {postcode}, Canada".strip(", ") + + +def _random_name() -> tuple[str, str]: + """ + 生成随机英文名(Firstname, Lastname),组合空间可达数百万以上 + + 实现策略: + - 60% 概率使用常见英文名与姓氏列表(更自然) + - 40% 概率使用音节组合算法动态生成(数量级远超百万) + + 返回值: + (firstname, lastname) + """ + common_first = [ + "James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda", "William", "Elizabeth", + "David", "Barbara", "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen", + "Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra", + "Donald", "Ashley", "Steven", "Kimberly", "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle", + "Kenneth", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa", "Edward", "Deborah", + "Ronald", "Stephanie", "Timothy", "Rebecca", "Jason", "Laura", "Jeffrey", "Sharon", "Ryan", "Cynthia", + "Jacob", "Kathleen", "Gary", "Amy", "Nicholas", "Shirley", "Eric", "Angela", "Stephen", "Helen", + "Jonathan", "Anna", "Larry", "Brenda", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha", + "Frank", "Katherine", "Benjamin", 
"Christine", "Gregory", "Emma", "Raymond", "Ruth", "Samuel", "Julie", + "Patrick", "Olivia", "Alexander", "Victoria" + ] + common_last = [ + "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez", + "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin", + "Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson", + "Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores", + "Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell", "Carter", "Roberts", + "Turner", "Phillips", "Parker", "Evans", "Edwards", "Collins", "Stewart", "Sanchez", "Morris", "Rogers", + "Reed", "Cook", "Morgan", "Bell", "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard", + "Ward", "Torres", "Peterson", "Gray", "Ramirez", "James", "Watson", "Brooks", "Kelly", "Sanders", + "Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson", "Coleman", "Jenkins", "Perry", "Powell", + "Long", "Patterson", "Hughes", "Flores" + ] + + if random.random() < 0.6: + return random.choice(common_first), random.choice(common_last) + + # 动态音节组合生成,支持数百万组合 + f_beg = [ + "al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka", "li", "mar", "no", + "ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi", "wil", "xa", "ya", "zo" + ] + f_mid = [ + "a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou" + ] + f_end = [ + "n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah" + ] + + l_beg = [ + "sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar", "lee", "tho", "whi", "har", + "san", "cla", "ram", "lew", "rob", "walk", "young", "all", "king", "wri", "scott", "tor", "nguy", + "hil", "flo", "gre", "ada", "nel", "bak", "hal", "riv", "camp", "mit", "car", "rob" + ] + l_mid = [ + "a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un" + ] + l_suf = [ + "son", "ton", 
def _random_phone_city(province_abbr: str, city: Optional[str]) -> str:
    """
    Generate a random phone number, preferring the city's own area codes.

    Args:
        province_abbr (str): Province abbreviation used when the city has no
            entry in CITY_AREA_CODES.
        city (Optional[str]): City name; matched case-insensitively.

    Returns:
        str: Phone number formatted like "(403) 555-1234". The area code is
        "000" when neither the city nor the province is known.
    """
    codes = None
    if city:
        # Match the way _pick_coords does (trimmed, case-insensitive) so a
        # caller passing "calgary" still gets Calgary's area codes.
        wanted = city.strip().casefold()
        codes = next(
            (city_codes for name, city_codes in CITY_AREA_CODES.items()
             if name.casefold() == wanted),
            None,
        )
    codes = codes or CA_AREA_CODES.get(province_abbr, ["000"])
    area = random.choice(codes)
    # The ranges already guarantee 3 and 4 digits, so no padding is needed.
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange}-{line}"
def get_random_canada_info(province: str, city: Optional[str] = None) -> Dict[str, str]:
    """
    Build a random Canadian identity and address locally (no network access).

    Args:
        province (str): Province as a full name or abbreviation
            (e.g. "Alberta" or "AB").
        city (Optional[str]): City name (e.g. "Calgary"). When omitted, one of
            the province's known cities is picked at random.

    Returns:
        dict: Keys firstname, lastname, full_name, birthday, address_str,
        city_name, phone, postcode and province (full province name).
    """
    prov_abbr = _normalize_province(province)
    _, _, chosen_city = _pick_coords(prov_abbr, city)

    firstname, lastname = _random_name()
    full_name = f"{firstname} {lastname}"
    birthday = _random_birthday()
    phone = _random_phone_city(prov_abbr, city or chosen_city)

    def _random_street() -> str:
        """Return a made-up street address such as '123 Maple Ave'."""
        house = random.randint(10, 9999)
        street_roots = [
            "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash",
            "River", "Lake", "Hill", "Queen", "King", "Main", "Victoria", "Wellington",
            "Church", "College", "Centre"
        ]
        suffixes = ["St", "Ave", "Rd", "Blvd", "Dr", "Ct", "Pl", "Ln", "Way", "Terrace"]
        return f"{house} {random.choice(street_roots)} {random.choice(suffixes)}"

    def _random_postal(p_abbr: str) -> str:
        """Return a postal code shaped 'A1A 1A1' whose first letter fits the province."""
        allowed_letters = "ABCEGHJKLMNPRSTVXY"
        prefixes = POSTAL_PREFIXES.get(p_abbr) or set(allowed_letters)
        # Sort first: sets are unordered and random.choice needs a sequence.
        first_letter = random.choice(sorted(prefixes))

        def letter() -> str:
            return random.choice(allowed_letters)

        def digit() -> str:
            return str(random.randint(0, 9))

        return f"{first_letter}{digit()}{letter()} {digit()}{letter()}{digit()}"

    address_str = _random_street()
    city_name = city or chosen_city
    postcode = _random_postal(prov_abbr)
    # Reverse lookup abbreviation -> full name; fall back to the abbreviation.
    province_full = next((k for k, v in CA_PROVINCE_ABBR.items() if v == prov_abbr), prov_abbr)

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": full_name,
        "birthday": birthday,
        "address_str": address_str,
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": province_full,
    }
"Kentucky": "KY", + "Louisiana": "LA", + "Maine": "ME", + "Maryland": "MD", + "Massachusetts": "MA", + "Michigan": "MI", + "Minnesota": "MN", + "Mississippi": "MS", + "Missouri": "MO", + "Montana": "MT", + "Nebraska": "NE", + "Nevada": "NV", + "New Hampshire": "NH", + "New Jersey": "NJ", + "New Mexico": "NM", + "New York": "NY", + "North Carolina": "NC", + "North Dakota": "ND", + "Ohio": "OH", + "Oklahoma": "OK", + "Oregon": "OR", + "Pennsylvania": "PA", + "Rhode Island": "RI", + "South Carolina": "SC", + "South Dakota": "SD", + "Tennessee": "TN", + "Texas": "TX", + "Utah": "UT", + "Vermont": "VT", + "Virginia": "VA", + "Washington": "WA", + "West Virginia": "WV", + "Wisconsin": "WI", + "Wyoming": "WY", +} + + +US_COORDS = { + "CA": [(34.052235, -118.243683, "Los Angeles"), (37.774929, -122.419416, "San Francisco")], + "NY": [(40.712776, -74.005974, "New York"), (42.886447, -78.878369, "Buffalo")], + "TX": [(29.760427, -95.369804, "Houston"), (32.776665, -96.796989, "Dallas")], + "FL": [(25.761681, -80.191788, "Miami"), (28.538336, -81.379234, "Orlando")], + "IL": [(41.878113, -87.629799, "Chicago"), (39.781721, -89.650148, "Springfield")], + "WA": [(47.606209, -122.332069, "Seattle"), (47.658779, -117.426047, "Spokane")], + "MA": [(42.360082, -71.058880, "Boston"), (42.262593, -71.802293, "Worcester")], + "PA": [(39.952583, -75.165222, "Philadelphia"), (40.440624, -79.995888, "Pittsburgh")], + "AZ": [(33.448376, -112.074036, "Phoenix"), (32.222607, -110.974711, "Tucson")], + "GA": [(33.748997, -84.387985, "Atlanta"), (32.080898, -81.091203, "Savannah")], + "OH": [(39.961178, -82.998795, "Columbus"), (41.499321, -81.694359, "Cleveland")], + "NC": [(35.227085, -80.843124, "Charlotte"), (35.779590, -78.638179, "Raleigh")], + "MI": [(42.331427, -83.045754, "Detroit"), (42.963240, -85.668086, "Grand Rapids")], + "CO": [(39.739236, -104.990251, "Denver"), (38.833881, -104.821363, "Colorado Springs")], + "VA": [(37.540725, -77.436048, "Richmond"), (36.852926, -75.977985, 
"Virginia Beach")], + "NJ": [(40.735657, -74.172366, "Newark"), (40.717754, -74.043143, "Jersey City")], + "MD": [(39.290385, -76.612189, "Baltimore"), (39.083997, -77.152757, "Rockville")], + "MN": [(44.977753, -93.265011, "Minneapolis"), (44.953703, -93.089958, "Saint Paul")], + "WI": [(43.038902, -87.906474, "Milwaukee"), (43.073051, -89.401230, "Madison")], + "MO": [(38.627003, -90.199404, "St. Louis"), (39.099724, -94.578331, "Kansas City")], + "IN": [(39.768403, -86.158068, "Indianapolis"), (41.079273, -85.139351, "Fort Wayne")], + "TN": [(36.162664, -86.781602, "Nashville"), (35.149532, -90.048981, "Memphis")], + "OR": [(45.515232, -122.678385, "Portland"), (44.942898, -123.035095, "Salem")], + "NV": [(36.169941, -115.139830, "Las Vegas"), (39.529633, -119.813803, "Reno")], +} + + +US_CITY_AREA_CODES = { + "Los Angeles": ["213", "310", "323", "424", "661"], + "San Francisco": ["415", "628"], + "New York": ["212", "347", "718", "929", "646"], + "Buffalo": ["716"], + "Houston": ["713", "281", "832"], + "Dallas": ["214", "469", "972"], + "Miami": ["305", "786"], + "Orlando": ["407", "689"], + "Chicago": ["312", "773", "872"], + "Seattle": ["206"], + "Spokane": ["509"], + "Boston": ["617", "857"], + "Worcester": ["508", "774"], + "Philadelphia": ["215", "267", "445"], + "Pittsburgh": ["412", "878"], + "Phoenix": ["602", "480", "623"], + "Tucson": ["520"], + "Atlanta": ["404", "470", "678", "770"], + "Savannah": ["912"], + "Columbus": ["614", "380"], + "Cleveland": ["216", "440"], + "Charlotte": ["704", "980"], + "Raleigh": ["919", "984"], + "Detroit": ["313", "734", "586"], + "Grand Rapids": ["616"], + "Denver": ["303", "720"], + "Colorado Springs": ["719"], + "Richmond": ["804"], + "Virginia Beach": ["757"], + "Newark": ["973", "862"], + "Jersey City": ["201", "551"], + "Baltimore": ["410", "443", "667"], + "Rockville": ["240", "301"], + "Minneapolis": ["612"], + "Saint Paul": ["651"], + "Milwaukee": ["414"], + "Madison": ["608"], + "St. 
Louis": ["314", "636"], + "Kansas City": ["816"], + "Indianapolis": ["317", "463"], + "Fort Wayne": ["260"], + "Nashville": ["615", "629"], + "Memphis": ["901"], + "Portland": ["503", "971"], + "Salem": ["503"], + "Las Vegas": ["702", "725"], + "Reno": ["775"], +} + + +US_AREA_CODES = { + abbr: sorted({code for _, _, city in cities for code in US_CITY_AREA_CODES.get(city, [])}) + for abbr, cities in US_COORDS.items() +} + + +US_ZIP_RANGES = { + "CA": (900, 961), + "NY": (100, 149), + "TX": (750, 799), + "FL": (320, 349), + "IL": (600, 629), + "WA": (980, 994), + "MA": (10, 27), + "PA": (150, 196), + "AZ": (850, 865), + "GA": (300, 319), + "OH": (430, 459), + "NC": (270, 289), + "MI": (480, 499), + "CO": (800, 816), + "VA": (220, 246), + "NJ": (70, 89), + "MD": (206, 219), + "MN": (550, 567), + "WI": (530, 549), + "MO": (630, 658), + "IN": (460, 479), + "TN": (370, 385), + "OR": (970, 979), + "NV": (889, 898), +} + + +def _normalize_state(state: str) -> str: + """ + 州入参规范化,支持全称或缩写,返回缩写 + + 参数: + state (str): 州名,可为全称或缩写(如 "California" 或 "CA") + + 返回值: + str: 州缩写(如 "CA") + """ + if not state: + raise ValueError("state 不能为空") + s = state.strip() + if len(s) == 2: + return s.upper() + return US_STATE_ABBR.get(s, s) + + +def _us_pick_coords(state_abbr: str, city: Optional[str]) -> tuple[float, float, str]: + """ + 按州与可选城市选择一个坐标点 + + 参数: + state_abbr (str): 州缩写 + city (Optional[str]): 城市名(如 "Los Angeles"),可为空 + + 返回值: + (lat, lon, city_name): 选中的基础坐标及城市名 + """ + coords = US_COORDS.get(state_abbr) + if not coords: + return 40.712776, -74.005974, "New York" + if city: + c = city.strip().lower() + for lat, lon, cname in coords: + if cname.lower() == c: + return lat, lon, cname + return random.choice(coords) + + +def _us_format_address(address: Dict, state_abbr: str) -> str: + """ + 将 Nominatim 的 address 格式化为美国地址字符串 + + 参数: + address (dict): Nominatim 返回的 address 字段 + state_abbr (str): 州缩写(如 "CA") + + 返回值: + str: 格式化后的地址字符串 + """ + house = address.get("house_number") + road = 
address.get("road") or address.get("residential") or address.get("footway") + city = address.get("city") or address.get("town") or address.get("village") + postcode = address.get("postcode") or "" + if house and road and city: + return f"{house} {road}, {city}, {state_abbr} {postcode}, United States" + return f"{city or ''}, {state_abbr} {postcode}, United States".strip(", ") + + +def _us_random_phone_state(state_abbr: str, city: Optional[str]) -> str: + """ + 生成随机美国电话号码,按城市优先选择区号 + + 参数: + state_abbr (str): 州缩写 + city (Optional[str]): 城市名 + + 返回值: + str: 电话,例如 "(213) 555-1234" + """ + codes = None + if city: + codes = US_CITY_AREA_CODES.get(city) + codes = codes or US_AREA_CODES.get(state_abbr, ["000"]) + area = random.choice(codes) + exchange = str(random.randint(200, 899)).zfill(3) + line = str(random.randint(1000, 9999)).zfill(4) + return f"({area}) {exchange}-{line}" + + +def _us_random_zip_for_state(state_abbr: str) -> str: + """ + 生成美国 ZIP Code(5 位数字),范围符合州常见分配段 + + 参数: + state_abbr (str): 州缩写 + + 返回值: + str: ZIP Code,如 "90012" + """ + rng = US_ZIP_RANGES.get(state_abbr) + if not rng: + prefix = random.randint(100, 999) + else: + prefix = random.randint(rng[0], rng[1]) + suffix = random.randint(0, 99) + return f"{prefix:03d}{suffix:02d}" + + +def generate_us_info(state: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]: + """ + 随机生成美国个人与地址信息,可指定州(全称或缩写)与可选城市 + + 参数: + state (str): 州(如 "California" 或 "CA") + city (Optional[str]): 城市(如 "Los Angeles"),不传则在州内随机 + max_attempts (int): 反向地理编码最大尝试次数 + sleep_sec (float): 每次失败后的等待秒数,用于尊重 Nominatim 频率限制 + + 返回值: + dict: 包含 Firstname、Lastname、全名、生日、街道地址、城市、电话、邮编、州全称 + """ + state_abbr = _normalize_state(state) + base_lat, base_lon, chosen_city = _us_pick_coords(state_abbr, city) + + address_str = "" + city_name = "" + postcode = "" + for _ in range(max_attempts): + lat, lon = _random_near(base_lat, base_lon) + data = _reverse_geocode(lat, lon) + if not data: + 
time.sleep(sleep_sec) + continue + addr = data.get("address", {}) + city_name = addr.get("city") or addr.get("town") or addr.get("village") or chosen_city + postcode = addr.get("postcode") or "" + address_str = _us_format_address(addr, state_abbr) + if addr.get("house_number") and (addr.get("road") or addr.get("residential") or addr.get("footway")) and city_name and re.fullmatch(r"\d{5}(-\d{4})?", postcode or ""): + break + time.sleep(sleep_sec) + + firstname, lastname = _random_name() + full_name = f"{firstname} {lastname}" + birthday = _random_birthday() + phone = _us_random_phone_state(state_abbr, city or chosen_city) + + state_full = next((k for k, v in US_STATE_ABBR.items() if v == state_abbr), state_abbr) + + return { + "firstname": firstname, + "lastname": lastname, + "full_name": full_name, + "birthday": birthday, + "address_str": address_str.split(",")[0], + "city_name": city_name, + "phone": phone, + "postcode": postcode, + "state": state_full, + } + + +def get_random_us_info(state: str, city: Optional[str]) -> Dict[str, str]: + """ + 本地生成美国个人与地址信息(不依赖外部网络) + + 参数: + state (str): 州(如 "California" 或 "CA") + city (str | None): 城市(如 "Los Angeles"),不传则按州随机 + + 返回值: + dict: 包含 Firstname、Lastname、全名、生日、街道地址、城市、电话、邮编、州全称 + """ + state_abbr = _normalize_state(state) + _, _, chosen_city = _us_pick_coords(state_abbr, city) + + firstname, lastname = _random_name() + full_name = f"{firstname} {lastname}" + birthday = _random_birthday() + phone = _us_random_phone_state(state_abbr, city or chosen_city) + + def _random_street_us() -> str: + """ + 生成本地美国街道地址 + + 返回值: + str: 形如 '123 Maple Ave' 的地址 + """ + house = random.randint(10, 9999) + street_roots = [ + "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash", + "River", "Lake", "Hill", "Queen", "King", "Main", "Washington", "Lincoln", + "Church", "College", "Center" + ] + suffixes = ["St", "Ave", "Rd", "Blvd", "Dr", "Ct", "Pl", "Ln", "Way", "Terrace"] + return f"{house} 
{random.choice(street_roots)} {random.choice(suffixes)}" + + address_str = _random_street_us() + city_name = city or chosen_city + postcode = _us_random_zip_for_state(state_abbr) + state_full = next((k for k, v in US_STATE_ABBR.items() if v == state_abbr), state_abbr) + + return { + "firstname": firstname, + "lastname": lastname, + "full_name": full_name, + "birthday": birthday, + "address_str": address_str, + "city_name": city_name, + "phone": phone, + "postcode": postcode, + "state": state_full, + } + + +def _random_birthday_by_age_range(min_age: int, max_age: int) -> str: + """ + 按年龄区间生成随机生日,格式为 yyyy-mm-dd + + 参数: + min_age (int): 最小年龄(含) + max_age (int): 最大年龄(含) + + 返回值: + str: 生日字符串 + """ + if min_age < 0: + min_age = 0 + if max_age < min_age: + max_age = min_age + today = date.today() + start = today - timedelta(days=max_age * 365 + 366) + end = today - timedelta(days=min_age * 365) + delta_days = (end - start).days + d = start + timedelta(days=random.randint(0, max(delta_days, 1))) + return f"{d.year}-{d.month:02d}-{d.day:02d}" + + +def _random_date_between(start: date, end: date) -> str: + """ + 在指定日期区间内生成随机日期,格式为 yyyy-mm-dd + + 参数: + start (date): 起始日期(含) + end (date): 结束日期(含) + + 返回值: + str: 随机日期字符串 + """ + if end < start: + start, end = end, start + delta_days = (end - start).days + d = start + timedelta(days=random.randint(0, max(delta_days, 1))) + return f"{d.year}-{d.month:02d}-{d.day:02d}" + + +def generate_child_parent_names( + enforce_period_under13: bool = True, + period_start: str = "2013-07-01", + period_end: str = "2020-04-01", + min_child_age: int = 1, + max_child_age: int = 17, + min_parent_age: int = 25, + max_parent_age: int = 65, + country: str = "US", + province_or_state: Optional[str] = None, + city: Optional[str] = None, + use_network: bool = False, + separate_phones: bool = True, +) -> Dict[str, str]: + """ + 生成两个随机人:未成年孩子与家长,孩子与家长共享姓氏,并包含随机地址等完整信息 + + 参数: + enforce_period_under13 (bool): 是否强制孩子在 [period_start, period_end] 
期间均小于13岁(默认开启) + period_start (str): 期间开始日期,默认 "2013-07-01" + period_end (str): 期间结束日期,默认 "2020-04-01" + min_child_age (int): 孩子最小年龄(用于未启用期间约束时) + max_child_age (int): 孩子最大年龄(用于未启用期间约束时) + min_parent_age (int): 家长最小年龄(用于未启用期间约束时) + max_parent_age (int): 家长最大年龄(用于未启用期间约束时) + country (str): 国家,"CA" 或 "US",默认 "CA" + province_or_state (str | None): 指定省/州,默认随机 + city (str | None): 指定城市,默认随机 + use_network (bool): 是否使用网络反向地理编码生成地址,默认 False 使用本地生成 + separate_phones (bool): 是否为孩子与家长生成不同的电话号码,默认 True + + 返回值: + dict: 包含孩子与家长的 Firstname、Lastname、全名、生日与地址等字段 + """ + parent_first, parent_last = _random_name() + child_first, _ = _random_name() + + if enforce_period_under13: + ps = date.fromisoformat(period_start) + pe = date.fromisoformat(period_end) + bound = date(pe.year - 13, pe.month, pe.day) + child_min = bound + timedelta(days=1) + child_max = pe + child_birthday = _random_date_between(child_min, child_max) + + # 依据孩子生日生成家长生日,设定合理的年龄差 + y, m, d = map(int, child_birthday.split("-")) + child_dt = date(y, m, d) + + def _minus_years_safe(dt: date, years: int) -> date: + try: + return date(dt.year - years, dt.month, dt.day) + except ValueError: + # 处理闰年2月29等情况,回退到当月最后一天 + while True: + try: + return date(dt.year - years, dt.month, dt.day) + except ValueError: + dt = dt - timedelta(days=1) + + gap = random.randint(20, 45) + parent_dt = _minus_years_safe(child_dt, gap) + parent_birthday = f"{parent_dt.year}-{parent_dt.month:02d}-{parent_dt.day:02d}" + else: + child_birthday = _random_birthday_by_age_range(min_child_age, max_child_age) + parent_birthday = _random_birthday_by_age_range(min_parent_age, max_parent_age) + + country = (country or "CA").upper() + addr_info: Dict[str, str] + if country == "US": + state_abbr = province_or_state or random.choice(list(US_STATE_ABBR.values())) + if use_network: + addr_info = generate_us_info(state_abbr, city) + else: + addr_info = get_random_us_info(state_abbr, city) + state_full = addr_info.get("state") + child_phone = 
addr_info.get("phone") + parent_phone = addr_info.get("phone") + if separate_phones: + child_phone = _us_random_phone_state(state_abbr, addr_info.get("city_name")) + return { + "child_firstname": child_first, + "child_lastname": parent_last, + "child_full_name": f"{child_first} {parent_last}", + "child_birthday": child_birthday, + "child_address_str": addr_info.get("address_str"), + "child_city_name": addr_info.get("city_name"), + "child_phone": child_phone, + "child_postcode": addr_info.get("postcode"), + "child_state": state_full, + "parent_firstname": parent_first, + "parent_lastname": parent_last, + "parent_full_name": f"{parent_first} {parent_last}", + "parent_birthday": parent_birthday, + "parent_address_str": addr_info.get("address_str"), + "parent_city_name": addr_info.get("city_name"), + "parent_phone": parent_phone, + "parent_postcode": addr_info.get("postcode"), + "parent_state": state_full, + } + else: + prov_abbr = province_or_state or random.choice(list(CA_PROVINCE_ABBR.values())) + if use_network: + addr_info = generate_canada_info(prov_abbr, city) + else: + addr_info = get_random_canada_info(prov_abbr, city) + province_full = addr_info.get("province") + # 生成孩子与家长电话 + parent_phone = addr_info.get("phone") + child_phone = parent_phone + if separate_phones: + # 使用省缩写与城市生成新的号码 + ca_abbr = CA_PROVINCE_ABBR.get(province_full, prov_abbr) + child_phone = _random_phone_city(ca_abbr, addr_info.get("city_name")) + return { + "child_firstname": child_first, + "child_lastname": parent_last, + "child_full_name": f"{child_first} {parent_last}", + "child_birthday": child_birthday, + "child_address_str": addr_info.get("address_str"), + "child_city_name": addr_info.get("city_name"), + "child_phone": child_phone, + "child_postcode": addr_info.get("postcode"), + "child_province": province_full, + "parent_firstname": parent_first, + "parent_lastname": parent_last, + "parent_full_name": f"{parent_first} {parent_last}", + "parent_birthday": parent_birthday, + 
def main() -> None:
    """
    Demo entry point: print one generated profile for Calgary, Alberta.

    Change the two arguments to try another province or city.
    """
    print(generate_canada_info("Alberta", "Calgary"))