1052 lines
37 KiB
Python
1052 lines
37 KiB
Python
import random
|
||
import time
|
||
from datetime import date, timedelta
|
||
from typing import Optional, Dict
|
||
import re
|
||
|
||
import requests
|
||
try:
    from bit_browser import retry
except ImportError:
    def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
        """
        Minimal local retry decorator, used only when ``bit_browser`` is
        not importable.

        Args:
            max_retries (int): Total number of calls allowed. The wrapped
                function is always called at least once; the last failure
                is re-raised once this many calls have failed.
            delay (float): Seconds to sleep before the first retry.
            backoff (float): Multiplier applied to the delay after each retry.

        Returns:
            Callable: A decorator that wraps a function with retry behaviour.
        """
        # Imported here because only this fallback needs it.
        import functools

        def _decorator(func):
            # Preserve the wrapped function's name and docstring.
            @functools.wraps(func)
            def _wrapper(*args, **kwargs):
                tries = 0
                cur_delay = delay
                while True:
                    try:
                        return func(*args, **kwargs)
                    except Exception:
                        tries += 1
                        if tries >= max_retries:
                            # Out of attempts: surface the original error.
                            raise
                        time.sleep(cur_delay)
                        cur_delay *= backoff
            return _wrapper
        return _decorator
|
||
|
||
|
||
# Full English name of each Canadian province/territory -> two-letter
# abbreviation. Also used in reverse (abbreviation -> full name) when
# building result dictionaries.
CA_PROVINCE_ABBR = {
    "Alberta": "AB",
    "British Columbia": "BC",
    "Manitoba": "MB",
    "New Brunswick": "NB",
    "Newfoundland and Labrador": "NL",
    "Nova Scotia": "NS",
    "Ontario": "ON",
    "Prince Edward Island": "PE",
    "Quebec": "QC",
    "Saskatchewan": "SK",
    "Northwest Territories": "NT",
    "Nunavut": "NU",
    "Yukon": "YT",
}
|
||
|
||
|
||
# Province/territory abbreviation -> list of (latitude, longitude, label)
# seed points. _pick_coords matches the label case-insensitively against a
# requested city, otherwise picks one entry at random.
# NOTE(review): some labels are not distinct city names (second
# "Yellowknife" entry, "Nunavut", "Yukon") and a few coordinates look
# approximate - confirm before relying on them.
CA_COORDS = {
    "AB": [(51.044733, -114.071883, "Calgary"), (53.546124, -113.493823, "Edmonton")],
    "BC": [(49.282729, -123.120738, "Vancouver"), (48.428421, -123.365644, "Victoria")],
    "MB": [(49.895137, -97.138374, "Winnipeg"), (50.445211, -96.823611, "East St Paul")],
    "NB": [(45.963589, -66.643115, "Fredericton"), (46.510712, -67.255044, "Woodstock")],
    "NL": [(53.135509, -57.660435, "Labrador City"), (47.561510, -52.712585, "St. John's")],
    "NS": [(44.648862, -63.575320, "Halifax"), (45.010474, -63.416817, "Truro")],
    "ON": [(43.653225, -79.383186, "Toronto"), (45.421532, -75.697189, "Ottawa")],
    "PE": [(46.238240, -63.131074, "Charlottetown"), (46.392410, -63.787629, "Summerside")],
    "QC": [(45.501689, -73.567256, "Montreal"), (46.813878, -71.207980, "Quebec City")],
    "SK": [(52.133214, -106.670046, "Saskatoon"), (50.445211, -104.618896, "Regina")],
    "NT": [(62.4540, -114.3725, "Yellowknife"), (61.251955, -114.352482, "Yellowknife")],
    "NU": [(63.7467, -68.5167, "Iqaluit"), (64.282327, -76.614813, "Nunavut")],
    "YT": [(60.7212, -135.0568, "Whitehorse"), (64.000000, -138.000000, "Yukon")],
}
|
||
|
||
|
||
# Province/territory abbreviation -> telephone area codes used when no
# city-specific list applies.
# Fixes relative to the previous table:
#   * "780" (Edmonton / northern Alberta) was missing from AB.
#   * "639" is a Saskatchewan overlay code and was wrongly listed under ON.
# The lists are not exhaustive; newer overlay codes may be absent.
CA_AREA_CODES = {
    "AB": ["403", "587", "780", "825"],
    "BC": ["236", "250", "604", "672", "778"],
    "MB": ["204", "431"],
    "NB": ["506"],
    "NL": ["709"],
    "NS": ["782", "902"],
    "ON": ["226", "249", "289", "343", "365", "416", "437", "519", "548", "613", "647", "705", "807", "905"],
    "PE": ["902"],
    "QC": ["418", "438", "450", "514", "579", "581", "819", "873"],
    "SK": ["306", "639"],
    "NT": ["867"],
    "NU": ["867"],
    "YT": ["867"],
}
|
||
|
||
|
||
# Area codes for selected major cities (a tighter, city-level constraint).
# Keys are city names; cities not listed here fall back to the
# province-level CA_AREA_CODES list.
CITY_AREA_CODES = {
    "Calgary": ["403", "587", "825"],
    "Edmonton": ["780", "587", "825"],
    "Vancouver": ["604", "778", "236", "672"],
    "Halifax": ["902", "782"],
    "Toronto": ["416", "647", "437"],
}
|
||
|
||
|
||
# Allowed first letter(s) of a postal code, keyed by province/territory
# abbreviation. Used both to validate geocoded postcodes and to generate
# local ones.
POSTAL_PREFIXES = {
    "AB": {"T"},
    "BC": {"V"},
    "MB": {"R"},
    "NB": {"E"},
    "NL": {"A"},
    "NS": {"B"},
    "ON": {"K", "L", "M"},
    "PE": {"C"},
    "QC": {"G", "H", "J"},
    "SK": {"S"},
    "NT": {"X"},
    "NU": {"X"},
    "YT": {"Y"},
}
|
||
|
||
|
||
# Provinces/territories for which generate_canada_info accepts the first
# reverse-geocoding response as-is, without requiring a house number,
# street and valid postcode.
REMOTE_PROVINCES = {"NL", "NT", "NU", "YT"}
|
||
|
||
|
||
def _normalize_province(province: str) -> str:
|
||
"""
|
||
省份入参规范化,支持全称或缩写,返回缩写
|
||
|
||
参数:
|
||
province (str): 省份,可为全称或缩写(如 "Alberta" 或 "AB")
|
||
|
||
返回值:
|
||
str: 省份缩写(如 "AB")
|
||
"""
|
||
if not province:
|
||
raise ValueError("province 不能为空")
|
||
p = province.strip()
|
||
if len(p) == 2:
|
||
return p.upper()
|
||
return CA_PROVINCE_ABBR.get(p, p)
|
||
|
||
|
||
def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """
    Choose a seed coordinate for a province, optionally for a given city.

    Args:
        province_abbr (str): Province abbreviation.
        city (Optional[str]): City name (e.g. "Calgary"); may be empty.

    Returns:
        (lat, lon, city_name): The seed point and its label. Falls back to
        Calgary when the province has no entries, and to a random entry of
        the province when the city is not given or not found.
    """
    candidates = CA_COORDS.get(province_abbr)
    if not candidates:
        # Default fallback: Calgary.
        return 51.044733, -114.071883, "Calgary"

    if city:
        wanted = city.strip().lower()
        match = next((entry for entry in candidates if entry[2].lower() == wanted), None)
        if match is not None:
            return match

    return random.choice(candidates)
|
||
|
||
|
||
def _random_near(lat: float, lon: float) -> tuple[float, float]:
|
||
"""
|
||
在给定坐标附近生成一个随机偏移坐标
|
||
|
||
参数:
|
||
lat (float): 基准纬度
|
||
lon (float): 基准经度
|
||
|
||
返回值:
|
||
(new_lat, new_lon): 随机偏移后的坐标
|
||
"""
|
||
return lat + (random.random() - 0.5) * 0.1, lon + (random.random() - 0.5) * 0.1
|
||
|
||
|
||
@retry(max_retries=3, delay=1.0, backoff=1.0)
def _reverse_geocode(lat: float, lon: float) -> Dict:
    """
    Reverse-geocode a coordinate through the public Nominatim endpoint.

    Args:
        lat (float): Latitude.
        lon (float): Longitude.

    Returns:
        dict: The decoded JSON response. Callers read its "address" field.

    Raises:
        requests.HTTPError: On a non-success HTTP status (raised by
            ``raise_for_status``).
    """
    endpoint = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}&zoom=18&addressdetails=1"
    response = requests.get(
        endpoint,
        headers={"User-Agent": "ca_auto_table/1.0"},
        timeout=15,
    )
    response.raise_for_status()
    return response.json()
|
||
|
||
|
||
def _format_address(address: Dict, province_abbr: str) -> str:
|
||
"""
|
||
将 Nominatim 的 address 格式化为完整地址字符串
|
||
|
||
参数:
|
||
address (dict): Nominatim 返回的 address 字段
|
||
province_abbr (str): 省份缩写(如 "AB")
|
||
|
||
返回值:
|
||
str: 格式化后的地址字符串
|
||
"""
|
||
house = address.get("house_number")
|
||
road = address.get("road") or address.get("residential") or address.get("footway")
|
||
city = address.get("city") or address.get("town") or address.get("village")
|
||
postcode = address.get("postcode") or ""
|
||
if house and road and city:
|
||
return f"{house} {road}, {city}, {province_abbr} {postcode}, Canada"
|
||
# 远端省份允许部分地址
|
||
return f"{city or ''}, {province_abbr} {postcode}, Canada".strip(", ")
|
||
|
||
|
||
def _random_name() -> tuple[str, str]:
|
||
"""
|
||
生成随机英文名(Firstname, Lastname),组合空间可达数百万以上
|
||
|
||
实现策略:
|
||
- 60% 概率使用常见英文名与姓氏列表(更自然)
|
||
- 40% 概率使用音节组合算法动态生成(数量级远超百万)
|
||
|
||
返回值:
|
||
(firstname, lastname)
|
||
"""
|
||
common_first = [
|
||
"James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda", "William", "Elizabeth",
|
||
"David", "Barbara", "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen",
|
||
"Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra",
|
||
"Donald", "Ashley", "Steven", "Kimberly", "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle",
|
||
"Kenneth", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa", "Edward", "Deborah",
|
||
"Ronald", "Stephanie", "Timothy", "Rebecca", "Jason", "Laura", "Jeffrey", "Sharon", "Ryan", "Cynthia",
|
||
"Jacob", "Kathleen", "Gary", "Amy", "Nicholas", "Shirley", "Eric", "Angela", "Stephen", "Helen",
|
||
"Jonathan", "Anna", "Larry", "Brenda", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha",
|
||
"Frank", "Katherine", "Benjamin", "Christine", "Gregory", "Emma", "Raymond", "Ruth", "Samuel", "Julie",
|
||
"Patrick", "Olivia", "Alexander", "Victoria"
|
||
]
|
||
common_last = [
|
||
"Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
|
||
"Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
|
||
"Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
|
||
"Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
|
||
"Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell", "Carter", "Roberts",
|
||
"Turner", "Phillips", "Parker", "Evans", "Edwards", "Collins", "Stewart", "Sanchez", "Morris", "Rogers",
|
||
"Reed", "Cook", "Morgan", "Bell", "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard",
|
||
"Ward", "Torres", "Peterson", "Gray", "Ramirez", "James", "Watson", "Brooks", "Kelly", "Sanders",
|
||
"Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson", "Coleman", "Jenkins", "Perry", "Powell",
|
||
"Long", "Patterson", "Hughes", "Flores"
|
||
]
|
||
|
||
if random.random() < 0.6:
|
||
return random.choice(common_first), random.choice(common_last)
|
||
|
||
# 动态音节组合生成,支持数百万组合
|
||
f_beg = [
|
||
"al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka", "li", "mar", "no",
|
||
"ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi", "wil", "xa", "ya", "zo"
|
||
]
|
||
f_mid = [
|
||
"a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou"
|
||
]
|
||
f_end = [
|
||
"n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah"
|
||
]
|
||
|
||
l_beg = [
|
||
"sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar", "lee", "tho", "whi", "har",
|
||
"san", "cla", "ram", "lew", "rob", "walk", "young", "all", "king", "wri", "scott", "tor", "nguy",
|
||
"hil", "flo", "gre", "ada", "nel", "bak", "hal", "riv", "camp", "mit", "car", "rob"
|
||
]
|
||
l_mid = [
|
||
"a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un"
|
||
]
|
||
l_suf = [
|
||
"son", "ton", "man", "ley", "ford", "wood", "well", "er", "ers", "ing", "s", "son", "es"
|
||
]
|
||
|
||
def build_name(beg, mid, end, syllables=(2, 3)) -> str:
|
||
parts = [random.choice(beg)]
|
||
for _ in range(random.choice(syllables) - 1):
|
||
parts.append(random.choice(mid))
|
||
parts.append(random.choice(end))
|
||
name = "".join(parts)
|
||
return name.capitalize()
|
||
|
||
first = build_name(f_beg, f_mid, f_end)
|
||
last = build_name(l_beg, l_mid, l_suf)
|
||
return first, last
|
||
|
||
|
||
def _random_birthday() -> str:
|
||
"""
|
||
生成随机生日,格式为 yyyy-mm-dd
|
||
|
||
返回值:
|
||
str: 生日字符串
|
||
"""
|
||
start = date(1950, 1, 1)
|
||
end = date(2000, 12, 31)
|
||
delta_days = (end - start).days
|
||
d = start + timedelta(days=random.randint(0, delta_days))
|
||
return f"{d.year}-{d.month:02d}-{d.day:02d}"
|
||
|
||
|
||
def _random_phone(province_abbr: str) -> str:
    """
    Generate a random Canadian phone number using a province-level area code.

    Args:
        province_abbr (str): Province abbreviation.

    Returns:
        str: Phone number such as "(403) 555-1234". The area code is "000"
        when the province has no entry in CA_AREA_CODES.
    """
    area = random.choice(CA_AREA_CODES.get(province_abbr, ["000"]))
    exchange = random.randint(200, 899)
    subscriber = random.randint(1000, 9999)
    return f"({area}) {exchange:03d}-{subscriber:04d}"
|
||
|
||
|
||
def _random_phone_city(province_abbr: str, city: Optional[str]) -> str:
    """
    Generate a random Canadian phone number, preferring city-level area codes.

    The city is matched against CITY_AREA_CODES ignoring case and
    surrounding whitespace, the same way _pick_coords matches cities, so
    "calgary" and "Calgary" behave identically. Cities without an entry
    fall back to the province-level list.

    Args:
        province_abbr (str): Province abbreviation.
        city (Optional[str]): City name; may be None or empty.

    Returns:
        str: Phone number such as "(403) 555-1234". The area code is "000"
        when neither the city nor the province has an entry.
    """
    codes = None
    if city:
        wanted = city.strip().casefold()
        codes = next(
            (city_codes for name, city_codes in CITY_AREA_CODES.items() if name.casefold() == wanted),
            None,
        )
    codes = codes or CA_AREA_CODES.get(province_abbr, ["000"])
    area = random.choice(codes)
    # Both ranges already produce the full digit count, so no padding is needed.
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange}-{line}"
|
||
|
||
|
||
def _postal_valid_for_province(province_abbr: str, postcode: str) -> bool:
    """
    Check whether a postcode's first letter is allowed for the province.

    Args:
        province_abbr (str): Province abbreviation.
        postcode (str): Postcode string.

    Returns:
        bool: False for an empty postcode; True when the province has no
        configured prefixes; otherwise whether the upper-cased first
        character is one of the province's prefixes.
    """
    if not postcode:
        return False
    allowed = POSTAL_PREFIXES.get(province_abbr)
    if allowed:
        return postcode[0].upper() in allowed
    # Unknown province: nothing to check against.
    return True
|
||
|
||
|
||
def generate_canada_info(province: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]:
    """
    Generate random Canadian personal and address data using reverse geocoding.

    Random points near a seed coordinate are reverse-geocoded until one
    yields a house number, street, city and a postcode valid for the
    province, or until ``max_attempts`` is exhausted. The values from the
    last processed response are used either way. For provinces in
    REMOTE_PROVINCES the first response is accepted as-is.

    Args:
        province (str): Province, full name or abbreviation ("Alberta" / "AB").
        city (Optional[str]): City (e.g. "Calgary"); random within the
            province when omitted.
        max_attempts (int): Maximum number of reverse-geocoding attempts.
        sleep_sec (float): Seconds to wait after each unsuccessful attempt,
            to respect Nominatim rate limits.

    Returns:
        dict: firstname, lastname, full_name, birthday, address_str (the
        part of the formatted address before the first comma), city_name,
        phone, postcode and province (full name).
    """
    prov_abbr = _normalize_province(province)
    base_lat, base_lon, chosen_city = _pick_coords(prov_abbr, city)

    address_str = ""
    city_name = ""
    postcode = ""
    for _ in range(max_attempts):
        lat, lon = _random_near(base_lat, base_lon)
        payload = _reverse_geocode(lat, lon)
        if payload:
            details = payload.get("address", {})
            city_name = details.get("city") or details.get("town") or details.get("village") or chosen_city
            postcode = details.get("postcode") or ""
            address_str = _format_address(details, prov_abbr)
            if prov_abbr in REMOTE_PROVINCES:
                # Remote regions rarely have full street data: take what we got.
                break
            street = details.get("road") or details.get("residential") or details.get("footway")
            if details.get("house_number") and street and city_name and _postal_valid_for_province(prov_abbr, postcode):
                break
        time.sleep(sleep_sec)

    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _random_phone_city(prov_abbr, city or chosen_city)

    # Map the abbreviation back to the full name, keeping it if unknown.
    province_full = prov_abbr
    for full_name, abbr in CA_PROVINCE_ABBR.items():
        if abbr == prov_abbr:
            province_full = full_name
            break

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": province_full,
    }
|
||
|
||
|
||
def get_random_canada_info(province: str, city: Optional[str] = None) -> Dict[str, str]:
    """
    Generate Canadian personal and address data locally (no network access).

    Args:
        province (str): Province, full name or abbreviation ("Alberta" / "AB").
        city (Optional[str]): City (e.g. "Calgary"). Optional: when omitted
            a city of the province is picked at random. When the given city
            matches a known city ignoring case, the known spelling is used
            so the city-level area-code lookup and the returned city name
            are consistent.

    Returns:
        dict: firstname, lastname, full_name, birthday, address_str,
        city_name, phone, postcode and province (full name).
    """

    def _random_street() -> str:
        """Return a made-up street address such as '123 Maple Ave'."""
        house = random.randint(10, 9999)
        street_roots = [
            "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash",
            "River", "Lake", "Hill", "Queen", "King", "Main", "Victoria", "Wellington",
            "Church", "College", "Centre",
        ]
        suffixes = ["St", "Ave", "Rd", "Blvd", "Dr", "Ct", "Pl", "Ln", "Way", "Terrace"]
        return f"{house} {random.choice(street_roots)} {random.choice(suffixes)}"

    def _random_postal(p_abbr: str) -> str:
        """Return an 'A1A 1A1' postcode whose first letter fits the province."""
        allowed_letters = "ABCEGHJKLMNPRSTVXY"
        prefixes = POSTAL_PREFIXES.get(p_abbr) or set(allowed_letters)
        # Sort so the choice does not depend on set iteration order.
        first_letter = random.choice(sorted(prefixes))

        def L() -> str:
            return random.choice(allowed_letters)

        def D() -> str:
            return str(random.randint(0, 9))

        return f"{first_letter}{D()}{L()} {D()}{L()}{D()}"

    prov_abbr = _normalize_province(province)
    _, _, chosen_city = _pick_coords(prov_abbr, city)

    # Prefer the table's spelling when the caller's city matched it.
    if city and chosen_city.lower() == city.strip().lower():
        city_name = chosen_city
    else:
        city_name = city or chosen_city

    firstname, lastname = _random_name()
    full_name = f"{firstname} {lastname}"
    birthday = _random_birthday()
    phone = _random_phone_city(prov_abbr, city_name)

    address_str = _random_street()
    postcode = _random_postal(prov_abbr)
    province_full = next((k for k, v in CA_PROVINCE_ABBR.items() if v == prov_abbr), prov_abbr)

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": full_name,
        "birthday": birthday,
        "address_str": address_str,
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": province_full,
    }
|
||
|
||
|
||
# Full name of each of the 50 US states -> two-letter abbreviation. Also
# used in reverse (abbreviation -> full name) when building result
# dictionaries.
US_STATE_ABBR = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}
|
||
|
||
|
||
# State abbreviation -> list of (latitude, longitude, city) seed points.
# Only 24 states have entries; _us_pick_coords falls back to New York for
# any other state.
US_COORDS = {
    "CA": [(34.052235, -118.243683, "Los Angeles"), (37.774929, -122.419416, "San Francisco")],
    "NY": [(40.712776, -74.005974, "New York"), (42.886447, -78.878369, "Buffalo")],
    "TX": [(29.760427, -95.369804, "Houston"), (32.776665, -96.796989, "Dallas")],
    "FL": [(25.761681, -80.191788, "Miami"), (28.538336, -81.379234, "Orlando")],
    "IL": [(41.878113, -87.629799, "Chicago"), (39.781721, -89.650148, "Springfield")],
    "WA": [(47.606209, -122.332069, "Seattle"), (47.658779, -117.426047, "Spokane")],
    "MA": [(42.360082, -71.058880, "Boston"), (42.262593, -71.802293, "Worcester")],
    "PA": [(39.952583, -75.165222, "Philadelphia"), (40.440624, -79.995888, "Pittsburgh")],
    "AZ": [(33.448376, -112.074036, "Phoenix"), (32.222607, -110.974711, "Tucson")],
    "GA": [(33.748997, -84.387985, "Atlanta"), (32.080898, -81.091203, "Savannah")],
    "OH": [(39.961178, -82.998795, "Columbus"), (41.499321, -81.694359, "Cleveland")],
    "NC": [(35.227085, -80.843124, "Charlotte"), (35.779590, -78.638179, "Raleigh")],
    "MI": [(42.331427, -83.045754, "Detroit"), (42.963240, -85.668086, "Grand Rapids")],
    "CO": [(39.739236, -104.990251, "Denver"), (38.833881, -104.821363, "Colorado Springs")],
    "VA": [(37.540725, -77.436048, "Richmond"), (36.852926, -75.977985, "Virginia Beach")],
    "NJ": [(40.735657, -74.172366, "Newark"), (40.717754, -74.043143, "Jersey City")],
    "MD": [(39.290385, -76.612189, "Baltimore"), (39.083997, -77.152757, "Rockville")],
    "MN": [(44.977753, -93.265011, "Minneapolis"), (44.953703, -93.089958, "Saint Paul")],
    "WI": [(43.038902, -87.906474, "Milwaukee"), (43.073051, -89.401230, "Madison")],
    "MO": [(38.627003, -90.199404, "St. Louis"), (39.099724, -94.578331, "Kansas City")],
    "IN": [(39.768403, -86.158068, "Indianapolis"), (41.079273, -85.139351, "Fort Wayne")],
    "TN": [(36.162664, -86.781602, "Nashville"), (35.149532, -90.048981, "Memphis")],
    "OR": [(45.515232, -122.678385, "Portland"), (44.942898, -123.035095, "Salem")],
    "NV": [(36.169941, -115.139830, "Las Vegas"), (39.529633, -119.813803, "Reno")],
}
|
||
|
||
|
||
# City name -> telephone area codes. Keys match the city labels in
# US_COORDS; "Springfield" (IL) has no entry here.
US_CITY_AREA_CODES = {
    "Los Angeles": ["213", "310", "323", "424", "661"],
    "San Francisco": ["415", "628"],
    "New York": ["212", "347", "718", "929", "646"],
    "Buffalo": ["716"],
    "Houston": ["713", "281", "832"],
    "Dallas": ["214", "469", "972"],
    "Miami": ["305", "786"],
    "Orlando": ["407", "689"],
    "Chicago": ["312", "773", "872"],
    "Seattle": ["206"],
    "Spokane": ["509"],
    "Boston": ["617", "857"],
    "Worcester": ["508", "774"],
    "Philadelphia": ["215", "267", "445"],
    "Pittsburgh": ["412", "878"],
    "Phoenix": ["602", "480", "623"],
    "Tucson": ["520"],
    "Atlanta": ["404", "470", "678", "770"],
    "Savannah": ["912"],
    "Columbus": ["614", "380"],
    "Cleveland": ["216", "440"],
    "Charlotte": ["704", "980"],
    "Raleigh": ["919", "984"],
    "Detroit": ["313", "734", "586"],
    "Grand Rapids": ["616"],
    "Denver": ["303", "720"],
    "Colorado Springs": ["719"],
    "Richmond": ["804"],
    "Virginia Beach": ["757"],
    "Newark": ["973", "862"],
    "Jersey City": ["201", "551"],
    "Baltimore": ["410", "443", "667"],
    "Rockville": ["240", "301"],
    "Minneapolis": ["612"],
    "Saint Paul": ["651"],
    "Milwaukee": ["414"],
    "Madison": ["608"],
    "St. Louis": ["314", "636"],
    "Kansas City": ["816"],
    "Indianapolis": ["317", "463"],
    "Fort Wayne": ["260"],
    "Nashville": ["615", "629"],
    "Memphis": ["901"],
    "Portland": ["503", "971"],
    "Salem": ["503"],
    "Las Vegas": ["702", "725"],
    "Reno": ["775"],
}
|
||
|
||
|
||
# State abbreviation -> sorted, de-duplicated union of the area codes of
# that state's cities in US_COORDS. Derived at import time; a city with no
# entry in US_CITY_AREA_CODES contributes nothing.
US_AREA_CODES = {
    abbr: sorted({code for _, _, city in cities for code in US_CITY_AREA_CODES.get(city, [])})
    for abbr, cities in US_COORDS.items()
}
|
||
|
||
|
||
# State abbreviation -> inclusive (low, high) range of the three-digit ZIP
# prefix. _us_random_zip_for_state draws a prefix from this range and
# zero-pads it, so (10, 27) yields prefixes "010".."027".
US_ZIP_RANGES = {
    "CA": (900, 961),
    "NY": (100, 149),
    "TX": (750, 799),
    "FL": (320, 349),
    "IL": (600, 629),
    "WA": (980, 994),
    "MA": (10, 27),
    "PA": (150, 196),
    "AZ": (850, 865),
    "GA": (300, 319),
    "OH": (430, 459),
    "NC": (270, 289),
    "MI": (480, 499),
    "CO": (800, 816),
    "VA": (220, 246),
    "NJ": (70, 89),
    "MD": (206, 219),
    "MN": (550, 567),
    "WI": (530, 549),
    "MO": (630, 658),
    "IN": (460, 479),
    "TN": (370, 385),
    "OR": (970, 979),
    "NV": (889, 898),
}
|
||
|
||
|
||
def _normalize_state(state: str) -> str:
|
||
"""
|
||
州入参规范化,支持全称或缩写,返回缩写
|
||
|
||
参数:
|
||
state (str): 州名,可为全称或缩写(如 "California" 或 "CA")
|
||
|
||
返回值:
|
||
str: 州缩写(如 "CA")
|
||
"""
|
||
if not state:
|
||
raise ValueError("state 不能为空")
|
||
s = state.strip()
|
||
if len(s) == 2:
|
||
return s.upper()
|
||
return US_STATE_ABBR.get(s, s)
|
||
|
||
|
||
def _us_pick_coords(state_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """
    Choose a seed coordinate for a state, optionally for a given city.

    Args:
        state_abbr (str): State abbreviation.
        city (Optional[str]): City name (e.g. "Los Angeles"); may be empty.

    Returns:
        (lat, lon, city_name): The seed point and its city. Falls back to
        New York when the state has no entries, and to a random entry of
        the state when the city is not given or not found.
    """
    candidates = US_COORDS.get(state_abbr)
    if not candidates:
        return 40.712776, -74.005974, "New York"

    if city:
        wanted = city.strip().lower()
        match = next((entry for entry in candidates if entry[2].lower() == wanted), None)
        if match is not None:
            return match

    return random.choice(candidates)
|
||
|
||
|
||
def _us_format_address(address: Dict, state_abbr: str) -> str:
|
||
"""
|
||
将 Nominatim 的 address 格式化为美国地址字符串
|
||
|
||
参数:
|
||
address (dict): Nominatim 返回的 address 字段
|
||
state_abbr (str): 州缩写(如 "CA")
|
||
|
||
返回值:
|
||
str: 格式化后的地址字符串
|
||
"""
|
||
house = address.get("house_number")
|
||
road = address.get("road") or address.get("residential") or address.get("footway")
|
||
city = address.get("city") or address.get("town") or address.get("village")
|
||
postcode = address.get("postcode") or ""
|
||
if house and road and city:
|
||
return f"{house} {road}, {city}, {state_abbr} {postcode}, United States"
|
||
return f"{city or ''}, {state_abbr} {postcode}, United States".strip(", ")
|
||
|
||
|
||
def _us_random_phone_state(state_abbr: str, city: Optional[str]) -> str:
    """
    Generate a random US phone number, preferring city-level area codes.

    The city is matched against US_CITY_AREA_CODES ignoring case and
    surrounding whitespace, the same way _us_pick_coords matches cities.
    Cities without an entry fall back to the state-level list.

    Args:
        state_abbr (str): State abbreviation.
        city (Optional[str]): City name; may be None or empty.

    Returns:
        str: Phone number such as "(213) 555-1234". The area code is "000"
        when neither the city nor the state has any codes.
    """
    codes = None
    if city:
        wanted = city.strip().casefold()
        codes = next(
            (city_codes for name, city_codes in US_CITY_AREA_CODES.items() if name.casefold() == wanted),
            None,
        )
    # `or` also covers a state whose derived code list is empty.
    codes = codes or US_AREA_CODES.get(state_abbr) or ["000"]
    area = random.choice(codes)
    # Both ranges already produce the full digit count, so no padding is needed.
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange}-{line}"
|
||
|
||
|
||
def _us_random_zip_for_state(state_abbr: str) -> str:
    """
    Generate a five-digit US ZIP code whose prefix fits the state.

    Args:
        state_abbr (str): State abbreviation.

    Returns:
        str: ZIP code such as "90012". The three-digit prefix comes from
        US_ZIP_RANGES, or from 100-999 for a state without an entry; the
        last two digits are random.
    """
    bounds = US_ZIP_RANGES.get(state_abbr)
    low, high = bounds if bounds else (100, 999)
    prefix = random.randint(low, high)
    tail = random.randint(0, 99)
    return f"{prefix:03d}{tail:02d}"
|
||
|
||
|
||
def generate_us_info(state: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]:
    """
    Generate random US personal and address data using reverse geocoding.

    Random points near a seed coordinate are reverse-geocoded until one
    yields a house number, street, city and a postcode of the form
    12345 or 12345-6789, or until ``max_attempts`` is exhausted. The values
    from the last processed response are used either way.

    Args:
        state (str): State, full name or abbreviation ("California" / "CA").
        city (Optional[str]): City (e.g. "Los Angeles"); random within the
            state when omitted.
        max_attempts (int): Maximum number of reverse-geocoding attempts.
        sleep_sec (float): Seconds to wait after each unsuccessful attempt,
            to respect Nominatim rate limits.

    Returns:
        dict: firstname, lastname, full_name, birthday, address_str (the
        part of the formatted address before the first comma), city_name,
        phone, postcode and state (full name).
    """
    state_abbr = _normalize_state(state)
    base_lat, base_lon, chosen_city = _us_pick_coords(state_abbr, city)

    address_str = ""
    city_name = ""
    postcode = ""
    for _ in range(max_attempts):
        lat, lon = _random_near(base_lat, base_lon)
        payload = _reverse_geocode(lat, lon)
        if payload:
            details = payload.get("address", {})
            city_name = details.get("city") or details.get("town") or details.get("village") or chosen_city
            postcode = details.get("postcode") or ""
            address_str = _us_format_address(details, state_abbr)
            street = details.get("road") or details.get("residential") or details.get("footway")
            if details.get("house_number") and street and city_name and re.fullmatch(r"\d{5}(-\d{4})?", postcode or ""):
                break
        time.sleep(sleep_sec)

    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _us_random_phone_state(state_abbr, city or chosen_city)

    # Map the abbreviation back to the full name, keeping it if unknown.
    state_full = state_abbr
    for full_name, abbr in US_STATE_ABBR.items():
        if abbr == state_abbr:
            state_full = full_name
            break

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "state": state_full,
    }
|
||
|
||
|
||
def get_random_us_info(state: str, city: Optional[str] = None) -> Dict[str, str]:
    """
    Generate US personal and address data locally (no network access).

    Args:
        state (str): State, full name or abbreviation ("California" / "CA").
        city (Optional[str]): City (e.g. "Los Angeles"). Optional: when
            omitted a city of the state is picked at random. When the given
            city matches a known city ignoring case, the known spelling is
            used so the city-level area-code lookup and the returned city
            name are consistent.

    Returns:
        dict: firstname, lastname, full_name, birthday, address_str,
        city_name, phone, postcode and state (full name).
    """

    def _random_street_us() -> str:
        """Return a made-up street address such as '123 Maple Ave'."""
        house = random.randint(10, 9999)
        street_roots = [
            "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash",
            "River", "Lake", "Hill", "Queen", "King", "Main", "Washington", "Lincoln",
            "Church", "College", "Center",
        ]
        suffixes = ["St", "Ave", "Rd", "Blvd", "Dr", "Ct", "Pl", "Ln", "Way", "Terrace"]
        return f"{house} {random.choice(street_roots)} {random.choice(suffixes)}"

    state_abbr = _normalize_state(state)
    _, _, chosen_city = _us_pick_coords(state_abbr, city)

    # Prefer the table's spelling when the caller's city matched it.
    if city and chosen_city.lower() == city.strip().lower():
        city_name = chosen_city
    else:
        city_name = city or chosen_city

    firstname, lastname = _random_name()
    full_name = f"{firstname} {lastname}"
    birthday = _random_birthday()
    phone = _us_random_phone_state(state_abbr, city_name)

    address_str = _random_street_us()
    postcode = _us_random_zip_for_state(state_abbr)
    state_full = next((k for k, v in US_STATE_ABBR.items() if v == state_abbr), state_abbr)

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": full_name,
        "birthday": birthday,
        "address_str": address_str,
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "state": state_full,
    }
|
||
|
||
|
||
def _random_birthday_by_age_range(min_age: int, max_age: int) -> str:
|
||
"""
|
||
按年龄区间生成随机生日,格式为 yyyy-mm-dd
|
||
|
||
参数:
|
||
min_age (int): 最小年龄(含)
|
||
max_age (int): 最大年龄(含)
|
||
|
||
返回值:
|
||
str: 生日字符串
|
||
"""
|
||
if min_age < 0:
|
||
min_age = 0
|
||
if max_age < min_age:
|
||
max_age = min_age
|
||
today = date.today()
|
||
start = today - timedelta(days=max_age * 365 + 366)
|
||
end = today - timedelta(days=min_age * 365)
|
||
delta_days = (end - start).days
|
||
d = start + timedelta(days=random.randint(0, max(delta_days, 1)))
|
||
return f"{d.year}-{d.month:02d}-{d.day:02d}"
|
||
|
||
|
||
def _random_date_between(start: date, end: date) -> str:
|
||
"""
|
||
在指定日期区间内生成随机日期,格式为 yyyy-mm-dd
|
||
|
||
参数:
|
||
start (date): 起始日期(含)
|
||
end (date): 结束日期(含)
|
||
|
||
返回值:
|
||
str: 随机日期字符串
|
||
"""
|
||
if end < start:
|
||
start, end = end, start
|
||
delta_days = (end - start).days
|
||
d = start + timedelta(days=random.randint(0, max(delta_days, 1)))
|
||
return f"{d.year}-{d.month:02d}-{d.day:02d}"
|
||
|
||
|
||
def generate_child_parent_names(
    enforce_period_under13: bool = True,
    period_start: str = "2013-07-01",
    period_end: str = "2020-04-01",
    min_child_age: int = 1,
    max_child_age: int = 17,
    min_parent_age: int = 25,
    max_parent_age: int = 65,
    country: str = "US",
    province_or_state: Optional[str] = None,
    city: Optional[str] = None,
    use_network: bool = False,
    separate_phones: bool = True,
) -> Dict[str, str]:
    """
    Generate a child and a parent who share a surname and an address.

    Args:
        enforce_period_under13 (bool): When True (default) the child's
            birthday is chosen so the child is still under 13 on
            ``period_end``; the age-range arguments are then ignored.
        period_start (str): ISO date of the period start. It is parsed
            (and therefore validated) but does not bound the birthday.
            NOTE(review): confirm whether the child should also be born on
            or before this date.
        period_end (str): ISO date of the period end.
        min_child_age (int): Minimum child age (period constraint disabled).
        max_child_age (int): Maximum child age (period constraint disabled).
        min_parent_age (int): Minimum parent age (period constraint disabled).
        max_parent_age (int): Maximum parent age (period constraint disabled).
        country (str): "US" (default) selects the US generators; any other
            value selects Canada. None or an empty string is treated as "CA".
        province_or_state (Optional[str]): Province/state as full name or
            abbreviation. When omitted, one is chosen at random from the
            regions that have coordinate data.
        city (Optional[str]): City; random when omitted.
        use_network (bool): Use reverse geocoding for the address instead
            of local generation. Default False.
        separate_phones (bool): Give the child a phone number generated
            separately from the parent's. Default True.

    Returns:
        dict: ``child_*`` and ``parent_*`` entries for firstname, lastname,
        full_name, birthday, address_str, city_name, phone, postcode, and
        state (US) or province (Canada).

    Raises:
        ValueError: If ``period_start`` or ``period_end`` is not a valid
            ISO date while ``enforce_period_under13`` is True.
    """

    def _minus_years_safe(dt: date, years: int) -> date:
        # Feb 29 has no counterpart in a non-leap year: use Feb 28 instead.
        try:
            return dt.replace(year=dt.year - years)
        except ValueError:
            return dt.replace(year=dt.year - years, day=28)

    parent_first, parent_last = _random_name()
    child_first, _ = _random_name()

    if enforce_period_under13:
        date.fromisoformat(period_start)  # validation only, see docstring
        pe = date.fromisoformat(period_end)
        # Born strictly after (period_end - 13 years) => under 13 on period_end.
        # The safe subtraction also handles a Feb 29 period_end.
        child_min = _minus_years_safe(pe, 13) + timedelta(days=1)
        child_birthday = _random_date_between(child_min, pe)

        # Derive the parent's birthday from the child's with a plausible gap.
        child_dt = date.fromisoformat(child_birthday)
        gap = random.randint(20, 45)
        parent_birthday = _minus_years_safe(child_dt, gap).isoformat()
    else:
        child_birthday = _random_birthday_by_age_range(min_child_age, max_child_age)
        parent_birthday = _random_birthday_by_age_range(min_parent_age, max_parent_age)

    country = (country or "CA").upper()
    addr_info: Dict[str, str]
    if country == "US":
        region_key = "state"
        # Only states with coordinate data produce consistent city/ZIP/phone
        # values, so the random pick is limited to those.
        requested = province_or_state or random.choice(list(US_COORDS))
        if use_network:
            addr_info = generate_us_info(requested, city)
        else:
            addr_info = get_random_us_info(requested, city)
        region_full = addr_info.get("state")
        # The caller may have passed a full name; the phone helper needs the
        # abbreviation.
        region_abbr = US_STATE_ABBR.get(region_full, requested)
        make_phone = _us_random_phone_state
    else:
        region_key = "province"
        requested = province_or_state or random.choice(list(CA_PROVINCE_ABBR.values()))
        if use_network:
            addr_info = generate_canada_info(requested, city)
        else:
            addr_info = get_random_canada_info(requested, city)
        region_full = addr_info.get("province")
        region_abbr = CA_PROVINCE_ABBR.get(region_full, requested)
        make_phone = _random_phone_city

    parent_phone = addr_info.get("phone")
    child_phone = parent_phone
    if separate_phones:
        # A fresh random number can collide with the parent's; retry a few times.
        for _ in range(5):
            child_phone = make_phone(region_abbr, addr_info.get("city_name"))
            if child_phone != parent_phone:
                break

    def _person(prefix: str, first: str, birthday: str, phone: Optional[str]) -> Dict[str, str]:
        # One person's entries; the address fields are shared by both people.
        return {
            f"{prefix}_firstname": first,
            f"{prefix}_lastname": parent_last,
            f"{prefix}_full_name": f"{first} {parent_last}",
            f"{prefix}_birthday": birthday,
            f"{prefix}_address_str": addr_info.get("address_str"),
            f"{prefix}_city_name": addr_info.get("city_name"),
            f"{prefix}_phone": phone,
            f"{prefix}_postcode": addr_info.get("postcode"),
            f"{prefix}_{region_key}": region_full,
        }

    return {
        **_person("child", child_first, child_birthday, child_phone),
        **_person("parent", parent_first, parent_birthday, parent_phone),
    }
|
||
def main() -> None:
    """
    Demo: print random data for Calgary, Alberta.

    Change the arguments to try another province or city. This goes
    through generate_canada_info, which performs reverse-geocoding
    requests.
    """
    print(generate_canada_info("Alberta", "Calgary"))
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: prints one locally generated child/parent record.
    # The network-backed demo in main() is intentionally not called here.
    print(generate_child_parent_names())
|