# Listing metadata from the file viewer: 333 lines, 12 KiB, Python.
import random
|
||
import time
|
||
from datetime import date, timedelta
|
||
from typing import Optional, Dict
|
||
|
||
import requests
|
||
|
||
|
||
# Maps each province/territory full name to its two-letter abbreviation.
# Insertion order is kept stable because other code iterates this mapping.
CA_PROVINCE_ABBR = {
    # Provinces
    "Alberta": "AB",
    "British Columbia": "BC",
    "Manitoba": "MB",
    "New Brunswick": "NB",
    "Newfoundland and Labrador": "NL",
    "Nova Scotia": "NS",
    "Ontario": "ON",
    "Prince Edward Island": "PE",
    "Quebec": "QC",
    "Saskatchewan": "SK",
    # Territories
    "Northwest Territories": "NT",
    "Nunavut": "NU",
    "Yukon": "YT",
}
|
||
|
||
|
||
# Base coordinates per province abbreviation. Each entry is a
# (latitude, longitude, label) tuple; every province has two entries.
CA_COORDS = {
    "AB": [
        (51.044733, -114.071883, "Calgary"),
        (53.546124, -113.493823, "Edmonton"),
    ],
    "BC": [
        (49.282729, -123.120738, "Vancouver"),
        (48.428421, -123.365644, "Victoria"),
    ],
    "MB": [
        (49.895137, -97.138374, "Winnipeg"),
        # NOTE(review): this latitude is identical to the Regina entry under
        # "SK" -- confirm the coordinates really belong to East St Paul.
        (50.445211, -96.823611, "East St Paul"),
    ],
    "NB": [
        (45.963589, -66.643115, "Fredericton"),
        (46.510712, -67.255044, "Woodstock"),
    ],
    "NL": [
        (53.135509, -57.660435, "Labrador City"),
        (47.561510, -52.712585, "St. John's"),
    ],
    "NS": [
        (44.648862, -63.575320, "Halifax"),
        (45.010474, -63.416817, "Truro"),
    ],
    "ON": [
        (43.653225, -79.383186, "Toronto"),
        (45.421532, -75.697189, "Ottawa"),
    ],
    "PE": [
        (46.238240, -63.131074, "Charlottetown"),
        (46.392410, -63.787629, "Summerside"),
    ],
    "QC": [
        (45.501689, -73.567256, "Montreal"),
        (46.813878, -71.207980, "Quebec City"),
    ],
    "SK": [
        (52.133214, -106.670046, "Saskatoon"),
        (50.445211, -104.618896, "Regina"),
    ],
    # NOTE(review): both "NT" entries carry the label "Yellowknife" although
    # their coordinates differ -- confirm the second label.
    "NT": [
        (62.4540, -114.3725, "Yellowknife"),
        (61.251955, -114.352482, "Yellowknife"),
    ],
    # NOTE(review): the second "NU" and "YT" entries are labelled with the
    # territory name rather than a city -- presumably generic fallback points.
    "NU": [
        (63.7467, -68.5167, "Iqaluit"),
        (64.282327, -76.614813, "Nunavut"),
    ],
    "YT": [
        (60.7212, -135.0568, "Whitehorse"),
        (64.000000, -138.000000, "Yukon"),
    ],
}
|
||
|
||
|
||
# Telephone area codes per province abbreviation, used to build random
# phone numbers. The lists are not exhaustive.
CA_AREA_CODES = {
    "AB": ["403", "587", "825"],
    "BC": ["236", "250", "604", "672", "778"],
    "MB": ["204", "431"],
    "NB": ["506"],
    "NL": ["709"],
    "NS": ["782", "902"],
    # "639" used to be listed here as well; it is a Saskatchewan overlay code
    # (see "SK" below), so it was removed to keep Ontario numbers plausible.
    "ON": [
        "226", "249", "289", "343", "365", "416", "437",
        "519", "548", "613", "647", "705", "807", "905",
    ],
    "PE": ["902"],
    "QC": ["418", "438", "450", "514", "579", "581", "819", "873"],
    "SK": ["306", "639"],
    # The three territories share a single area code.
    "NT": ["867"],
    "NU": ["867"],
    "YT": ["867"],
}
|
||
|
||
|
||
# Province abbreviations for which generate_canada_info accepts the first
# geocoding result even when it is only a partial address.
REMOTE_PROVINCES = {
    "NL",
    "NT",
    "NU",
    "YT",
}
|
||
|
||
|
||
def _normalize_province(province: str) -> str:
|
||
"""
|
||
省份入参规范化,支持全称或缩写,返回缩写
|
||
|
||
参数:
|
||
province (str): 省份,可为全称或缩写(如 "Alberta" 或 "AB")
|
||
|
||
返回值:
|
||
str: 省份缩写(如 "AB")
|
||
"""
|
||
if not province:
|
||
raise ValueError("province 不能为空")
|
||
p = province.strip()
|
||
if len(p) == 2:
|
||
return p.upper()
|
||
return CA_PROVINCE_ABBR.get(p, p)
|
||
|
||
|
||
def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """
    Choose a base coordinate for a province, optionally pinned to a city.

    Parameters:
        province_abbr (str): Province abbreviation used as the CA_COORDS key.
        city (Optional[str]): City name (e.g. "Calgary"); may be None or
            empty. Matching ignores case and surrounding whitespace.

    Returns:
        (lat, lon, city_name): The entry whose name matches ``city`` when
            there is one, otherwise a random entry for the province. When the
            province has no entries at all, the Calgary coordinates are
            returned.
    """
    candidates = CA_COORDS.get(province_abbr)
    if not candidates:
        # Unknown province: fall back to Calgary.
        return 51.044733, -114.071883, "Calgary"

    if city:
        wanted = city.strip().lower()
        # First entry with a matching name wins.
        match = next((entry for entry in candidates if entry[2].lower() == wanted), None)
        if match is not None:
            return match

    return random.choice(candidates)
|
||
|
||
|
||
def _random_near(lat: float, lon: float) -> tuple[float, float]:
|
||
"""
|
||
在给定坐标附近生成一个随机偏移坐标
|
||
|
||
参数:
|
||
lat (float): 基准纬度
|
||
lon (float): 基准经度
|
||
|
||
返回值:
|
||
(new_lat, new_lon): 随机偏移后的坐标
|
||
"""
|
||
return lat + (random.random() - 0.5) * 0.1, lon + (random.random() - 0.5) * 0.1
|
||
|
||
|
||
def _reverse_geocode(lat: float, lon: float) -> Dict:
    """
    Reverse-geocode a coordinate through the public Nominatim endpoint.

    Parameters:
        lat (float): Latitude.
        lon (float): Longitude.

    Returns:
        dict: The decoded JSON response; callers read its "address" field.

    Raises:
        requests.RequestException: If the request fails, times out (15 s),
            or the server answers with an error status.
    """
    endpoint = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}&zoom=18&addressdetails=1"
    response = requests.get(
        endpoint,
        headers={"User-Agent": "ca_auto_table/1.0"},
        timeout=15,
    )
    # Turn HTTP error statuses into exceptions before decoding the body.
    response.raise_for_status()
    return response.json()
|
||
|
||
|
||
def _format_address(address: Dict, province_abbr: str) -> str:
|
||
"""
|
||
将 Nominatim 的 address 格式化为完整地址字符串
|
||
|
||
参数:
|
||
address (dict): Nominatim 返回的 address 字段
|
||
province_abbr (str): 省份缩写(如 "AB")
|
||
|
||
返回值:
|
||
str: 格式化后的地址字符串
|
||
"""
|
||
house = address.get("house_number")
|
||
road = address.get("road") or address.get("residential") or address.get("footway")
|
||
city = address.get("city") or address.get("town") or address.get("village")
|
||
postcode = address.get("postcode") or ""
|
||
if house and road and city:
|
||
return f"{house} {road}, {city}, {province_abbr} {postcode}, Canada"
|
||
# 远端省份允许部分地址
|
||
return f"{city or ''}, {province_abbr} {postcode}, Canada".strip(", ")
|
||
|
||
|
||
def _random_name() -> tuple[str, str]:
|
||
"""
|
||
生成随机英文名(Firstname, Lastname),组合空间可达数百万以上
|
||
|
||
实现策略:
|
||
- 60% 概率使用常见英文名与姓氏列表(更自然)
|
||
- 40% 概率使用音节组合算法动态生成(数量级远超百万)
|
||
|
||
返回值:
|
||
(firstname, lastname)
|
||
"""
|
||
common_first = [
|
||
"James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda", "William", "Elizabeth",
|
||
"David", "Barbara", "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen",
|
||
"Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra",
|
||
"Donald", "Ashley", "Steven", "Kimberly", "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle",
|
||
"Kenneth", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa", "Edward", "Deborah",
|
||
"Ronald", "Stephanie", "Timothy", "Rebecca", "Jason", "Laura", "Jeffrey", "Sharon", "Ryan", "Cynthia",
|
||
"Jacob", "Kathleen", "Gary", "Amy", "Nicholas", "Shirley", "Eric", "Angela", "Stephen", "Helen",
|
||
"Jonathan", "Anna", "Larry", "Brenda", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha",
|
||
"Frank", "Katherine", "Benjamin", "Christine", "Gregory", "Emma", "Raymond", "Ruth", "Samuel", "Julie",
|
||
"Patrick", "Olivia", "Alexander", "Victoria"
|
||
]
|
||
common_last = [
|
||
"Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
|
||
"Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
|
||
"Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
|
||
"Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
|
||
"Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell", "Carter", "Roberts",
|
||
"Turner", "Phillips", "Parker", "Evans", "Edwards", "Collins", "Stewart", "Sanchez", "Morris", "Rogers",
|
||
"Reed", "Cook", "Morgan", "Bell", "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard",
|
||
"Ward", "Torres", "Peterson", "Gray", "Ramirez", "James", "Watson", "Brooks", "Kelly", "Sanders",
|
||
"Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson", "Coleman", "Jenkins", "Perry", "Powell",
|
||
"Long", "Patterson", "Hughes", "Flores"
|
||
]
|
||
|
||
if random.random() < 0.6:
|
||
return random.choice(common_first), random.choice(common_last)
|
||
|
||
# 动态音节组合生成,支持数百万组合
|
||
f_beg = [
|
||
"al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka", "li", "mar", "no",
|
||
"ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi", "wil", "xa", "ya", "zo"
|
||
]
|
||
f_mid = [
|
||
"a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou"
|
||
]
|
||
f_end = [
|
||
"n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah"
|
||
]
|
||
|
||
l_beg = [
|
||
"sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar", "lee", "tho", "whi", "har",
|
||
"san", "cla", "ram", "lew", "rob", "walk", "young", "all", "king", "wri", "scott", "tor", "nguy",
|
||
"hil", "flo", "gre", "ada", "nel", "bak", "hal", "riv", "camp", "mit", "car", "rob"
|
||
]
|
||
l_mid = [
|
||
"a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un"
|
||
]
|
||
l_suf = [
|
||
"son", "ton", "man", "ley", "ford", "wood", "well", "er", "ers", "ing", "s", "son", "es"
|
||
]
|
||
|
||
def build_name(beg, mid, end, syllables=(2, 3)) -> str:
|
||
parts = [random.choice(beg)]
|
||
for _ in range(random.choice(syllables) - 1):
|
||
parts.append(random.choice(mid))
|
||
parts.append(random.choice(end))
|
||
name = "".join(parts)
|
||
return name.capitalize()
|
||
|
||
first = build_name(f_beg, f_mid, f_end)
|
||
last = build_name(l_beg, l_mid, l_suf)
|
||
return first, last
|
||
|
||
|
||
def _random_birthday() -> str:
|
||
"""
|
||
生成随机生日,格式为 yyyy-mm-dd
|
||
|
||
返回值:
|
||
str: 生日字符串
|
||
"""
|
||
start = date(1950, 1, 1)
|
||
end = date(2000, 12, 31)
|
||
delta_days = (end - start).days
|
||
d = start + timedelta(days=random.randint(0, delta_days))
|
||
return f"{d.year}-{d.month:02d}-{d.day:02d}"
|
||
|
||
|
||
def _random_phone(province_abbr: str) -> str:
    """
    Build a random phone number using an area code of the given province.

    Parameters:
        province_abbr (str): Province abbreviation used as the CA_AREA_CODES
            key; an unknown abbreviation falls back to area code "000".

    Returns:
        str: Phone number such as "(403) 555-1234".
    """
    area = random.choice(CA_AREA_CODES.get(province_abbr, ["000"]))
    exchange = random.randint(200, 899)
    subscriber = random.randint(1000, 9999)
    return f"({area}) {exchange:03d}-{subscriber:04d}"
|
||
|
||
|
||
def generate_canada_info(province: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]:
    """
    Generate random Canadian personal and address details for a province.

    Random points near a base coordinate are reverse-geocoded until one
    yields a street-level address, or ``max_attempts`` is exhausted. For
    provinces in REMOTE_PROVINCES the first response is accepted as is.

    Parameters:
        province (str): Province, full name or abbreviation ("Alberta"/"AB").
        city (Optional[str]): City (e.g. "Calgary"); when omitted or not
            known for the province, a base point is picked at random.
        max_attempts (int): Maximum number of reverse-geocoding attempts.
        sleep_sec (float): Pause between consecutive attempts, intended to
            respect Nominatim's rate limit.
            NOTE(review): the public Nominatim usage policy may require a
            longer pause than the 0.6 s default -- confirm before bulk use.

    Returns:
        dict: Keys "firstname", "lastname", "full_name", "birthday",
            "address_str" (street part of the address), "city_name", "phone",
            "postcode" and "province" (full province name when known).
            Address fields are empty strings when no attempt was made.

    Raises:
        ValueError: If ``province`` is empty.
        requests.RequestException: If every geocoding attempt failed with a
            request error; the last error is re-raised.
    """
    prov_abbr = _normalize_province(province)
    base_lat, base_lon, chosen_city = _pick_coords(prov_abbr, city)

    address_str = ""
    street = ""
    city_name = ""
    postcode = ""
    got_response = False
    last_error: Optional[Exception] = None

    for attempt in range(max_attempts):
        if attempt:
            # Pause only between attempts; there is nothing to wait for
            # before the first request or after the final one.
            time.sleep(sleep_sec)

        lat, lon = _random_near(base_lat, base_lon)
        try:
            data = _reverse_geocode(lat, lon)
        except requests.RequestException as exc:
            # A transient network/HTTP failure counts as a failed attempt
            # instead of aborting the whole retry loop.
            last_error = exc
            continue
        got_response = True

        addr = data.get("address", {})
        house = addr.get("house_number")
        road = addr.get("road") or addr.get("residential") or addr.get("footway")
        city_name = addr.get("city") or addr.get("town") or addr.get("village") or chosen_city
        postcode = addr.get("postcode") or ""
        address_str = _format_address(addr, prov_abbr)

        has_street = bool(house and road)
        # Build the street from its components when they are known; cutting
        # the formatted string at the first comma is only correct when that
        # string actually starts with the street.
        street = f"{house} {road}" if has_street else address_str.split(",")[0]

        if prov_abbr in REMOTE_PROVINCES:
            # Remote provinces rarely resolve to street level; accept the
            # first response even if it is partial.
            break
        if has_street and city_name:
            break

    if not got_response and last_error is not None:
        # Nothing usable was ever returned: surface the failure rather than
        # silently handing back empty address fields.
        raise last_error

    firstname, lastname = _random_name()
    full_name = f"{firstname} {lastname}"
    birthday = _random_birthday()
    phone = _random_phone(prov_abbr)

    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": full_name,
        "birthday": birthday,
        "address_str": street,
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        # Reverse lookup of the full name; falls back to the abbreviation.
        "province": next((k for k, v in CA_PROVINCE_ABBR.items() if v == prov_abbr), prov_abbr),
    }
|
||
|
||
|
||
def get_random_canada_info(province: Optional[str] = None, city: Optional[str] = None) -> Dict[str, str]:
    """
    Generate random Canadian personal and address details.

    Parameters:
        province (Optional[str]): Province, full name or abbreviation. When
            None, a province is chosen at random from CA_PROVINCE_ABBR.
        city (Optional[str]): City within the province. When None (or not
            known for the province), a base point is chosen at random.

    Returns:
        dict: Same mapping as generate_canada_info (first name, last name,
            full name, birthday, street address, city, phone, postcode and
            province name).
    """
    if province is None:
        # No province requested: pick one at random.
        province = random.choice(list(CA_PROVINCE_ABBR.values()))
    return generate_canada_info(province, city)
|
||
|
||
|
||
def main() -> None:
    """
    Demo: print randomly generated info for Calgary, Alberta.

    Change the arguments below to try another province or city.
    """
    print(generate_canada_info("Alberta", "Calgary"))


# Run the demo only when the file is executed as a script.
if __name__ == "__main__":
    main()