import random
import time
from datetime import date, timedelta
from typing import Optional, Dict

import requests

from bit_browser import retry

# Full province/territory name -> two-letter postal abbreviation.
CA_PROVINCE_ABBR = {
    "Alberta": "AB",
    "British Columbia": "BC",
    "Manitoba": "MB",
    "New Brunswick": "NB",
    "Newfoundland and Labrador": "NL",
    "Nova Scotia": "NS",
    "Ontario": "ON",
    "Prince Edward Island": "PE",
    "Quebec": "QC",
    "Saskatchewan": "SK",
    "Northwest Territories": "NT",
    "Nunavut": "NU",
    "Yukon": "YT",
}

# Candidate base points per province, as (latitude, longitude, label).
# NOTE(review): a few entries look like placeholders and should be verified
# against a gazetteer before being relied on:
#   - MB "East St Paul" has exactly the same latitude as SK "Regina".
#   - NT lists two different points that are both labelled "Yellowknife".
#   - NU "Nunavut" and YT "Yukon" are labelled with the territory name,
#     not with a city name.
CA_COORDS = {
    "AB": [(51.044733, -114.071883, "Calgary"), (53.546124, -113.493823, "Edmonton")],
    "BC": [(49.282729, -123.120738, "Vancouver"), (48.428421, -123.365644, "Victoria")],
    "MB": [(49.895137, -97.138374, "Winnipeg"), (50.445211, -96.823611, "East St Paul")],
    "NB": [(45.963589, -66.643115, "Fredericton"), (46.510712, -67.255044, "Woodstock")],
    "NL": [(53.135509, -57.660435, "Labrador City"), (47.561510, -52.712585, "St. John's")],
    "NS": [(44.648862, -63.575320, "Halifax"), (45.010474, -63.416817, "Truro")],
    "ON": [(43.653225, -79.383186, "Toronto"), (45.421532, -75.697189, "Ottawa")],
    "PE": [(46.238240, -63.131074, "Charlottetown"), (46.392410, -63.787629, "Summerside")],
    "QC": [(45.501689, -73.567256, "Montreal"), (46.813878, -71.207980, "Quebec City")],
    "SK": [(52.133214, -106.670046, "Saskatoon"), (50.445211, -104.618896, "Regina")],
    "NT": [(62.4540, -114.3725, "Yellowknife"), (61.251955, -114.352482, "Yellowknife")],
    "NU": [(63.7467, -68.5167, "Iqaluit"), (64.282327, -76.614813, "Nunavut")],
    "YT": [(60.7212, -135.0568, "Whitehorse"), (64.000000, -138.000000, "Yukon")],
}

# Telephone area codes per province/territory.
# - "780" belongs to Alberta (the city-level table maps it to Edmonton).
# - "639" is a Saskatchewan overlay, so it is listed under SK only, not ON.
# - "782" overlays "902", which is shared by Nova Scotia and PEI.
CA_AREA_CODES = {
    "AB": ["403", "587", "780", "825"],
    "BC": ["236", "250", "604", "672", "778"],
    "MB": ["204", "431"],
    "NB": ["506"],
    "NL": ["709"],
    "NS": ["782", "902"],
    "ON": [
        "226", "249", "289", "343", "365", "416", "437", "519",
        "548", "613", "647", "705", "807", "905",
    ],
    "PE": ["782", "902"],
    "QC": ["418", "438", "450", "514", "579", "581", "819", "873"],
    "SK": ["306", "639"],
    "NT": ["867"],
    "NU": ["867"],
    "YT": ["867"],
}

# Area codes for major cities (a tighter, city-level constraint)
CITY_AREA_CODES = {
    "Calgary": ["403", "587", "825"],
    "Edmonton": ["780", "587", "825"],
    "Vancouver": ["604", "778", "236", "672"],
    "Halifax": ["902", "782"],
    "Toronto": ["416", "647", "437"],
}

# Valid first letters of a postal code, keyed by province abbreviation.
# Ontario uses K, L, M, N and P.
POSTAL_PREFIXES = {
    "AB": {"T"},
    "BC": {"V"},
    "MB": {"R"},
    "NB": {"E"},
    "NL": {"A"},
    "NS": {"B"},
    "ON": {"K", "L", "M", "N", "P"},
    "PE": {"C"},
    "QC": {"G", "H", "J"},
    "SK": {"S"},
    "NT": {"X"},
    "NU": {"X"},
    "YT": {"Y"},
}

# Provinces/territories for which a partial (street-less) address is accepted.
REMOTE_PROVINCES = {"NL", "NT", "NU", "YT"}

# Name tables used by _random_name(); kept at module level so they are built
# once instead of on every call.
_COMMON_FIRST_NAMES = (
    "James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael",
    "Linda", "William", "Elizabeth", "David", "Barbara", "Richard", "Susan",
    "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen",
    "Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony",
    "Margaret", "Mark", "Sandra", "Donald", "Ashley", "Steven", "Kimberly",
    "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle", "Kenneth",
    "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa",
    "Edward", "Deborah", "Ronald", "Stephanie", "Timothy", "Rebecca",
    "Jason", "Laura", "Jeffrey", "Sharon", "Ryan", "Cynthia", "Jacob",
    "Kathleen", "Gary", "Amy", "Nicholas", "Shirley", "Eric", "Angela",
    "Stephen", "Helen", "Jonathan", "Anna", "Larry", "Brenda", "Justin",
    "Pamela", "Scott", "Nicole", "Brandon", "Samantha", "Frank",
    "Katherine", "Benjamin", "Christine", "Gregory", "Emma", "Raymond",
    "Ruth", "Samuel", "Julie", "Patrick", "Olivia", "Alexander", "Victoria",
)
_COMMON_LAST_NAMES = (
    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller",
    "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez",
    "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
    "Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark",
    "Ramirez", "Lewis", "Robinson", "Walker", "Young", "Allen", "King",
    "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores", "Green",
    "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell",
    "Carter", "Roberts", "Turner", "Phillips", "Parker", "Evans", "Edwards",
    "Collins", "Stewart", "Morris", "Rogers", "Reed", "Cook", "Morgan",
    "Bell", "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard",
    "Ward", "Peterson", "Gray", "James", "Watson", "Brooks", "Kelly",
    "Sanders", "Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson",
    "Coleman", "Jenkins", "Perry", "Powell", "Long", "Patterson", "Hughes",
)
_FIRST_BEGINNINGS = (
    "al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka",
    "li", "mar", "no", "ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi",
    "wil", "xa", "ya", "zo",
)
_FIRST_MIDDLES = ("a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou")
_FIRST_ENDINGS = (
    "n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah",
)
_LAST_BEGINNINGS = (
    "sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar",
    "lee", "whi", "har", "san", "cla", "ram", "lew", "rob", "walk", "young",
    "all", "king", "wri", "scott", "tor", "nguy", "hil", "flo", "gre", "ada",
    "nel", "bak", "hal", "riv", "camp", "mit", "car",
)
_LAST_MIDDLES = (
    "a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un",
)
_LAST_ENDINGS = (
    "son", "ton", "man", "ley", "ford", "wood", "well", "er", "ers", "ing",
    "s", "es",
)


def _normalize_province(province: str) -> str:
    """Normalize a province given as full name or abbreviation.

    Args:
        province: Province as a full name or a two-letter abbreviation
            (for example "Alberta" or "AB"). Full names are matched
            case-insensitively; surrounding whitespace is ignored.

    Returns:
        The two-letter abbreviation (for example "AB"). A value that is
        neither two characters long nor a known full name is returned
        stripped but otherwise unchanged.

    Raises:
        ValueError: If ``province`` is empty or whitespace only.
    """
    if not province or not province.strip():
        raise ValueError("province 不能为空")
    cleaned = province.strip()
    if len(cleaned) == 2:
        return cleaned.upper()
    wanted = cleaned.casefold()
    for full_name, abbr in CA_PROVINCE_ABBR.items():
        if full_name.casefold() == wanted:
            return abbr
    return cleaned


def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """Pick a base coordinate for a province and an optional city.

    Args:
        province_abbr: Province abbreviation.
        city: City name (for example "Calgary"); may be empty or None.

    Returns:
        ``(lat, lon, city_name)`` of the selected base point. The named
        city is used when it is listed for the province (case-insensitive
        match); otherwise a random point of the province is chosen. An
        unknown province falls back to Calgary.
    """
    coords = CA_COORDS.get(province_abbr)
    if not coords:
        # Default fallback: Calgary.
        return 51.044733, -114.071883, "Calgary"
    if city:
        wanted = city.strip().lower()
        for lat, lon, name in coords:
            if name.lower() == wanted:
                return lat, lon, name
    return random.choice(coords)


def _random_near(lat: float, lon: float) -> tuple[float, float]:
    """Return a coordinate randomly offset from the given one.

    Args:
        lat: Base latitude.
        lon: Base longitude.

    Returns:
        ``(new_lat, new_lon)``, each within +/-0.05 degrees of the input.
    """
    new_lat = lat + (random.random() - 0.5) * 0.1
    new_lon = lon + (random.random() - 0.5) * 0.1
    return new_lat, new_lon


@retry(max_retries=3, delay=1.0, backoff=1.0)
def _reverse_geocode(lat: float, lon: float) -> Dict:
    """Reverse-geocode a coordinate with Nominatim.

    Args:
        lat: Latitude.
        lon: Longitude.

    Returns:
        The decoded JSON response; the caller reads its "address" field.

    Raises:
        requests.HTTPError: On a non-2xx response (via ``raise_for_status``).
            NOTE(review): what the ``retry`` decorator does once its retries
            are exhausted is not visible here — confirm against bit_browser.
    """
    # Let requests build the query string instead of formatting it by hand.
    params = {
        "format": "json",
        "lat": lat,
        "lon": lon,
        "zoom": 18,
        "addressdetails": 1,
    }
    headers = {"User-Agent": "ca_auto_table/1.0"}
    response = requests.get(
        "https://nominatim.openstreetmap.org/reverse",
        params=params,
        headers=headers,
        timeout=15,
    )
    response.raise_for_status()
    return response.json()


def _street_name(address: Dict) -> Optional[str]:
    """Return the street-like component of a Nominatim address, if any."""
    return address.get("road") or address.get("residential") or address.get("footway")


def _locality(address: Dict) -> Optional[str]:
    """Return the city/town/village component of a Nominatim address, if any."""
    return address.get("city") or address.get("town") or address.get("village")


def _format_address(address: Dict, province_abbr: str) -> str:
    """Format a Nominatim ``address`` dict as a single address string.

    Args:
        address: The "address" field of a Nominatim response.
        province_abbr: Province abbreviation (for example "AB").

    Returns:
        "<house> <road>, <city>, <prov> <postcode>, Canada" when house
        number, road and city are all present; otherwise a partial
        "<city>, <prov> <postcode>, Canada" with leading/trailing commas
        and spaces stripped.
    """
    house = address.get("house_number")
    road = _street_name(address)
    city = _locality(address)
    postcode = address.get("postcode") or ""
    if house and road and city:
        return f"{house} {road}, {city}, {province_abbr} {postcode}, Canada"
    # Remote provinces are allowed to have a partial address.
    return f"{city or ''}, {province_abbr} {postcode}, Canada".strip(", ")


def _build_name(beginnings, middles, endings, syllables=(2, 3)) -> str:
    """Assemble a capitalized name from one beginning, 1-2 middles and one ending."""
    parts = [random.choice(beginnings)]
    for _ in range(random.choice(syllables) - 1):
        parts.append(random.choice(middles))
    parts.append(random.choice(endings))
    return "".join(parts).capitalize()


def _random_name() -> tuple[str, str]:
    """Generate a random English-looking (firstname, lastname) pair.

    Strategy:
        - With probability 0.6, pick from the tables of common first and
          last names (reads more naturally).
        - Otherwise, build both names from syllable tables, which yields a
          much larger combination space.

    Returns:
        ``(firstname, lastname)``.
    """
    if random.random() < 0.6:
        return random.choice(_COMMON_FIRST_NAMES), random.choice(_COMMON_LAST_NAMES)
    first = _build_name(_FIRST_BEGINNINGS, _FIRST_MIDDLES, _FIRST_ENDINGS)
    last = _build_name(_LAST_BEGINNINGS, _LAST_MIDDLES, _LAST_ENDINGS)
    return first, last


def _random_birthday() -> str:
    """Generate a random birthday between 1950-01-01 and 2000-12-31.

    Returns:
        The date formatted as ``yyyy-mm-dd``.
    """
    start = date(1950, 1, 1)
    end = date(2000, 12, 31)
    offset = random.randint(0, (end - start).days)
    return (start + timedelta(days=offset)).isoformat()


def _phone_from_codes(codes) -> str:
    """Build "(AAA) EEE-LLLL" with the area code drawn from ``codes``."""
    area = random.choice(codes)
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange}-{line}"


def _random_phone(province_abbr: str) -> str:
    """Generate a random Canadian phone number for a province.

    Args:
        province_abbr: Province abbreviation.

    Returns:
        A phone number such as "(403) 555-1234". Unknown provinces get
        the placeholder area code "000".
    """
    return _phone_from_codes(CA_AREA_CODES.get(province_abbr, ["000"]))


def _random_phone_city(province_abbr: str, city: Optional[str]) -> str:
    """Generate a phone number, preferring city-level area codes.

    Args:
        province_abbr: Province abbreviation.
        city: City name; matched case-insensitively against the city
            table. May be empty or None.

    Returns:
        A phone number such as "(403) 555-1234". Falls back to the
        province's area codes when the city is not configured, and to
        "000" when the province is unknown as well.
    """
    codes = None
    if city:
        wanted = city.strip().lower()
        codes = next(
            (c for name, c in CITY_AREA_CODES.items() if name.lower() == wanted),
            None,
        )
    return _phone_from_codes(codes or CA_AREA_CODES.get(province_abbr, ["000"]))


def _postal_valid_for_province(province_abbr: str, postcode: str) -> bool:
    """Check that a postal code's first letter is valid for the province.

    Args:
        province_abbr: Province abbreviation.
        postcode: Postal code string.

    Returns:
        False for an empty postcode; True when the province has no
        configured prefixes; otherwise whether the first character
        (upper-cased) is one of the province's prefixes.
    """
    if not postcode:
        return False
    prefixes = POSTAL_PREFIXES.get(province_abbr)
    if not prefixes:
        return True
    return postcode[0].upper() in prefixes


def generate_canada_info(province: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]:
    """Generate random Canadian personal and address information.

    Args:
        province: Province as full name or abbreviation ("Alberta" / "AB").
        city: Optional city (for example "Calgary"); when omitted or not
            listed for the province, a random listed city is used.
        max_attempts: Maximum number of reverse-geocoding attempts.
        sleep_sec: Pause in seconds between attempts, to go easy on the
            Nominatim service.

    Returns:
        A dict with the keys "firstname", "lastname", "full_name",
        "birthday", "address_str" (the part of the formatted address
        before the first comma), "city_name", "phone", "postcode" and
        "province" (full province name when known). When no attempt
        yields a complete address, the values of the last successful
        lookup are used; the address may then be partial or empty.

    Raises:
        ValueError: If ``province`` is empty.
    """
    prov_abbr = _normalize_province(province)
    base_lat, base_lon, chosen_city = _pick_coords(prov_abbr, city)

    address_str = ""
    city_name = chosen_city  # used if every lookup comes back empty
    postcode = ""
    for attempt in range(max_attempts):
        if attempt:
            # Pause between attempts only; no trailing sleep after the last.
            time.sleep(sleep_sec)
        lat, lon = _random_near(base_lat, base_lon)
        data = _reverse_geocode(lat, lon)
        if not data:
            continue
        addr = data.get("address") or {}
        city_name = _locality(addr) or chosen_city
        postcode = addr.get("postcode") or ""
        address_str = _format_address(addr, prov_abbr)
        if prov_abbr in REMOTE_PROVINCES:
            # Remote regions rarely have street-level data; accept as is.
            break
        if (
            addr.get("house_number")
            and _street_name(addr)
            and city_name
            and _postal_valid_for_province(prov_abbr, postcode)
        ):
            break

    firstname, lastname = _random_name()
    # Use the city the coordinates were actually taken from, so the area
    # code stays consistent with the address even if the requested city is
    # not listed for this province.
    phone = _random_phone_city(prov_abbr, chosen_city)
    province_name = next(
        (name for name, abbr in CA_PROVINCE_ABBR.items() if abbr == prov_abbr),
        prov_abbr,
    )
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": _random_birthday(),
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": province_name,
    }


def get_random_canada_info(province: Optional[str] = None, city: Optional[str] = None) -> Dict[str, str]:
    """Generate random Canadian info, choosing the location randomly if omitted.

    Args:
        province: Province as full name or abbreviation. When None or
            empty, a province is picked at random.
        city: Optional city; when omitted, a city of the province is
            picked at random.

    Returns:
        The same dict as :func:`generate_canada_info`.
    """
    if not province:
        province = random.choice(list(CA_PROVINCE_ABBR.values()))
    return generate_canada_info(province, city)


def main() -> None:
    """Demo: print random info for Calgary, Alberta (edit to try other places)."""
    info = generate_canada_info("Alberta", "Calgary")
    print(info)


if __name__ == "__main__":
    main()