"""Generate random but plausible Canadian / US personal and address records.

Two flavours of generator are provided per country:

* ``generate_*_info``   -- reverse-geocodes a random point via Nominatim
  (needs network access and the ``requests`` package).
* ``get_random_*_info`` -- builds everything locally, no network required.

``generate_child_parent_names`` combines either flavour into a child/parent
pair sharing a surname and an address.
"""

import functools
import random
import re
import time
from datetime import date, timedelta
from typing import Callable, Dict, List, Optional, Sequence

try:
    import requests
except ImportError:  # The offline generators must keep working without it.
    requests = None

try:
    from bit_browser import retry
except ImportError:

    def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
        """Minimal local fallback for ``bit_browser.retry``.

        Args:
            max_retries: Total number of attempts before the last exception
                is re-raised.
            delay: Seconds to sleep before the second attempt.
            backoff: Factor applied to the sleep time after every failure.

        Returns:
            A decorator that re-invokes the wrapped callable on any
            ``Exception``.
        """

        def _decorator(func):
            @functools.wraps(func)
            def _wrapper(*args, **kwargs):
                tries = 0
                cur_delay = delay
                while True:
                    try:
                        return func(*args, **kwargs)
                    except Exception:
                        tries += 1
                        if tries >= max_retries:
                            raise
                        time.sleep(cur_delay)
                        cur_delay *= backoff

            return _wrapper

        return _decorator


# ---------------------------------------------------------------------------
# Canadian reference data
# ---------------------------------------------------------------------------

CA_PROVINCE_ABBR = {
    "Alberta": "AB",
    "British Columbia": "BC",
    "Manitoba": "MB",
    "New Brunswick": "NB",
    "Newfoundland and Labrador": "NL",
    "Nova Scotia": "NS",
    "Ontario": "ON",
    "Prince Edward Island": "PE",
    "Quebec": "QC",
    "Saskatchewan": "SK",
    "Northwest Territories": "NT",
    "Nunavut": "NU",
    "Yukon": "YT",
}

# (latitude, longitude, label) seed points per province.
CA_COORDS = {
    "AB": [(51.044733, -114.071883, "Calgary"), (53.546124, -113.493823, "Edmonton")],
    "BC": [(49.282729, -123.120738, "Vancouver"), (48.428421, -123.365644, "Victoria")],
    "MB": [(49.895137, -97.138374, "Winnipeg"), (50.445211, -96.823611, "East St Paul")],
    "NB": [(45.963589, -66.643115, "Fredericton"), (46.510712, -67.255044, "Woodstock")],
    "NL": [(53.135509, -57.660435, "Labrador City"), (47.561510, -52.712585, "St. John's")],
    "NS": [(44.648862, -63.575320, "Halifax"), (45.010474, -63.416817, "Truro")],
    "ON": [(43.653225, -79.383186, "Toronto"), (45.421532, -75.697189, "Ottawa")],
    "PE": [(46.238240, -63.131074, "Charlottetown"), (46.392410, -63.787629, "Summerside")],
    "QC": [(45.501689, -73.567256, "Montreal"), (46.813878, -71.207980, "Quebec City")],
    "SK": [(52.133214, -106.670046, "Saskatoon"), (50.445211, -104.618896, "Regina")],
    "NT": [(62.4540, -114.3725, "Yellowknife"), (61.251955, -114.352482, "Yellowknife")],
    "NU": [(63.7467, -68.5167, "Iqaluit"), (64.282327, -76.614813, "Nunavut")],
    "YT": [(60.7212, -135.0568, "Whitehorse"), (64.000000, -138.000000, "Yukon")],
}

CA_AREA_CODES = {
    # "780" added so the province list agrees with CITY_AREA_CODES["Edmonton"].
    "AB": ["403", "587", "780", "825"],
    "BC": ["236", "250", "604", "672", "778"],
    "MB": ["204", "431"],
    "NB": ["506"],
    "NL": ["709"],
    "NS": ["782", "902"],
    # "639" removed: it is a Saskatchewan overlay code, not an Ontario one.
    "ON": ["226", "249", "289", "343", "365", "416", "437", "519", "548",
           "613", "647", "705", "807", "905"],
    "PE": ["902"],
    "QC": ["418", "438", "450", "514", "579", "581", "819", "873"],
    "SK": ["306", "639"],
    "NT": ["867"],
    "NU": ["867"],
    "YT": ["867"],
}

# Area codes of major cities (a tighter, city-level constraint).
CITY_AREA_CODES = {
    "Calgary": ["403", "587", "825"],
    "Edmonton": ["780", "587", "825"],
    "Vancouver": ["604", "778", "236", "672"],
    "Halifax": ["902", "782"],
    "Toronto": ["416", "647", "437"],
}

# Valid first letters of a postal code, keyed by province abbreviation.
POSTAL_PREFIXES = {
    "AB": {"T"},
    "BC": {"V"},
    "MB": {"R"},
    "NB": {"E"},
    "NL": {"A"},
    "NS": {"B"},
    "ON": {"K", "L", "M"},
    "PE": {"C"},
    "QC": {"G", "H", "J"},
    "SK": {"S"},
    "NT": {"X"},
    "NU": {"X"},
    "YT": {"Y"},
}

# Provinces for which the network generator accepts the first geocoding
# result even if it lacks a full street address.
REMOTE_PROVINCES = {"NL", "NT", "NU", "YT"}

# Letters used when generating postal codes locally.
_POSTAL_LETTERS = "ABCEGHJKLMNPRSTVXY"

_STREET_SUFFIXES = ["St", "Ave", "Rd", "Blvd", "Dr", "Ct", "Pl", "Ln", "Way", "Terrace"]
_CA_STREET_ROOTS = [
    "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash", "River",
    "Lake", "Hill", "Queen", "King", "Main", "Victoria", "Wellington", "Church", "College", "Centre",
]
_US_STREET_ROOTS = [
    "Maple", "Oak", "Pine", "Cedar", "Elm", "Birch", "Willow", "Spruce", "Ash", "River",
    "Lake", "Hill", "Queen", "King", "Main", "Washington", "Lincoln", "Church", "College", "Center",
]

# ---------------------------------------------------------------------------
# Name tables (duplicates are kept on purpose: they preserve the original
# sampling weights)
# ---------------------------------------------------------------------------

_COMMON_FIRST_NAMES = [
    "James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda",
    "William", "Elizabeth", "David", "Barbara", "Richard", "Susan", "Joseph", "Jessica",
    "Thomas", "Sarah", "Charles", "Karen", "Christopher", "Nancy", "Daniel", "Lisa",
    "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra", "Donald", "Ashley",
    "Steven", "Kimberly", "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle",
    "Kenneth", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa",
    "Edward", "Deborah", "Ronald", "Stephanie", "Timothy", "Rebecca", "Jason", "Laura",
    "Jeffrey", "Sharon", "Ryan", "Cynthia", "Jacob", "Kathleen", "Gary", "Amy",
    "Nicholas", "Shirley", "Eric", "Angela", "Stephen", "Helen", "Jonathan", "Anna",
    "Larry", "Brenda", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha",
    "Frank", "Katherine", "Benjamin", "Christine", "Gregory", "Emma", "Raymond", "Ruth",
    "Samuel", "Julie", "Patrick", "Olivia", "Alexander", "Victoria",
]

_COMMON_LAST_NAMES = [
    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
    "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee", "Perez", "Thompson", "White",
    "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
    "Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell",
    "Carter", "Roberts", "Turner", "Phillips", "Parker", "Evans", "Edwards", "Collins",
    "Stewart", "Sanchez", "Morris", "Rogers", "Reed", "Cook", "Morgan", "Bell",
    "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard", "Ward", "Torres",
    "Peterson", "Gray", "Ramirez", "James", "Watson", "Brooks", "Kelly", "Sanders",
    "Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson", "Coleman", "Jenkins",
    "Perry", "Powell", "Long", "Patterson", "Hughes", "Flores",
]

_FIRST_BEGIN = [
    "al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka", "li", "mar",
    "no", "ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi", "wil", "xa", "ya", "zo",
]
_FIRST_MIDDLE = ["a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou"]
_FIRST_END = ["n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah"]
_LAST_BEGIN = [
    "sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar", "lee", "tho", "whi",
    "har", "san", "cla", "ram", "lew", "rob", "walk", "young", "all", "king", "wri", "scott",
    "tor", "nguy", "hil", "flo", "gre", "ada", "nel", "bak", "hal", "riv", "camp", "mit",
    "car", "rob",
]
_LAST_MIDDLE = ["a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un"]
_LAST_SUFFIX = [
    "son", "ton", "man", "ley", "ford", "wood", "well", "er", "ers", "ing", "s", "son", "es",
]


# ---------------------------------------------------------------------------
# Shared private helpers
# ---------------------------------------------------------------------------

def _normalize_region(value: str, table: Dict[str, str], label: str) -> str:
    """Map a full region name or an abbreviation to its abbreviation.

    Two-character input is upper-cased and returned as-is; anything else is
    matched case-insensitively against the full names in *table*. Unknown
    names are returned stripped but otherwise unchanged.

    Raises:
        ValueError: If *value* is empty or whitespace-only.
    """
    if not value or not value.strip():
        raise ValueError(f"{label} 不能为空")
    text = value.strip()
    if len(text) == 2:
        return text.upper()
    wanted = text.lower()
    for full_name, abbr in table.items():
        if full_name.lower() == wanted:
            return abbr
    return text


def _full_region_name(table: Dict[str, str], abbr: str) -> str:
    """Return the full name whose abbreviation is *abbr*, or *abbr* itself."""
    return next((name for name, short in table.items() if short == abbr), abbr)


def _pick_from_coords(
    coords: Optional[list],
    city: Optional[str],
    default: "tuple[float, float, str]",
) -> "tuple[float, float, str]":
    """Pick a seed point: *default* if no data, the named city if it matches
    (case-insensitively), otherwise a random entry of *coords*."""
    if not coords:
        return default
    if city:
        wanted = city.strip().lower()
        for lat, lon, name in coords:
            if name.lower() == wanted:
                return lat, lon, name
    return random.choice(coords)


def _city_area_codes(table: Dict[str, List[str]], city: Optional[str]) -> Optional[List[str]]:
    """Case-insensitive lookup of *city* in a city -> area-codes table."""
    if not city:
        return None
    wanted = city.strip().lower()
    for name, codes in table.items():
        if name.lower() == wanted:
            return codes
    return None


def _random_phone_number(area_codes: Sequence[str]) -> str:
    """Return ``"(AAA) EEE-LLLL"`` with the area code drawn from *area_codes*."""
    area = random.choice(area_codes)
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange}-{line}"


def _compose_address(address: Dict, region_abbr: str, country: str) -> str:
    """Format a Nominatim ``address`` dict as a one-line postal address.

    Falls back to ``"<city>, <region> <postcode>, <country>"`` when the house
    number, road or city is missing.
    """
    house = address.get("house_number")
    road = address.get("road") or address.get("residential") or address.get("footway")
    city = address.get("city") or address.get("town") or address.get("village")
    postcode = address.get("postcode") or ""
    if house and road and city:
        return f"{house} {road}, {city}, {region_abbr} {postcode}, {country}"
    return f"{city or ''}, {region_abbr} {postcode}, {country}".strip(", ")


def _has_street_address(address: Dict) -> bool:
    """True if *address* has a house number and some kind of road name."""
    road = address.get("road") or address.get("residential") or address.get("footway")
    return bool(address.get("house_number") and road)


def _random_street(street_roots: Sequence[str]) -> str:
    """Return a made-up street line such as ``'123 Maple Ave'``."""
    house = random.randint(10, 9999)
    return f"{house} {random.choice(street_roots)} {random.choice(_STREET_SUFFIXES)}"


def _random_postal(p_abbr: str) -> str:
    """Return a Canadian postal code (``A1A 1A1``) whose first letter is valid
    for province *p_abbr* (any allowed letter if the province is unknown)."""
    prefixes = POSTAL_PREFIXES.get(p_abbr) or set(_POSTAL_LETTERS)
    first_letter = random.choice(sorted(prefixes))

    def letter() -> str:
        return random.choice(_POSTAL_LETTERS)

    def digit() -> str:
        return str(random.randint(0, 9))

    return f"{first_letter}{digit()}{letter()} {digit()}{letter()}{digit()}"


def _years_before(dt: date, years: int) -> date:
    """Return *dt* moved back by *years* calendar years.

    Feb 29 is clamped to Feb 28 when the target year is not a leap year.
    """
    try:
        return dt.replace(year=dt.year - years)
    except ValueError:
        return dt.replace(year=dt.year - years, day=28)


def _locate_address(
    base_lat: float,
    base_lon: float,
    fallback_city: str,
    format_fn: Callable[[Dict], str],
    accept_fn: Callable[[Dict, str, str], bool],
    max_attempts: int,
    sleep_sec: float,
) -> "tuple[str, str, str]":
    """Reverse-geocode random points near a seed until one is acceptable.

    Returns:
        ``(address_str, city_name, postcode)`` of the accepted result, or of
        the last result seen if none was accepted (empty strings if no
        response carried data).
    """
    address_str = ""
    city_name = ""
    postcode = ""
    for _ in range(max_attempts):
        lat, lon = _random_near(base_lat, base_lon)
        data = _reverse_geocode(lat, lon)
        if not data:
            time.sleep(sleep_sec)
            continue
        addr = data.get("address", {})
        city_name = addr.get("city") or addr.get("town") or addr.get("village") or fallback_city
        postcode = addr.get("postcode") or ""
        address_str = format_fn(addr)
        if accept_fn(addr, city_name, postcode):
            break
        # Pause between requests to respect Nominatim's rate limit.
        time.sleep(sleep_sec)
    return address_str, city_name, postcode


# ---------------------------------------------------------------------------
# Canada
# ---------------------------------------------------------------------------

def _normalize_province(province: str) -> str:
    """Normalize a province given as full name or abbreviation.

    Args:
        province: e.g. ``"Alberta"``, ``"alberta"`` or ``"ab"``.

    Returns:
        The province abbreviation (e.g. ``"AB"``); unknown names are returned
        stripped but unchanged.

    Raises:
        ValueError: If *province* is empty.
    """
    return _normalize_region(province, CA_PROVINCE_ABBR, "province")


def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """Choose a seed coordinate for a province and optional city.

    Args:
        province_abbr: Province abbreviation.
        city: City label to prefer (case-insensitive); may be ``None``.

    Returns:
        ``(lat, lon, city_name)``. Falls back to Calgary when the province
        has no entry in ``CA_COORDS``.
    """
    return _pick_from_coords(
        CA_COORDS.get(province_abbr), city, (51.044733, -114.071883, "Calgary")
    )


def _random_near(lat: float, lon: float) -> tuple[float, float]:
    """Return a point offset from ``(lat, lon)`` by up to 0.05 degrees per axis."""
    return lat + (random.random() - 0.5) * 0.1, lon + (random.random() - 0.5) * 0.1


@retry(max_retries=3, delay=1.0, backoff=1.0)
def _fetch_reverse_geocode(lat: float, lon: float) -> Dict:
    """Perform the Nominatim reverse-geocoding HTTP request (with retries)."""
    response = requests.get(
        "https://nominatim.openstreetmap.org/reverse",
        params={"format": "json", "lat": lat, "lon": lon, "zoom": 18, "addressdetails": 1},
        headers={"User-Agent": "ca_auto_table/1.0"},
        timeout=15,
    )
    response.raise_for_status()
    return response.json()


def _reverse_geocode(lat: float, lon: float) -> Dict:
    """Reverse-geocode a coordinate with Nominatim.

    Args:
        lat: Latitude.
        lon: Longitude.

    Returns:
        The decoded JSON response.

    Raises:
        RuntimeError: If the ``requests`` package is not installed.
    """
    # Checked outside the retried call so a missing dependency fails fast.
    if requests is None:
        raise RuntimeError("the 'requests' package is required for network reverse geocoding")
    return _fetch_reverse_geocode(lat, lon)


def _format_address(address: Dict, province_abbr: str) -> str:
    """Format a Nominatim ``address`` dict as a Canadian address string.

    Args:
        address: The ``address`` field of a Nominatim response.
        province_abbr: Province abbreviation (e.g. ``"AB"``).

    Returns:
        The full address, or a partial ``city, province postcode`` form when
        street details are missing.
    """
    return _compose_address(address, province_abbr, "Canada")


def _random_name() -> tuple[str, str]:
    """Return a random ``(firstname, lastname)`` pair.

    With probability 0.6 both parts are drawn from lists of common English
    names; otherwise both are assembled from syllable fragments.
    """
    if random.random() < 0.6:
        return random.choice(_COMMON_FIRST_NAMES), random.choice(_COMMON_LAST_NAMES)

    def build_name(beg, mid, end, syllables=(2, 3)) -> str:
        # One opening fragment, one or two middle fragments, one ending.
        parts = [random.choice(beg)]
        for _ in range(random.choice(syllables) - 1):
            parts.append(random.choice(mid))
        parts.append(random.choice(end))
        return "".join(parts).capitalize()

    first = build_name(_FIRST_BEGIN, _FIRST_MIDDLE, _FIRST_END)
    last = build_name(_LAST_BEGIN, _LAST_MIDDLE, _LAST_SUFFIX)
    return first, last


def _random_birthday() -> str:
    """Return a random date between 1950-01-01 and 2000-12-31 as ``yyyy-mm-dd``."""
    start = date(1950, 1, 1)
    end = date(2000, 12, 31)
    day = start + timedelta(days=random.randint(0, (end - start).days))
    return day.isoformat()


def _random_phone(province_abbr: str) -> str:
    """Return a random phone number using one of the province's area codes.

    Args:
        province_abbr: Province abbreviation; unknown values yield area
            code ``"000"``.

    Returns:
        A string such as ``"(403) 555-1234"``.
    """
    return _random_phone_number(CA_AREA_CODES.get(province_abbr, ["000"]))


def _random_phone_city(province_abbr: str, city: Optional[str]) -> str:
    """Return a random phone number, preferring the city's area codes.

    Args:
        province_abbr: Province abbreviation, used when the city has no
            entry in ``CITY_AREA_CODES``.
        city: City name (matched case-insensitively); may be ``None``.

    Returns:
        A string such as ``"(403) 555-1234"``.
    """
    codes = _city_area_codes(CITY_AREA_CODES, city) or CA_AREA_CODES.get(province_abbr, ["000"])
    return _random_phone_number(codes)


def _postal_valid_for_province(province_abbr: str, postcode: str) -> bool:
    """Check that a postal code's first letter is valid for the province.

    Returns:
        ``False`` for an empty postcode, ``True`` when the province has no
        entry in ``POSTAL_PREFIXES``, otherwise whether the first letter is
        one of the province's prefixes.
    """
    if not postcode:
        return False
    prefixes = POSTAL_PREFIXES.get(province_abbr)
    if not prefixes:
        return True
    return postcode[0].upper() in prefixes


def generate_canada_info(
    province: str,
    city: Optional[str] = None,
    max_attempts: int = 15,
    sleep_sec: float = 0.6,
) -> Dict[str, str]:
    """Generate a random Canadian person with a reverse-geocoded address.

    Args:
        province: Province as full name or abbreviation.
        city: Optional city; a random seed city of the province otherwise.
        max_attempts: Maximum number of reverse-geocoding attempts.
        sleep_sec: Pause after each unsuccessful attempt, in seconds.

    Returns:
        Dict with keys ``firstname``, ``lastname``, ``full_name``,
        ``birthday``, ``address_str``, ``city_name``, ``phone``,
        ``postcode`` and ``province`` (full name).
    """
    prov_abbr = _normalize_province(province)
    base_lat, base_lon, chosen_city = _pick_coords(prov_abbr, city)

    def acceptable(addr: Dict, city_name: str, postcode: str) -> bool:
        # Remote provinces rarely have complete data: take the first answer.
        if prov_abbr in REMOTE_PROVINCES:
            return True
        return bool(
            _has_street_address(addr)
            and city_name
            and _postal_valid_for_province(prov_abbr, postcode)
        )

    address_str, city_name, postcode = _locate_address(
        base_lat,
        base_lon,
        chosen_city,
        lambda addr: _format_address(addr, prov_abbr),
        acceptable,
        max_attempts,
        sleep_sec,
    )

    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _random_phone_city(prov_abbr, city or chosen_city)
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        # NOTE(review): for a partial address this is the city (or region),
        # not a street line -- kept as-is for backward compatibility.
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": _full_region_name(CA_PROVINCE_ABBR, prov_abbr),
    }


def get_random_canada_info(province: str, city: Optional[str] = None) -> Dict[str, str]:
    """Generate a random Canadian person and address without any network use.

    Args:
        province: Province as full name or abbreviation.
        city: Optional city; a random seed city of the province otherwise.

    Returns:
        Dict with the same keys as :func:`generate_canada_info`.
    """
    prov_abbr = _normalize_province(province)
    _, _, chosen_city = _pick_coords(prov_abbr, city)
    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _random_phone_city(prov_abbr, city or chosen_city)
    address_str = _random_street(_CA_STREET_ROOTS)
    postcode = _random_postal(prov_abbr)
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        "address_str": address_str,
        "city_name": city or chosen_city,
        "phone": phone,
        "postcode": postcode,
        "province": _full_region_name(CA_PROVINCE_ABBR, prov_abbr),
    }


# ---------------------------------------------------------------------------
# United States reference data
# ---------------------------------------------------------------------------

US_STATE_ABBR = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
}

# (latitude, longitude, label) seed points; only these states have data.
US_COORDS = {
    "CA": [(34.052235, -118.243683, "Los Angeles"), (37.774929, -122.419416, "San Francisco")],
    "NY": [(40.712776, -74.005974, "New York"), (42.886447, -78.878369, "Buffalo")],
    "TX": [(29.760427, -95.369804, "Houston"), (32.776665, -96.796989, "Dallas")],
    "FL": [(25.761681, -80.191788, "Miami"), (28.538336, -81.379234, "Orlando")],
    "IL": [(41.878113, -87.629799, "Chicago"), (39.781721, -89.650148, "Springfield")],
    "WA": [(47.606209, -122.332069, "Seattle"), (47.658779, -117.426047, "Spokane")],
    "MA": [(42.360082, -71.058880, "Boston"), (42.262593, -71.802293, "Worcester")],
    "PA": [(39.952583, -75.165222, "Philadelphia"), (40.440624, -79.995888, "Pittsburgh")],
    "AZ": [(33.448376, -112.074036, "Phoenix"), (32.222607, -110.974711, "Tucson")],
    "GA": [(33.748997, -84.387985, "Atlanta"), (32.080898, -81.091203, "Savannah")],
    "OH": [(39.961178, -82.998795, "Columbus"), (41.499321, -81.694359, "Cleveland")],
    "NC": [(35.227085, -80.843124, "Charlotte"), (35.779590, -78.638179, "Raleigh")],
    "MI": [(42.331427, -83.045754, "Detroit"), (42.963240, -85.668086, "Grand Rapids")],
    "CO": [(39.739236, -104.990251, "Denver"), (38.833881, -104.821363, "Colorado Springs")],
    "VA": [(37.540725, -77.436048, "Richmond"), (36.852926, -75.977985, "Virginia Beach")],
    "NJ": [(40.735657, -74.172366, "Newark"), (40.717754, -74.043143, "Jersey City")],
    "MD": [(39.290385, -76.612189, "Baltimore"), (39.083997, -77.152757, "Rockville")],
    "MN": [(44.977753, -93.265011, "Minneapolis"), (44.953703, -93.089958, "Saint Paul")],
    "WI": [(43.038902, -87.906474, "Milwaukee"), (43.073051, -89.401230, "Madison")],
    "MO": [(38.627003, -90.199404, "St. Louis"), (39.099724, -94.578331, "Kansas City")],
    "IN": [(39.768403, -86.158068, "Indianapolis"), (41.079273, -85.139351, "Fort Wayne")],
    "TN": [(36.162664, -86.781602, "Nashville"), (35.149532, -90.048981, "Memphis")],
    "OR": [(45.515232, -122.678385, "Portland"), (44.942898, -123.035095, "Salem")],
    "NV": [(36.169941, -115.139830, "Las Vegas"), (39.529633, -119.813803, "Reno")],
}

US_CITY_AREA_CODES = {
    "Los Angeles": ["213", "310", "323", "424", "661"],
    "San Francisco": ["415", "628"],
    "New York": ["212", "347", "718", "929", "646"],
    "Buffalo": ["716"],
    "Houston": ["713", "281", "832"],
    "Dallas": ["214", "469", "972"],
    "Miami": ["305", "786"],
    "Orlando": ["407", "689"],
    "Chicago": ["312", "773", "872"],
    "Seattle": ["206"],
    "Spokane": ["509"],
    "Boston": ["617", "857"],
    "Worcester": ["508", "774"],
    "Philadelphia": ["215", "267", "445"],
    "Pittsburgh": ["412", "878"],
    "Phoenix": ["602", "480", "623"],
    "Tucson": ["520"],
    "Atlanta": ["404", "470", "678", "770"],
    "Savannah": ["912"],
    "Columbus": ["614", "380"],
    "Cleveland": ["216", "440"],
    "Charlotte": ["704", "980"],
    "Raleigh": ["919", "984"],
    "Detroit": ["313", "734", "586"],
    "Grand Rapids": ["616"],
    "Denver": ["303", "720"],
    "Colorado Springs": ["719"],
    "Richmond": ["804"],
    "Virginia Beach": ["757"],
    "Newark": ["973", "862"],
    "Jersey City": ["201", "551"],
    "Baltimore": ["410", "443", "667"],
    "Rockville": ["240", "301"],
    "Minneapolis": ["612"],
    "Saint Paul": ["651"],
    "Milwaukee": ["414"],
    "Madison": ["608"],
    "St. Louis": ["314", "636"],
    "Kansas City": ["816"],
    "Indianapolis": ["317", "463"],
    "Fort Wayne": ["260"],
    "Nashville": ["615", "629"],
    "Memphis": ["901"],
    "Portland": ["503", "971"],
    "Salem": ["503"],
    "Las Vegas": ["702", "725"],
    "Reno": ["775"],
}

# State-level area codes: the union of the codes of the state's seed cities.
US_AREA_CODES = {
    abbr: sorted({code for _, _, city in cities for code in US_CITY_AREA_CODES.get(city, [])})
    for abbr, cities in US_COORDS.items()
}

# Inclusive range of the first three ZIP digits per state.
US_ZIP_RANGES = {
    "CA": (900, 961),
    "NY": (100, 149),
    "TX": (750, 799),
    "FL": (320, 349),
    "IL": (600, 629),
    "WA": (980, 994),
    "MA": (10, 27),
    "PA": (150, 196),
    "AZ": (850, 865),
    "GA": (300, 319),
    "OH": (430, 459),
    "NC": (270, 289),
    "MI": (480, 499),
    "CO": (800, 816),
    "VA": (220, 246),
    "NJ": (70, 89),
    "MD": (206, 219),
    "MN": (550, 567),
    "WI": (530, 549),
    "MO": (630, 658),
    "IN": (460, 479),
    "TN": (370, 385),
    "OR": (970, 979),
    "NV": (889, 898),
}

# ZIP or ZIP+4.
_US_ZIP_RE = re.compile(r"\d{5}(-\d{4})?")


# ---------------------------------------------------------------------------
# United States
# ---------------------------------------------------------------------------

def _normalize_state(state: str) -> str:
    """Normalize a US state given as full name or abbreviation.

    Args:
        state: e.g. ``"California"``, ``"california"`` or ``"ca"``.

    Returns:
        The state abbreviation (e.g. ``"CA"``); unknown names are returned
        stripped but unchanged.

    Raises:
        ValueError: If *state* is empty.
    """
    return _normalize_region(state, US_STATE_ABBR, "state")


def _us_pick_coords(state_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """Choose a seed coordinate for a state and optional city.

    Args:
        state_abbr: State abbreviation.
        city: City label to prefer (case-insensitive); may be ``None``.

    Returns:
        ``(lat, lon, city_name)``. Falls back to New York when the state has
        no entry in ``US_COORDS``.
    """
    return _pick_from_coords(
        US_COORDS.get(state_abbr), city, (40.712776, -74.005974, "New York")
    )


def _us_format_address(address: Dict, state_abbr: str) -> str:
    """Format a Nominatim ``address`` dict as a US address string.

    Args:
        address: The ``address`` field of a Nominatim response.
        state_abbr: State abbreviation (e.g. ``"CA"``).

    Returns:
        The full address, or a partial ``city, state postcode`` form when
        street details are missing.
    """
    return _compose_address(address, state_abbr, "United States")


def _us_random_phone_state(state_abbr: str, city: Optional[str]) -> str:
    """Return a random US phone number, preferring the city's area codes.

    Args:
        state_abbr: State abbreviation, used when the city has no entry in
            ``US_CITY_AREA_CODES``; unknown states yield area code ``"000"``.
        city: City name (matched case-insensitively); may be ``None``.

    Returns:
        A string such as ``"(213) 555-1234"``.
    """
    codes = _city_area_codes(US_CITY_AREA_CODES, city) or US_AREA_CODES.get(state_abbr, ["000"])
    return _random_phone_number(codes)


def _us_random_zip_for_state(state_abbr: str) -> str:
    """Return a 5-digit ZIP code whose 3-digit prefix fits the state's range.

    States without an entry in ``US_ZIP_RANGES`` get a prefix in 100-999.
    """
    rng = US_ZIP_RANGES.get(state_abbr)
    if not rng:
        prefix = random.randint(100, 999)
    else:
        prefix = random.randint(rng[0], rng[1])
    suffix = random.randint(0, 99)
    return f"{prefix:03d}{suffix:02d}"


def generate_us_info(
    state: str,
    city: Optional[str] = None,
    max_attempts: int = 15,
    sleep_sec: float = 0.6,
) -> Dict[str, str]:
    """Generate a random US person with a reverse-geocoded address.

    Args:
        state: State as full name or abbreviation.
        city: Optional city; a random seed city of the state otherwise.
        max_attempts: Maximum number of reverse-geocoding attempts.
        sleep_sec: Pause after each unsuccessful attempt, in seconds.

    Returns:
        Dict with keys ``firstname``, ``lastname``, ``full_name``,
        ``birthday``, ``address_str``, ``city_name``, ``phone``,
        ``postcode`` and ``state`` (full name).
    """
    state_abbr = _normalize_state(state)
    base_lat, base_lon, chosen_city = _us_pick_coords(state_abbr, city)

    def acceptable(addr: Dict, city_name: str, postcode: str) -> bool:
        return bool(
            _has_street_address(addr)
            and city_name
            and _US_ZIP_RE.fullmatch(postcode or "")
        )

    address_str, city_name, postcode = _locate_address(
        base_lat,
        base_lon,
        chosen_city,
        lambda addr: _us_format_address(addr, state_abbr),
        acceptable,
        max_attempts,
        sleep_sec,
    )

    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _us_random_phone_state(state_abbr, city or chosen_city)
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        # NOTE(review): for a partial address this is the city (or region),
        # not a street line -- kept as-is for backward compatibility.
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "state": _full_region_name(US_STATE_ABBR, state_abbr),
    }


def get_random_us_info(state: str, city: Optional[str] = None) -> Dict[str, str]:
    """Generate a random US person and address without any network use.

    Args:
        state: State as full name or abbreviation.
        city: Optional city; a random seed city of the state otherwise.

    Returns:
        Dict with the same keys as :func:`generate_us_info`.
    """
    state_abbr = _normalize_state(state)
    _, _, chosen_city = _us_pick_coords(state_abbr, city)
    firstname, lastname = _random_name()
    birthday = _random_birthday()
    phone = _us_random_phone_state(state_abbr, city or chosen_city)
    address_str = _random_street(_US_STREET_ROOTS)
    postcode = _us_random_zip_for_state(state_abbr)
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": f"{firstname} {lastname}",
        "birthday": birthday,
        "address_str": address_str,
        "city_name": city or chosen_city,
        "phone": phone,
        "postcode": postcode,
        "state": _full_region_name(US_STATE_ABBR, state_abbr),
    }


# ---------------------------------------------------------------------------
# Child / parent pairs
# ---------------------------------------------------------------------------

def _random_birthday_by_age_range(min_age: int, max_age: int) -> str:
    """Return a random birthday whose age today lies in ``[min_age, max_age]``.

    Negative *min_age* is treated as 0 and *max_age* is raised to *min_age*
    if smaller. Bounds are computed with calendar years so the resulting age
    never falls outside the requested range.

    Returns:
        The birthday as ``yyyy-mm-dd``.
    """
    min_age = max(min_age, 0)
    max_age = max(max_age, min_age)
    today = date.today()
    # Born one day after "max_age + 1 years ago" -> still max_age today.
    earliest = _years_before(today, max_age + 1) + timedelta(days=1)
    # Born exactly "min_age years ago" -> turned min_age today.
    latest = _years_before(today, min_age)
    return _random_date_between(earliest, latest)


def _random_date_between(start: date, end: date) -> str:
    """Return a random date in ``[start, end]`` (inclusive) as ``yyyy-mm-dd``.

    The bounds are swapped if given in reverse order.
    """
    if end < start:
        start, end = end, start
    day = start + timedelta(days=random.randint(0, (end - start).days))
    return day.isoformat()


def _family_record(
    child_first: str,
    parent_first: str,
    last: str,
    child_birthday: str,
    parent_birthday: str,
    addr_info: Dict[str, str],
    child_phone: Optional[str],
    parent_phone: Optional[str],
    region_key: str,
) -> Dict[str, str]:
    """Assemble the flat child/parent result dict.

    *region_key* is ``"state"`` or ``"province"``; it selects the field read
    from *addr_info* and the suffix of the ``child_*``/``parent_*`` keys.
    """
    region_full = addr_info.get(region_key)
    people = (
        ("child", child_first, child_birthday, child_phone),
        ("parent", parent_first, parent_birthday, parent_phone),
    )
    record: Dict[str, str] = {}
    for role, first, birthday, phone in people:
        record[f"{role}_firstname"] = first
        record[f"{role}_lastname"] = last
        record[f"{role}_full_name"] = f"{first} {last}"
        record[f"{role}_birthday"] = birthday
        record[f"{role}_address_str"] = addr_info.get("address_str")
        record[f"{role}_city_name"] = addr_info.get("city_name")
        record[f"{role}_phone"] = phone
        record[f"{role}_postcode"] = addr_info.get("postcode")
        record[f"{role}_{region_key}"] = region_full
    return record


def generate_child_parent_names(
    enforce_period_under13: bool = True,
    period_start: str = "2013-07-01",
    period_end: str = "2020-04-01",
    min_child_age: int = 1,
    max_child_age: int = 17,
    min_parent_age: int = 25,
    max_parent_age: int = 65,
    country: str = "US",
    province_or_state: Optional[str] = None,
    city: Optional[str] = None,
    use_network: bool = False,
    separate_phones: bool = True,
) -> Dict[str, str]:
    """Generate a child and a parent sharing a surname and an address.

    Args:
        enforce_period_under13: If true, the child's birthday is chosen so
            that the child is younger than 13 up to and including
            *period_end*, and the parent is 20-45 years older than the child.
        period_start: ISO date; validated, but it does not constrain the
            birthday (being under 13 at *period_end* implies being under 13
            at every earlier date).
        period_end: ISO date marking the end of the period.
        min_child_age: Minimum child age (used when the period rule is off).
        max_child_age: Maximum child age (used when the period rule is off).
        min_parent_age: Minimum parent age (used when the period rule is off).
        max_parent_age: Maximum parent age (used when the period rule is off).
        country: ``"US"`` (default) for a US record; any other value,
            including ``None``, yields a Canadian record.
        province_or_state: Region as full name or abbreviation; random among
            regions with seed data when omitted.
        city: Optional city; random within the region when omitted.
        use_network: Use Nominatim reverse geocoding instead of local
            address generation.
        separate_phones: Give the child a phone number generated
            independently of the parent's.

    Returns:
        Dict with ``child_*`` and ``parent_*`` keys for first name, last
        name, full name, birthday, address, city, phone, postcode and
        ``state``/``province``.

    Raises:
        ValueError: If *period_start* or *period_end* is not an ISO date
            (only when *enforce_period_under13* is true).
    """
    parent_first, parent_last = _random_name()
    child_first, _ = _random_name()

    if enforce_period_under13:
        date.fromisoformat(period_start)  # validation only, see docstring
        period_end_date = date.fromisoformat(period_end)
        # Born after "period_end - 13 years" => 13th birthday is after the period.
        earliest_birth = _years_before(period_end_date, 13) + timedelta(days=1)
        child_birthday = _random_date_between(earliest_birth, period_end_date)
        child_date = date.fromisoformat(child_birthday)
        parent_birthday = _years_before(child_date, random.randint(20, 45)).isoformat()
    else:
        child_birthday = _random_birthday_by_age_range(min_child_age, max_child_age)
        parent_birthday = _random_birthday_by_age_range(min_parent_age, max_parent_age)

    country = (country or "CA").upper()
    if country == "US":
        region_key = "state"
        # Random regions come from US_COORDS: states without seed data would
        # otherwise be paired with the New York fallback city.
        region = (
            _normalize_state(province_or_state)
            if province_or_state
            else random.choice(list(US_COORDS))
        )
        make_info = generate_us_info if use_network else get_random_us_info
        make_phone = _us_random_phone_state
    else:
        region_key = "province"
        region = (
            _normalize_province(province_or_state)
            if province_or_state
            else random.choice(list(CA_PROVINCE_ABBR.values()))
        )
        make_info = generate_canada_info if use_network else get_random_canada_info
        make_phone = _random_phone_city

    addr_info: Dict[str, str] = make_info(region, city)
    parent_phone = addr_info.get("phone")
    child_phone = parent_phone
    if separate_phones:
        child_phone = make_phone(region, addr_info.get("city_name"))

    return _family_record(
        child_first,
        parent_first,
        parent_last,
        child_birthday,
        parent_birthday,
        addr_info,
        child_phone,
        parent_phone,
        region_key,
    )


def main() -> None:
    """Demo: print a random record for Calgary, Alberta (uses the network)."""
    info = generate_canada_info("Alberta", "Calgary")
    print(info)


if __name__ == "__main__":
    # The default demo uses the offline child/parent generator; call main()
    # instead for the network-backed Canadian demo.
    info = generate_child_parent_names()
    print(info)