vfs_registration_bot.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. import os
  2. import random
  3. import socket
  4. import json
  5. import time
  6. import string
  7. import logging
  8. import base64
  9. from datetime import datetime, timezone
  10. from urllib.parse import urlparse, urlencode
  11. from bs4 import BeautifulSoup
  12. from cryptography.hazmat.primitives import serialization, hashes
  13. from cryptography.hazmat.primitives.asymmetric import padding
  14. from cryptography.hazmat.backends import default_backend
  15. from DrissionPage import ChromiumPage, ChromiumOptions
  16. from vs_types import RateLimiteddError, BizLogicError
  17. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  18. from toolkit.vs_cloud_api import VSCloudApi
  19. # --- 配置日志 ---
  20. logging.basicConfig(
  21. level=logging.INFO,
  22. format='%(asctime)s [%(levelname)s] %(message)s',
  23. datefmt='%H:%M:%S'
  24. )
  25. logger = logging.getLogger("VFSRegistrar")
  26. # --- 常量 ---
  27. VFS_PUBLIC_KEY_PEM = """-----BEGIN PUBLIC KEY-----
  28. MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuupFgB+lYIOtSxrRoHzc
  29. LmCZKJ6+oSbgqgOPzFMM0TasOeLw0NXEn1XfIzXdx75+tegNKwyIZumoh0yhubKs
  30. t59GV321kN0iquYRHrdh3ygfDDHlS9rROQeBqRga0ncSADtbLMrBPqXJjPCoV76y
  31. t92towriKoH75BhiazY0mghm4LjmAWrV0u/GNpV3tk9bxbtHEXGaFmxCJqjg+7x6
  32. 1e5wXLfvpj9w1QsiSWOSJxLOyICz/9ByxXycQQFdNmjnnnwco9Gt/Mi33NYH71j0
  33. 5oXIjklFC4lvJqaqSY5lS7Vwb9oCt9zX9J0Yz4z4e/3V+0jgRnWOFGofyks4FKe2
  34. GQIDAQAB
  35. -----END PUBLIC KEY-----"""
  36. class VFSHelper:
  37. """工具方法的静态类"""
  38. @staticmethod
  39. def generate_mobile_number(country_code=353, e164_format=False):
  40. if country_code == 353: # Ireland
  41. prefix = random.choice(['83', '85', '86', '87', '89'])
  42. number = f"{prefix}{''.join([str(random.randint(0, 9)) for _ in range(7)])}"
  43. return f"+353{number}" if e164_format else number
  44. elif country_code == 44: # UK
  45. prefix_second = random.choice(['1', '2', '3', '4', '5', '7', '8', '9'])
  46. number = f"7{prefix_second}{''.join([str(random.randint(0, 9)) for _ in range(8)])}"
  47. return f"+44{number}" if e164_format else number
  48. elif country_code == 86: # China
  49. prefixes = ["130", "131", "132", "133", "135", "136", "138", "139", "150", "158", "159", "186"]
  50. prefix = random.choice(prefixes)
  51. number = f"{prefix}{''.join([str(random.randint(0, 9)) for _ in range(8)])}"
  52. return f"+86{number}" if e164_format else number
  53. return "".join([str(random.randint(0, 9)) for _ in range(10)])
  54. @staticmethod
  55. def generate_password(length=12):
  56. chars = string.ascii_letters + string.digits + "@#$%"
  57. while True:
  58. pwd = ''.join(random.choices(chars, k=length))
  59. if (any(c.islower() for c in pwd) and
  60. any(c.isupper() for c in pwd) and
  61. any(c.isdigit() for c in pwd) and
  62. any(c in "@#$%" for c in pwd)):
  63. return pwd
  64. @staticmethod
  65. def encrypt_password(password: str) -> str:
  66. public_key = serialization.load_pem_public_key(
  67. VFS_PUBLIC_KEY_PEM.encode(), backend=default_backend()
  68. )
  69. ciphertext = public_key.encrypt(
  70. password.encode(),
  71. padding.OAEP(
  72. mgf=padding.MGF1(algorithm=hashes.SHA256()),
  73. algorithm=hashes.SHA256(),
  74. label=None
  75. )
  76. )
  77. return base64.b64encode(ciphertext).decode()
  78. @staticmethod
  79. def get_client_source() -> str:
  80. timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
  81. payload = f"GA;{timestamp}Z"
  82. return VFSHelper.encrypt_password(payload)
  83. class BrowserResponse:
  84. """标准化浏览器响应"""
  85. def __init__(self, result_dict):
  86. result_dict = result_dict or {}
  87. self.status_code = result_dict.get('status', 0)
  88. self.text = result_dict.get('body', '')
  89. self.headers = result_dict.get('headers', {})
  90. self.url = result_dict.get('url', '')
  91. self._json = None
  92. def json(self):
  93. if self._json is None:
  94. try:
  95. self._json = json.loads(self.text) if self.text else {}
  96. except json.JSONDecodeError:
  97. self._json = {}
  98. return self._json
  99. class VFSRegistrationBot:
  100. def __init__(self, config):
  101. self.config = config
  102. self.page = None
  103. self.proxy_url = config.get("proxy_url")
  104. def _init_browser(self):
  105. """初始化浏览器配置"""
  106. co = ChromiumOptions()
  107. # 查找可用端口
  108. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  109. s.bind(('', 0))
  110. port = s.getsockname()[1]
  111. co.set_local_port(port)
  112. chrome_path = os.getenv("CHROME_BIN")
  113. if chrome_path:
  114. co.set_paths(browser_path=chrome_path)
  115. if self.proxy_url:
  116. co.set_argument(f'--proxy-server={self.proxy_url}')
  117. co.headless(False) # VFS 验证码通常需要有头模式
  118. co.set_argument('--no-sandbox')
  119. co.set_argument('--disable-gpu')
  120. co.set_argument('--disable-dev-shm-usage')
  121. co.set_argument('--window-size=1920,1080')
  122. co.set_argument('--disable-blink-features=AutomationControlled')
  123. # 创建页面对象
  124. self.page = ChromiumPage(co)
  125. # 设置超时
  126. self.page.set.timeouts(15)
  127. def _perform_js_fetch(self, method, url, headers=None, data=None, json_data=None, retry_count=0):
  128. """注入JS执行Fetch请求,绕过部分指纹检测"""
  129. if not self.page:
  130. raise BizLogicError("Browser not initialized")
  131. if retry_count > 3:
  132. raise BizLogicError("Max retries exceeded for request")
  133. fetch_options = {
  134. "method": method.upper(),
  135. "headers": headers or {},
  136. "credentials": "include"
  137. }
  138. if json_data:
  139. fetch_options['body'] = json.dumps(json_data)
  140. fetch_options['headers']['Content-Type'] = 'application/json'
  141. elif data:
  142. fetch_options['body'] = urlencode(data) if isinstance(data, dict) else str(data)
  143. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  144. logger.debug(f"Request: {method} {url}")
  145. js_script = f"""
  146. const url = "{url}";
  147. const options = {json.dumps(fetch_options)};
  148. return fetch(url, options)
  149. .then(async response => {{
  150. const text = await response.text();
  151. const headers = {{}};
  152. response.headers.forEach((value, key) => headers[key] = value);
  153. return {{
  154. status: response.status,
  155. body: text,
  156. headers: headers,
  157. url: response.url
  158. }};
  159. }})
  160. .catch(err => ({{ status: 0, body: err.toString() }}));
  161. """
  162. try:
  163. res_dict = self.page.run_js(js_script, timeout=60)
  164. resp = BrowserResponse(res_dict)
  165. if resp.status_code == 200:
  166. return resp
  167. # 处理 Cloudflare 403 拦截
  168. if resp.status_code == 403 and ("cloudflare" in resp.text.lower() or "Just a moment" in resp.text):
  169. logger.warning(f"Cloudflare 403 detected. Retrying ({retry_count+1})...")
  170. new_token = self._refresh_turnstile()
  171. if new_token and json_data and "captcha_api_key" in json_data:
  172. json_data["captcha_api_key"] = new_token
  173. return self._perform_js_fetch(method, url, headers, data, json_data, retry_count + 1)
  174. if resp.status_code == 429:
  175. raise RateLimiteddError(f"Rate Limit: {resp.text[:100]}")
  176. return resp # 返回其他状态码供调用者处理 (如 400 业务错误)
  177. except Exception as e:
  178. logger.error(f"JS Execution Error: {e}")
  179. raise BizLogicError(f"Fetch failed: {e}")
  180. def _handle_cookie_banner(self):
  181. """处理 Cookie 弹窗"""
  182. try:
  183. js = """
  184. var btn = document.getElementById('onetrust-accept-btn-handler');
  185. if(btn) { btn.click(); return true; }
  186. var banner = document.getElementById('onetrust-banner-sdk');
  187. if(banner) { banner.remove(); return true; }
  188. """
  189. self.page.run_js(js)
  190. except:
  191. pass
  192. def _refresh_turnstile(self):
  193. """刷新并获取 Cloudflare Token"""
  194. logger.info("Attempting to refresh Turnstile token...")
  195. try:
  196. self.page.run_js('try{window.turnstile.reset()}catch(e){}')
  197. cf_bypasser = CloudflareBypasser(self.page, log=False)
  198. for i in range(30):
  199. time.sleep(1)
  200. try:
  201. ele = self.page.ele('@name=cf-turnstile-response')
  202. if ele and ele.value:
  203. logger.info("Turnstile token obtained.")
  204. return ele.value
  205. except:
  206. pass
  207. if i > 5:
  208. try:
  209. cf_bypasser.click_verification_button(is_dfs=(i > 15))
  210. except:
  211. pass
  212. except Exception as e:
  213. logger.error(f"Turnstile refresh failed: {e}")
  214. return None
  215. def _wait_for_activation_link(self, username, max_wait_sec=300):
  216. """轮询获取激活链接"""
  217. logger.info(f"Waiting for email to {username}...")
  218. start_time = time.time()
  219. master_email = self.config.get("master_email", "visafly666@gmail.com")
  220. while time.time() - start_time < max_wait_sec:
  221. try:
  222. utc_now_str = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
  223. # 调用邮件 API
  224. content = VSCloudApi.Instance().fetch_mail_content(
  225. master_email,
  226. 'donotreply at vfsglobal.com',
  227. username,
  228. 'Welcome',
  229. 'ActivateAccount',
  230. utc_now_str,
  231. expiry=60
  232. )
  233. if content:
  234. soup = BeautifulSoup(content, "html.parser")
  235. link = soup.find("a", string="ActivateAccount")
  236. if link:
  237. raw_link = link["href"]
  238. clean_url = raw_link.replace(" ", "").replace("\n", "").replace("\r", "").strip()
  239. return clean_url
  240. except Exception as e:
  241. logger.warning(f"Error fetching email: {e}")
  242. time.sleep(15)
  243. logger.info("Checking email again...")
  244. return None
  245. def register(self, account):
  246. """执行单个账号注册"""
  247. website = self.config['website']
  248. try:
  249. self._init_browser()
  250. logger.info(f"Opening {website}")
  251. # 1. 设置超时
  252. self.page.set.timeouts(page_load=30, script=30)
  253. # 2. 尝试打开页面
  254. try:
  255. self.page.get(website, retry=0, timeout=30)
  256. except Exception:
  257. # 超时强制停止,防止卡死
  258. logger.warning(f"Page load timed out (Stopped manually). Checking URL...")
  259. self.page.stop_loading()
  260. # 1. 过盾
  261. cf_token = None
  262. cf_bypasser = CloudflareBypasser(self.page, log=True)
  263. for _ in range(40):
  264. time.sleep(1)
  265. current_url = self.page.url
  266. if "page-not-found" in current_url:
  267. logger.error(f"❌ [BLOCKED] Redirected to 'Page Not Found' during check. Aborting.")
  268. return False
  269. # 如果页面标题变成 403 Forbidden
  270. if "403" in self.page.title and "Just a moment" not in self.page.title:
  271. logger.error(f"❌ [BLOCKED] 403 Forbidden detected. Aborting.")
  272. return False
  273. self._handle_cookie_banner()
  274. # 尝试获取 Token
  275. try:
  276. ele = self.page.ele('@name=cf-turnstile-response')
  277. if ele and ele.value:
  278. cf_token = ele.value
  279. break
  280. except:
  281. pass
  282. # 尝试点击
  283. try:
  284. cf_bypasser.click_verification_button()
  285. except:
  286. pass
  287. if not cf_token:
  288. raise BizLogicError("Failed to obtain initial Cloudflare token")
  289. # 2. 构造注册请求
  290. post_data = {
  291. 'emailid': account['username'],
  292. 'password': VFSHelper.encrypt_password(account['password']),
  293. 'confirmPassword': VFSHelper.encrypt_password(account['password']),
  294. 'processPerDataAgreed': True,
  295. 'intTransPerDataAgreed': True,
  296. 'termAndConditionAgreed': True,
  297. 'missioncode': account['mission_code'],
  298. 'countrycode': account['country_code'],
  299. 'languageCode': 'en',
  300. 'dialcode': str(account['phone_country_code']),
  301. 'contact': account['phone_number'],
  302. 'captcha_version': 'cloudflare-v1',
  303. 'captcha_api_key': cf_token,
  304. 'cultureCode': 'en-US',
  305. 'IsSpecialUser': False,
  306. }
  307. headers = {
  308. 'content-type': 'application/json;charset=utf-8',
  309. 'accept': 'application/json, text/plain, */*',
  310. 'route': f"{account['country_code']}/en/{account['mission_code']}",
  311. 'clientsource': VFSHelper.get_client_source(),
  312. }
  313. logger.info(f"Submitting registration for {account['username']}")
  314. resp = self._perform_js_fetch(
  315. 'POST',
  316. 'https://lift-api.vfsglobal.com/user/registration',
  317. headers=headers,
  318. json_data=post_data
  319. )
  320. resp_data = resp.json()
  321. if resp_data.get("code") == "200":
  322. logger.info("Registration API success. Waiting for email...")
  323. activate_link = self._wait_for_activation_link(account['username'])
  324. if activate_link:
  325. logger.info(f"Activating account: {activate_link}")
  326. # 在当前浏览器上下文中打开链接,保持环境一致性
  327. # === 关键步骤:打开新标签页并验证结果 ===
  328. # 打开新标签页
  329. activate_tab = self.page.new_tab(activate_link)
  330. try:
  331. # 等待页面加载并查找 "Activation Successful" 文本
  332. # timeout=30 表示最多等待 30 秒
  333. logger.info("Waiting for 'Activation Successful' message on page...")
  334. # DrissionPage 查找包含特定文本的元素
  335. success_ele = activate_tab.ele('Activation Successful', timeout=30)
  336. if success_ele:
  337. logger.info(f"✅ Account {account['username']} activated successfully (Verified).")
  338. return True
  339. else:
  340. # 如果没找到成功提示,尝试读取页面内容找错误原因
  341. body_text = activate_tab.ele('tag:body').text[:200]
  342. logger.error(f"Activation verification failed. Page text: {body_text}")
  343. return False
  344. except Exception as e:
  345. logger.error(f"Error checking activation status: {e}")
  346. return False
  347. finally:
  348. # 无论成功失败,关闭激活标签页,切回主标签
  349. activate_tab.close()
  350. else:
  351. logger.error("Timeout waiting for activation email.")
  352. else:
  353. logger.error(f"Registration failed: {resp_data}")
  354. except Exception as e:
  355. logger.error(f"Registration process exception: {e}", exc_info=True)
  356. finally:
  357. if self.page:
  358. try:
  359. self.page.quit()
  360. except:
  361. pass
  362. return False
  363. # --- 主流程 ---
  364. def generate_account_details(config, pool_name):
  365. """生成账号数据字典"""
  366. rand_suffix = ''.join(random.choices(string.ascii_lowercase + string.digits, k=6))
  367. username = f"{pool_name}_{rand_suffix}@{config['email_domain']}.com"
  368. phone = VFSHelper.generate_mobile_number(config['phone_country_code'])
  369. return {
  370. 'pool_name': pool_name,
  371. 'country_code': config['country_code'],
  372. 'mission_code': config['mission_code'],
  373. 'phone_country_code': config['phone_country_code'],
  374. 'phone_number': phone,
  375. 'username': username,
  376. 'password': VFSHelper.generate_password(),
  377. }
  378. def main():
  379. # 配置
  380. config = {
  381. "pool_name": "ie_fi",
  382. "email_domain": "gmail-app",
  383. "master_email": "visafly666@gmail.com",
  384. "proxy_url": "http://127.0.0.1:7890",
  385. "target_count": 30,
  386. "phone_country_code": 353,
  387. "country_code": "irl",
  388. "mission_code": "fin",
  389. "website": "https://visa.vfsglobal.com/irl/en/fin/register",
  390. }
  391. bot = VFSRegistrationBot(config)
  392. success_accounts = []
  393. print(">>> Starting Registration Bot <<<")
  394. while len(success_accounts) < config['target_count']:
  395. account = generate_account_details(config, config['pool_name'])
  396. logger.info(f"Processing Account: {account['username']}")
  397. is_success = bot.register(account)
  398. if is_success:
  399. success_accounts.append(account)
  400. logger.info(f"Progress: {len(success_accounts)}/{config['target_count']}")
  401. # 保存结果到文件,防止中途退出丢失
  402. with open("registered_accounts.json", "w", encoding='utf-8') as f:
  403. json.dump(success_accounts, f, indent=4, ensure_ascii=False)
  404. else:
  405. logger.warning("Retrying with new account details...")
  406. # 稍微暂停,避免请求过于频繁
  407. time.sleep(5)
  408. print(">>> All tasks completed <<<")
  409. if __name__ == "__main__":
  410. main()