# france_visa_registration_bot.py (13 KB)
import argparse
import base64
import concurrent.futures
import json
import os
import random
import re
import shutil
import socket
import time
import uuid
from datetime import datetime, timedelta, timezone
from typing import Optional, Dict
from urllib.parse import urlencode

import requests
from DrissionPage import ChromiumPage, ChromiumOptions
from DrissionPage.common import Keys

import configure
from toolkit.captcha_breaker import recognize_captcha_with_qwen
from toolkit.proxy_tunnel import ProxyTunnel
from toolkit.vs_cloud_api import VSCloudApi
from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
from utils.fingerprint_utils import FingerprintGenerator
from utils.keyboard import HumanKeyboard
from utils.mouse import HumanMouse
from utils.scroll import HumanScroll
from vs_types import NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  28. def load_proxies(pool_name):
  29. """从 config/proxies.json 读取对应的代理池"""
  30. config_path = os.path.join(os.path.dirname(__file__), 'config', 'proxies.json')
  31. try:
  32. with open(config_path, 'r', encoding='utf-8') as f:
  33. data = json.load(f)
  34. proxies = data.get(pool_name, [])
  35. if not proxies:
  36. raise ValueError(f"代理池 '{pool_name}' 为空或不存在!")
  37. return proxies
  38. except Exception as e:
  39. print(f"读取代理配置文件失败: {e}")
  40. exit(1)
  41. class FranceVisaRegistrator:
  42. def __init__(self, france_visa_url, proxy_config: Optional[Dict]=None, capsolver_key: Optional[str]=None, user_inputs: Optional[Dict]=None):
  43. self.proxy_config = proxy_config
  44. self.capsolver_key = capsolver_key
  45. self.user_inputs = user_inputs
  46. # 隔离的用户数据目录
  47. self.instance_id = uuid.uuid4().hex[:8]
  48. self.france_visa_url = france_visa_url
  49. # self.instance_id = '18d389e9'
  50. self.workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"reg_session_{self.instance_id}"))
  51. self.page = None
  52. self.mouse = None
  53. self.keyboard = None
  54. # 持有隧道实例
  55. self.tunnel = None
  56. def _log(self, msg):
  57. now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  58. print(f"[{now}][TLS-Reg-{self.instance_id}] {msg}")
  59. def _get_free_port(self):
  60. """获取可用端口,防止 DrissionPage 解析日志报错"""
  61. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  62. s.bind(('', 0))
  63. return s.getsockname()[1]
  64. def save_screenshot(self, name_prefix):
  65. try:
  66. timestamp = int(time.time())
  67. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  68. save_path = os.path.join("data", filename)
  69. os.makedirs("data", exist_ok=True)
  70. self.page.get_screenshot(path=save_path, full_page=False)
  71. self._log(f"Screenshot saved to {save_path}")
  72. except Exception as e:
  73. self._log(f"Failed to save screenshot: {e}")
  74. def init_browser(self):
  75. """初始化独立、配置好代理的浏览器环境"""
  76. self._log("Initializing browser...")
  77. co = ChromiumOptions()
  78. # 1. 端口与路径隔离
  79. port = self._get_free_port()
  80. co.set_local_port(port)
  81. co.set_user_data_path(self.workspace)
  82. chrome_path = configure.CHROME_PATH
  83. if not chrome_path:
  84. chrome_path = os.getenv("CHROME_BIN")
  85. if chrome_path and os.path.exists(chrome_path):
  86. co.set_paths(browser_path=chrome_path)
  87. # 2. 代理配置 (支持账号密码)
  88. if self.proxy_config and self.proxy_config.get("ip"):
  89. p = self.proxy_config
  90. if p.get("username") and p.get("password"):
  91. self.tunnel = ProxyTunnel(p['ip'], p['port'], p['username'], p['password'])
  92. local_proxy = self.tunnel.start()
  93. self._log(f"Tunnel started at {local_proxy}")
  94. co.set_argument(f'--proxy-server={local_proxy}')
  95. else:
  96. proxy_str = f"{p.get('proto', 'http')}://{p['ip']}:{p['port']}"
  97. co.set_argument(f'--proxy-server={proxy_str}')
  98. else:
  99. self._log("[WARN] No proxy configured!")
  100. fingerprint_gen = FingerprintGenerator()
  101. specific_fp = fingerprint_gen.generate(self.instance_id)
  102. self._log(f'browser fingerprint={specific_fp}')
  103. # 3. 反爬及稳定性配置
  104. co.headless(False)
  105. co.set_argument('--no-sandbox')
  106. co.set_argument('--lang=en-us')
  107. co.set_argument('--accept-lang=en-us')
  108. # co.set_argument('--disable-gpu')
  109. co.set_argument('--disable-dev-shm-usage')
  110. co.set_argument('--window-size=1920,1080')
  111. co.set_argument('--disable-blink-features=AutomationControlled')
  112. co.set_argument(f"--fingerprint={specific_fp.get('seed')}")
  113. co.set_argument(f"--fingerprint-platform={specific_fp.get('platform')}")
  114. co.set_argument(f"--fingerprint-brand={specific_fp.get('brand')}")
  115. self.page = ChromiumPage(co)
  116. self.page.get(self.france_visa_url)
  117. time.sleep(5)
  118. cf_bypasser = CloudflareBypasser(self.page, log=True)
  119. cf_bypasser.bypass(max_retry=8)
  120. time.sleep(3)
  121. cf_bypasser.handle_waiting_room()
  122. self._log("正在初始化拟人化工具...")
  123. self.mouse = HumanMouse(self.page, debug=True)
  124. self.keyboard = HumanKeyboard(self.page)
  125. self._log("随机化鼠标开始位置...")
  126. viewport_width = self.page.rect.viewport_size[0]
  127. viewport_height = self.page.rect.viewport_size[1]
  128. init_x = random.randint(10, viewport_width - 10)
  129. init_y = random.randint(10, viewport_height - 10)
  130. self.mouse.move(init_x, init_y)
  131. def register(self):
  132. username = self.user_inputs.get('username')
  133. first_name = self.user_inputs.get('first_name')
  134. last_name = self.user_inputs.get('last_name')
  135. password = f'Visafly@1234'
  136. self.page.wait.ele_deleted('tag:h2@@text():Log in to France-Visas', timeout=5)
  137. self.page.ele('tag:button@@text():Create an account').click()
  138. time.sleep(5)
  139. self.page.ele('tag:input@@name=lastName').input(last_name)
  140. self.page.ele('tag:input@@name=firstName').input(first_name)
  141. self.page.ele('tag:input@@name=email').input(username)
  142. self.page.ele('tag:input@@name=emailVerif').input(username)
  143. self.page.ele('tag:input@@name=password').input(password)
  144. self.page.ele('tag:input@@name=password-confirm').input(password)
  145. self.page.ele('tag:select@@name=ddeLanguage').select('English')
  146. captcha = self.page.ele('#captchaComponent').ele('tag:img')
  147. src = captcha.attr("src")
  148. print(src)
  149. base64_data = src.split(",")[1]
  150. with open("captcha.png", "wb") as f:
  151. f.write(base64.b64decode(base64_data))
  152. result = recognize_captcha_with_qwen("captcha.png", "sk-893e895724c6403d81374e515ffaf427")
  153. print(f'captcha result={result}')
  154. self.page.ele('tag:input@@name=captchaFormulaireExtInput').input(result)
  155. self.page.ele("tag:button@@text():Create an account").click()
  156. def activate(self, sent_at=None):
  157. username = self.user_inputs.get('username')
  158. email_box = 'visafly666@gmail.com'
  159. sender = 'noreply at interieur.gouv.fr'
  160. recipient = username
  161. subject_keywords = 'Create your France-Visas account'
  162. body_keywords = ''
  163. if not sent_at:
  164. now_utc = datetime.utcnow()
  165. sent_at = now_utc.strftime("%Y-%m-%d %H:%M:%S")
  166. content_out = VSCloudApi.Instance().fetch_mail_content(
  167. email=email_box,
  168. sender=sender,
  169. recipient=recipient,
  170. subject_keywords=subject_keywords,
  171. body_keywords=body_keywords,
  172. sent_date=sent_at,
  173. expiry=600
  174. )
  175. self._log(f'activate email content={content_out}')
  176. match = re.search(r'https://\S+', content_out)
  177. activate_link = match.group(0) if match else None
  178. self.page.get(activate_link)
  179. time.sleep(3)
  180. def make_account_useful(self):
  181. def fill_date_field(page, selector, date_str):
  182. if not date_str:
  183. return
  184. ele = page.ele(selector)
  185. ele.scroll.to_see(center=True)
  186. js_detect_format = """
  187. const parts = new Intl.DateTimeFormat().formatToParts(new Date(2023, 11, 31));
  188. let format = [];
  189. for (let part of parts) {
  190. if (part.type === 'year') format.push('Y');
  191. if (part.type === 'month') format.push('M');
  192. if (part.type === 'day') format.push('D');
  193. }
  194. return format;
  195. """
  196. date_format = page.run_js(js_detect_format)
  197. year, month, day = date_str.split('-')
  198. date_dict = {
  199. 'Y': year,
  200. 'M': month.zfill(2),
  201. 'D': day.zfill(2)
  202. }
  203. ele.click()
  204. time.sleep(0.1)
  205. page.actions.type(Keys.LEFT * 3)
  206. time.sleep(0.1)
  207. for i, char in enumerate(date_format):
  208. val = date_dict[char]
  209. page.actions.type(val)
  210. time.sleep(0.1)
  211. if char == 'Y':
  212. if i < 2:
  213. page.actions.type(Keys.RIGHT)
  214. time.sleep(0.1)
  215. else:
  216. pass
  217. passport_no = self.user_inputs.get('passport_no')
  218. passport_issue_date = self.user_inputs.get('passport_issue_date')
  219. passport_expiry_date = self.user_inputs.get('passport_expiry_date')
  220. nationality = self.user_inputs.get('nationality')
  221. passport_issue_from = self.user_inputs.get('passport_issue_from')
  222. self.page.ele('#formHeader:navigationLanguage_input').select('English')
  223. time.sleep(3)
  224. self.page.ele('#formAccueilUsager:ajouterGroupe').click()
  225. time.sleep(5)
  226. self.page.ele('#formStep1:visas-selected-nationality_input').select(nationality)
  227. self.page.ele('#formStep1:Visas-selected-deposit-country_input').select('Ireland')
  228. self.page.ele('#formStep1:Visas-selected-stayDuration_input').select('Short-stay (≤ 90 days)')
  229. self.page.ele('#formStep1:Visas-selected-destination_input').select('France')
  230. self.page.ele('#formStep1:Visas-selected-deposit-town_input').select('Dublin')
  231. self.page.ele('#formStep1:Visas-selected-authority_input').select(passport_issue_from)
  232. self.page.ele('#formStep1:Visas-dde-travel-document_input').select('Ordinary passport')
  233. self.page.ele('#formStep1:Visas-dde-travel-document-number').input(passport_no)
  234. fill_date_field(self.page, '#formStep1:Visas-dde-release_date_real_input', passport_issue_date)
  235. fill_date_field(self.page, '#formStep1:Visas-dde-expiration_date_input', passport_expiry_date)
  236. self.page.ele('#formStep1:Visas-selected-purposeCategory_input').select('Tourism')
  237. self.page.ele('#formStep1:Visas-selected-purpose_input').select('Tourism / Private visit')
  238. self.page.ele('#formStep1:btnVerifier').click()
  239. time.sleep(3)
  240. self.page.ele('#formStep1:btnSuivant').click()
  241. time.sleep(3)
  242. self.page.ele('#formStep1:btnValiderModal').click()
  243. time.sleep(3)
  244. self.page.ele('.iconeDDEIdPanel').click()
  245. time.sleep(0.5)
  246. self.page.ele('text():My applications').click()
  247. time.sleep(3)
  248. html_content = self.page.html
  249. match = re.search(r'FRA1[A-Z0-9]+', html_content)
  250. if not match:
  251. raise BizLogicError(message='FRA1 not found')
  252. fra_number = match.group(0)
  253. print(fra_number)
  254. return fra_number
  255. def cleanup(self):
  256. """清理浏览器进程和缓存文件夹"""
  257. self._log("Cleaning up resources...")
  258. if self.page:
  259. try: self.page.quit()
  260. except: pass
  261. if os.path.exists(self.workspace):
  262. time.sleep(1) # 等待文件锁释放
  263. shutil.rmtree(self.workspace, ignore_errors=True)
  264. def main():
  265. france_visa_url = 'https://application-form.france-visas.gouv.fr/fv-fo-dde/'
  266. proxy_config = {
  267. 'ip': '127.0.0.1',
  268. 'port': 7890,
  269. 'username': '',
  270. 'password': ''
  271. }
  272. capsolver_key = ''
  273. user_inputs = {
  274. "username": "ManaliAshokGaikwad26@gmail-app.com",
  275. "first_name": "Manali Ashok",
  276. "last_name": "Gaikwad",
  277. "nationality": "Indian",
  278. "passport_issue_from": "India",
  279. "passport_no": "Z4413123",
  280. "passport_issue_date": "2018-01-15",
  281. "passport_expiry_date": "2028-01-14",
  282. }
  283. bot = FranceVisaRegistrator(
  284. france_visa_url,
  285. proxy_config=proxy_config,
  286. capsolver_key=capsolver_key,
  287. user_inputs=user_inputs
  288. )
  289. bot.init_browser()
  290. now_utc = datetime.utcnow()
  291. sent_at = now_utc.strftime("%Y-%m-%d %H:%M:%S")
  292. bot.register()
  293. bot.activate(sent_at=sent_at)
  294. bot.make_account_useful()
  295. if __name__ == "__main__":
  296. main()