tls_plugin.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import socket
  9. from datetime import datetime
  10. from typing import List, Dict, Optional, Any, Callable
  11. from urllib.parse import urljoin, urlparse, urlencode, parse_qs
  12. from concurrent.futures import ThreadPoolExecutor
  13. from DrissionPage import ChromiumPage, ChromiumOptions
  14. import configure
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  18. from toolkit.mihomo_tunnel import MihomoTunnel
  19. from utils.mouse import HumanMouse
  20. from utils.keyboard import HumanKeyboard
  21. from utils.fingerprint_utils import FingerprintGenerator
  22. class BrowserResponse:
  23. """模拟 requests.Response"""
  24. def __init__(self, result_dict):
  25. result_dict = result_dict or {}
  26. self.status_code = result_dict.get('status', 0)
  27. self.text = result_dict.get('body', '')
  28. self.headers = result_dict.get('headers', {})
  29. self.url = result_dict.get('url', '')
  30. self._json = None
  31. def json(self):
  32. if self._json is None:
  33. if not self.text:
  34. return {}
  35. try:
  36. self._json = json.loads(self.text)
  37. except:
  38. self._json = {}
  39. return self._json
  40. class TlsPlugin(IVSPlg):
  41. """
  42. TLSContact 签证预约插件 (DrissionPage 版)
  43. """
  44. def __init__(self, group_id: str):
  45. self.group_id = group_id
  46. self.config: Optional[VSPlgConfig] = None
  47. self.free_config: Dict[str, Any] = {}
  48. self.is_healthy = True
  49. self.logger = None
  50. self.mouse = None
  51. self.keyboard = None
  52. self.page: Optional[ChromiumPage] = None
  53. self.travel_group: Optional[Dict] = None
  54. self.instance_id = uuid.uuid4().hex[:8]
  55. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  56. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  57. if not os.path.exists(self.root_workspace):
  58. os.makedirs(self.root_workspace)
  59. self.last_refresh_time = time.time()
  60. self.tunnel = None
  61. self.session_create_time: float = 0
  62. def get_group_id(self) -> str:
  63. return self.group_id
  64. def set_log(self, logger: Callable[[str], None]):
  65. self.logger = logger
  66. def _log(self, message):
  67. if self.logger:
  68. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  69. else:
  70. print(f'[TlsPlugin] [{self.group_id}] {message}')
  71. def set_config(self, config: VSPlgConfig):
  72. self.config = config
  73. self.free_config = config.free_config or {}
  74. def keep_alive(self):
  75. """
  76. 统一保活机制:
  77. - 距离上次刷新超过 10 分钟:执行完整页面刷新并检查 Session。
  78. - 否则:随机发送 Fetch 小请求保活。
  79. """
  80. if time.time() - self.last_refresh_time >= 60*10:
  81. try:
  82. self._log("Cut all connections...")
  83. self.tunnel.cut_all_connections()
  84. self._log("refresh page...")
  85. self.page.refresh()
  86. self.page.wait.load_start(timeout=2)
  87. self.page.wait.doc_loaded()
  88. time.sleep(random.uniform(1, 3))
  89. self._check_page_is_session_expired_or_invalid('Book your appointment', html=self.page.html)
  90. self.last_refresh_time = time.time()
  91. self._log("refresh page finished")
  92. except Exception as e:
  93. self._log(f"refresh page error: {str(e)}")
  94. self.is_healthy = False
  95. else:
  96. choice = random.choice(['home', 'travel_groups'])
  97. headers = {}
  98. if choice == 'home':
  99. url = "https://visas-fr.tlscontact.com/"
  100. elif choice == 'travel_groups':
  101. url = "https://visas-fr.tlscontact.com/en-us/travel-groups"
  102. headers = {"cache-control": "max-age=0"}
  103. try:
  104. self._log(f"send keep alive fetch request ({choice})")
  105. self._perform_request("GET", url, headers=headers)
  106. except Exception as e:
  107. self._log(f"send keep alive fetch error: {str(e)}")
  108. self.is_healthy = False
  109. def simulate_random_human_clicks(self, min_x=300, max_x=800, min_y=400, max_y=600, min_clicks=1, max_clicks=2):
  110. """
  111. 在指定区域内模拟人类随机移动鼠标并点击数次。
  112. :param min_x: X坐标最小范围
  113. :param max_x: X坐标最大范围
  114. :param min_y: Y坐标最小范围
  115. :param max_y: Y坐标最大范围
  116. :param min_clicks: 随便点击的最少次数
  117. :param max_clicks: 随便点击的最多次数
  118. """
  119. click_count = random.randint(min_clicks, max_clicks)
  120. self._log(f"Starting random human simulation: will click {click_count} times in the area.")
  121. for i in range(click_count):
  122. rand_x = random.randint(min_x, max_x)
  123. rand_y = random.randint(min_y, max_y)
  124. self._log(f"[{i+1}/{click_count}] Moving mouse to ({rand_x}, {rand_y}) and clicking")
  125. self.mouse.click(rand_x, rand_y, humanize=True)
  126. if i < click_count - 1:
  127. sleep_time = random.uniform(0.5, 1.8)
  128. self._log(f"Resting for {sleep_time:.2f} seconds before next click...")
  129. time.sleep(sleep_time)
  130. self._log("Random human clicks simulation completed.")
  131. def health_check(self) -> bool:
  132. if not self.is_healthy:
  133. return False
  134. if self.page is None:
  135. return False
  136. try:
  137. if not self.page.run_js("return 1;"):
  138. return False
  139. except:
  140. return False
  141. if self.config.session_max_life > 0:
  142. current_time = time.time()
  143. elapsed_time = current_time - self.session_create_time
  144. if elapsed_time > self.config.session_max_life:
  145. self._log(f"Session expired.")
  146. return False
  147. return True
  148. def _save_screenshot(self, name_prefix):
  149. try:
  150. timestamp = int(time.time())
  151. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  152. save_path = os.path.join("data", filename)
  153. os.makedirs("data", exist_ok=True)
  154. self.page.get_screenshot(path=save_path, full_page=False)
  155. self._log(f"Screenshot saved to {save_path}")
  156. except Exception as e:
  157. self._log(f"Failed to save screenshot: {e}")
  158. def create_session(self):
  159. """
  160. 全浏览器会话创建:过盾 -> JS注入登录 -> 状态机自动路由导航 -> 到达目标页
  161. """
  162. self._log(f"Initializing Session (ID: {self.instance_id})...")
  163. captcha_future = None
  164. captcha_executor = ThreadPoolExecutor(max_workers=1)
  165. login_captcha_cfg = self.free_config.get("login_captcha", {})
  166. if login_captcha_cfg.get('solve_advance'):
  167. login_page = login_captcha_cfg.get("page_url")
  168. site_key = login_captcha_cfg.get("site_key")
  169. task_type = login_captcha_cfg.get("task")
  170. self._log(f"🚀 Early starting background Captcha solve for sitekey={site_key}")
  171. rc_params = {
  172. "type": task_type,
  173. "page": login_page,
  174. "siteKey": site_key,
  175. "apiToken": self.free_config.get("capsolver_key", "")
  176. }
  177. captcha_future = captcha_executor.submit(self._solve_recaptcha, rc_params)
  178. co = ChromiumOptions()
  179. def get_free_port():
  180. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  181. s.bind(('', 0))
  182. return s.getsockname()[1]
  183. debug_port = get_free_port()
  184. self._log(f"Assigned Debug Port: {debug_port}")
  185. co.set_local_port(debug_port)
  186. co.set_user_data_path(self.user_data_path)
  187. chrome_path = configure.CHROME_PATH
  188. if not chrome_path:
  189. chrome_path = os.getenv("CHROME_BIN")
  190. if chrome_path and os.path.exists(chrome_path):
  191. co.set_paths(browser_path=chrome_path)
  192. if self.config.proxy and self.config.proxy.ip:
  193. p = self.config.proxy
  194. self._log(f'Current proxy id={p.id}')
  195. if p.username and p.password:
  196. self._log(f"Starting Proxy Tunnel for {p.ip}...")
  197. exit_node = {
  198. "name": "ExitNode",
  199. "type": p.proto,
  200. "server": p.ip,
  201. "port": p.port,
  202. "username": p.username,
  203. "password": p.password
  204. }
  205. relay_node = None
  206. if configure.MIHOMO_RELAY_NODES:
  207. relay_node = random.choice(configure.MIHOMO_RELAY_NODES)
  208. mihomo_path = configure.MIHOMO_BIN_PATH
  209. if not mihomo_path:
  210. mihomo_path = os.getenv("MIHOMO_BIN")
  211. if not mihomo_path:
  212. raise BizLogicError(message='Mihomo path is null, You need set mihomo bin path in configure or os env')
  213. self.tunnel = MihomoTunnel(mihomo_path, exit_node=exit_node, relay_node=relay_node)
  214. local_proxy = self.tunnel.start()
  215. self._log(f"Tunnel started at {local_proxy}")
  216. co.set_argument(f'--proxy-server={local_proxy}')
  217. else:
  218. proxy_str = f"{p.proto}://{p.ip}:{p.port}"
  219. co.set_argument(f'--proxy-server={proxy_str}')
  220. else:
  221. self._log("[WARN] No proxy configured!")
  222. specific_fp = FingerprintGenerator().generate(self.config.account.username)
  223. fp_seed = specific_fp.get("seed")
  224. fp_platform = specific_fp.get("platform")
  225. fp_brand = specific_fp.get("brand")
  226. self._log(f'browser fingerprint seed={fp_seed}')
  227. co.headless(False)
  228. co.set_argument('--no-sandbox')
  229. co.set_argument('--disable-dev-shm-usage')
  230. co.set_argument('--window-size=1920,1080')
  231. co.set_argument('--disable-blink-features=AutomationControlled')
  232. # co.set_argument('--ignore-gpu-blocklist')
  233. # co.set_argument('--enable-webgl')
  234. # co.set_argument('--use-gl=angle')
  235. # co.set_argument('--use-angle=swiftshader')
  236. co.set_argument(f"--fingerprint={fp_seed}")
  237. co.set_argument(f"--fingerprint-platform={fp_platform}")
  238. co.set_argument(f"--fingerprint-brand={fp_brand}")
  239. try:
  240. self.page = ChromiumPage(co)
  241. # --- 预检指纹信息 ---
  242. if self.config.debug:
  243. self.page.get('https://example.com')
  244. js_script = """
  245. function getFingerprint() {
  246. let webglVendor = 'Unknown';
  247. let webglRenderer = 'Unknown';
  248. try {
  249. let canvas = document.createElement('canvas');
  250. let gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
  251. if (gl) {
  252. let debugInfo = gl.getExtension('WEBGL_debug_renderer_info');
  253. if (debugInfo) {
  254. webglVendor = gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL);
  255. webglRenderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL);
  256. }
  257. }
  258. } catch(e) {}
  259. return {
  260. "User-Agent": navigator.userAgent,
  261. "Platform": navigator.userAgentData ? navigator.userAgentData.platform : navigator.platform,
  262. "Brands": navigator.userAgentData ? navigator.userAgentData.brands.map(b => b.brand).join(', ') : 'Not Supported',
  263. "CPU Cores": navigator.hardwareConcurrency,
  264. "Language": navigator.language,
  265. "Timezone": Intl.DateTimeFormat().resolvedOptions().timeZone,
  266. "WebGL Vendor": webglVendor,
  267. "WebGL Renderer": webglRenderer
  268. };
  269. }
  270. return getFingerprint();
  271. """
  272. fp_data = self.page.run_js(js_script)
  273. self._log("================ 预检浏览器指纹数据 ================")
  274. self._log(json.dumps(fp_data, indent=4, ensure_ascii=False))
  275. self._log("====================================================")
  276. # --- 初始化访问与过盾 ---
  277. tls_url = self.free_config.get('tls_url', '')
  278. self._log(f"Navigating: {tls_url}")
  279. self.page.get(tls_url)
  280. time.sleep(5)
  281. if 'Attention Required! | Cloudflare' in self.page.title and 'Sorry, you have been blocked' in self.page.html:
  282. self._log(f'Block by cloudflare, try refresh...')
  283. self.page.refresh()
  284. self.page.wait.load_start(timeout=2)
  285. self.page.wait.doc_loaded()
  286. cf_bypasser = CloudflareBypasser(self.page, log=self.config.debug)
  287. if not cf_bypasser.bypass(max_retry=6):
  288. raise BizLogicError("Cloudflare bypass timeout")
  289. time.sleep(3)
  290. cf_bypasser.handle_waiting_room()
  291. self._log("Init humanize tools...")
  292. self.mouse = HumanMouse(self.page, debug=self.config.debug)
  293. self.keyboard = HumanKeyboard(self.page)
  294. viewport_width = self.page.rect.viewport_size[0]
  295. viewport_height = self.page.rect.viewport_size[1]
  296. init_x = random.randint(10, viewport_width - 10)
  297. init_y = random.randint(10, viewport_height - 10)
  298. self.mouse.move(init_x, init_y)
  299. max_steps = 10
  300. session_created = False
  301. has_submitted_login = False
  302. for step in range(max_steps):
  303. self.page.wait.doc_loaded()
  304. time.sleep(0.5)
  305. current_url = self.page.url
  306. self._log(f"--- [Router Step {step+1}] Current URL: {current_url} ---")
  307. cloudflare_blocked_indicators = [
  308. "Sorry, you have been blocked" in self.page.html,
  309. "You are being rate limited" in self.page.html,
  310. "Cloudflare Ray ID" in self.page.html
  311. ]
  312. if any(cloudflare_blocked_indicators):
  313. raise BizLogicError(message="Blocked by Cloudflare WAF. Need to change IP or browser fingerprint.")
  314. # 状态 1:到达终极目标页面 (成功退出条件)
  315. if "appointment-booking" in current_url or self.page.ele('tag:button@text():Book your appointment', timeout=1):
  316. btn_selector = 'tag:button@text():Book your appointment'
  317. if self.page.wait.ele_displayed(btn_selector, timeout=10):
  318. self.session_create_time = time.time()
  319. self._log("✅ Login & Navigation Success! Reached appointment-booking.")
  320. session_created = True
  321. break
  322. # 状态 2:遇到没有申请人的拦截页 (致命错误退出条件)
  323. no_applicant_indicators = [
  324. "Add a new applicant" in self.page.html,
  325. "You have not yet added an applicant" in self.page.html,
  326. "applicants-information" in current_url
  327. ]
  328. if any(no_applicant_indicators):
  329. raise BizLogicError(message="No applicant added. Cannot proceed to booking.")
  330. if current_url == tls_url:
  331. # 状态 3:首页/登录入口页 -> 需要点击进入登录
  332. if self.page.ele("tag:a@@href:login", timeout=1) and not self.page.ele('tag:label@@text():Email', timeout=1):
  333. self._log("State: Login Portal. Clicking login link...")
  334. login_link = self.page.ele("tag:a@@href:login")
  335. self.mouse.human_click_ele(login_link)
  336. self.page.wait.load_start(timeout=3)
  337. continue
  338. if self.page.ele("tag:svg@@data-testid=user-button", timeout=1):
  339. self._log("State: Already login, logout now...")
  340. user_btn = self.page.ele("tag:svg@@data-testid=user-button")
  341. self.mouse.human_click_ele(user_btn)
  342. time.sleep(1.5)
  343. logout_btn = self.page.ele("#logout")
  344. self.mouse.human_click_ele(logout_btn)
  345. self.page.wait.load_start(timeout=3)
  346. self.page.get(tls_url)
  347. self.page.wait.load_start(timeout=3)
  348. continue
  349. # 状态 4:真正的登录表单页
  350. if self.page.ele('tag:label@@text():Email', timeout=1) and not has_submitted_login:
  351. self._log("State: Login Form. Processing credentials and Captcha...")
  352. recaptchav2_token = ""
  353. if not captcha_future and (self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]')):
  354. rec_iframe = self.page.ele('xpath://iframe[contains(@src, "recaptcha")]')
  355. rec_iframe_src = rec_iframe.attr('src')
  356. rec_parsed = urlparse(rec_iframe_src)
  357. rec_params = parse_qs(rec_parsed.query)
  358. rec_sitekey = rec_params.get("k", [None])[0]
  359. rec_size = rec_params.get("size", [None])[0]
  360. if 'normal' == rec_size:
  361. self._log(f"Found dynamic sitekey={rec_sitekey}. Starting async Captcha solver...")
  362. rc_params = {
  363. "type": "ReCaptchaV2TaskProxyLess",
  364. "page": current_url,
  365. "siteKey": rec_sitekey,
  366. "apiToken": self.free_config.get("capsolver_key", "")
  367. }
  368. captcha_future = captcha_executor.submit(self._solve_recaptcha, rc_params)
  369. username = self.config.account.username
  370. password = self.config.account.password
  371. input_ele = self.page.ele('tag:label@@text():Email').next()
  372. self.mouse.human_click_ele(input_ele)
  373. time.sleep(random.uniform(0.2, 0.6))
  374. self.keyboard.type_text(username, humanize=True)
  375. time.sleep(random.uniform(0.5, 1.2))
  376. input_ele = self.page.ele('tag:label@@text():Password').next()
  377. self.mouse.human_click_ele(input_ele)
  378. time.sleep(random.uniform(0.2, 0.6))
  379. self.keyboard.type_text(password, humanize=True)
  380. # 注入 Token
  381. if captcha_future:
  382. self._log("Waiting for background Captcha result...")
  383. try:
  384. # 设一个合理的超时,防止死锁
  385. recaptchav2_token = captcha_future.result(timeout=120)
  386. self._log("Background Captcha solved successfully!")
  387. except Exception as e:
  388. raise BizLogicError(f"Captcha solving failed or timed out: {e}")
  389. # 注入 Token
  390. if recaptchav2_token:
  391. inject_js = f"var g = document.getElementById('g-recaptcha-response'); if(g) {{ g.value = '{recaptchav2_token}'; }}"
  392. self.page.run_js(inject_js)
  393. time.sleep(random.uniform(0.5, 1.0))
  394. self._log("Submitting Login...")
  395. login_btn = self.page.ele('tag:button@@text():Login')
  396. self.mouse.human_click_ele(login_btn)
  397. has_submitted_login = True
  398. self.page.wait.load_start(timeout=5)
  399. continue
  400. # 状态 5:Travel Groups 页面
  401. if "travel-groups" in current_url:
  402. self._log("State: Travel Groups. Selecting targeted group...")
  403. groups = self._parse_travel_groups(self.page.html)
  404. location = self.free_config.get('location')
  405. self.travel_group = next((g for g in groups if location in g['location']), None)
  406. if not self.travel_group or not self.travel_group.get("submitted"):
  407. self._save_screenshot("group_not_found")
  408. raise NotFoundError(f"Group not found for {location}")
  409. formgroup_id = self.travel_group.get('group_number')
  410. btn_selector = f'tag:button@@name=formGroupId@@value={formgroup_id}'
  411. if self.page.wait.eles_loaded(btn_selector, timeout=10):
  412. buttons = self.page.eles(btn_selector)
  413. select_btn = next((btn for btn in reversed(buttons) if btn.rect.size[0] > 0 and btn.rect.size[1] > 0), None)
  414. if select_btn:
  415. time.sleep(random.uniform(0.5, 1.2))
  416. self.mouse.human_click_ele(select_btn)
  417. self.page.wait.load_start(timeout=3)
  418. continue
  419. else:
  420. self._log("[WARN] Select button found but not visible.")
  421. else:
  422. self._log(f"[WARN] Wait timeout for group button {formgroup_id}")
  423. # 状态 6:中间过渡页,需点击 "Book Appointment" 继续往下走
  424. if self.page.ele('#book-appointment-btn', timeout=1):
  425. self._log("State: Intermediate Dashboard. Clicking Book Appointment button...")
  426. self.mouse.human_click_ele(self.page.ele('#book-appointment-btn'))
  427. self.page.wait.load_start(timeout=3)
  428. continue
  429. # 状态 7:登录失败校验 或 未知加载状态
  430. if "login-actions" in current_url and has_submitted_login:
  431. self._log("Waiting on login-actions... (Might be authenticating or invalid credentials)")
  432. time.sleep(2)
  433. if self.page.ele('text:Invalid username or password', timeout=1): # 假设网页上有错误提示
  434. raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
  435. continue
  436. self._log("State: Transitioning or Unknown. Waiting 2 seconds...")
  437. time.sleep(2)
  438. if not session_created:
  439. raise BizLogicError(f"Failed to reach appointment-booking after {max_steps} navigation steps. Stuck at: {self.page.url}")
  440. except Exception as e:
  441. self._log(f"Session Create Error: {e}")
  442. if self.config.debug:
  443. self._save_screenshot("create_session_except")
  444. self.cleanup()
  445. raise e
  446. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  447. res = VSQueryResult()
  448. res.success = False
  449. slots = []
  450. self._log(f"Executing silent JS fetch...")
  451. resp = self._perform_request("GET", self.page.url, retry_count=1)
  452. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  453. slots = self._parse_appointment_slots(resp.text)
  454. if slots:
  455. res.success = True
  456. earliest_date = slots[0]["date"]
  457. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  458. res.availability_status = AvailabilityStatus.Available
  459. res.earliest_date = earliest_dt
  460. date_map: dict[datetime, list[TimeSlot]] = {}
  461. for s in slots:
  462. date_str = s["date"]
  463. dt = datetime.strptime(date_str, "%Y-%m-%d")
  464. date_map.setdefault(dt, []).append(
  465. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  466. )
  467. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  468. self._log(f"Slot Found! -> {slots}")
  469. else:
  470. self._log("No slots available.")
  471. res.success = False
  472. res.availability_status = AvailabilityStatus.NoneAvailable
  473. # TODO(TEST): 临时测试预约提交
  474. if configure.TLS_TEST_BOOK_AFTER_QUERY:
  475. test_date = "2026-06-10"
  476. test_time = "09:00"
  477. test_label = ""
  478. test_dt = datetime.strptime(test_date, "%Y-%m-%d")
  479. query_res = VSQueryResult()
  480. query_res.success = True
  481. query_res.availability_status = AvailabilityStatus.Available
  482. query_res.earliest_date = test_dt
  483. query_res.availability = [
  484. DateAvailability(
  485. date=test_dt,
  486. times=[TimeSlot(time=test_time, label=test_label)]
  487. )
  488. ]
  489. self._log(f"[TEST] using fixed June slot: {test_date} {test_time} {test_label}")
  490. test_userinput = {
  491. "support_pta": False,
  492. "expected_end_date": "2100-01-01",
  493. "expected_start_date": "2000-01-01"
  494. }
  495. try:
  496. self.book(query_res, test_userinput)
  497. except Exception as e:
  498. self._log(f"[TEST] book() after query failed: {e}")
  499. self.is_healthy = False
  500. return res
  501. def book_bak(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  502. res = VSBookResult()
  503. res.success = False
  504. exp_start = user_inputs.get('expected_start_date', '')
  505. exp_end = user_inputs.get('expected_end_date', '')
  506. support_pta = user_inputs.get('support_pta', True)
  507. target_labels = ['']
  508. if support_pta:
  509. target_labels.append('pta')
  510. available_dates_str =[
  511. da.date.strftime("%Y-%m-%d")
  512. for da in slot_info.availability if da.date
  513. ]
  514. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  515. if not valid_dates_list:
  516. raise NotFoundError(message="No dates match user constraints")
  517. all_possible_slots =[]
  518. for da in slot_info.availability:
  519. if not da.date:
  520. continue
  521. date_str = da.date.strftime("%Y-%m-%d")
  522. if date_str in valid_dates_list:
  523. for t in da.times:
  524. if t.label in target_labels:
  525. all_possible_slots.append({
  526. "date": date_str,
  527. "time_obj": t,
  528. "label": t.label
  529. })
  530. if not all_possible_slots:
  531. raise NotFoundError(message="No suitable slot found (after label filtering)")
  532. selected_slot = random.choice(all_possible_slots)
  533. selected_date = selected_slot["date"]
  534. selected_time = selected_slot["time_obj"]
  535. selected_label = selected_slot["label"]
  536. self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
  537. self.page.listen.start('/workflow/appointment-booking', method='POST')
  538. page_url = self.page.url
  539. location = self.travel_group.get('location')
  540. mapper = {
  541. "London": "gbLON2fr",
  542. "Dublin": "ieDUB2fr"
  543. }
  544. location_id = mapper.get(location)
  545. form_group_id =self.travel_group.get('group_number')
  546. api_token = self.free_config.get("capsolver_key", "")
  547. rc_params = {
  548. "type": "ReCaptchaV3Task",
  549. "page": page_url,
  550. "action": "book",
  551. "siteKey": "6LcTpXcfAAAAAM3VojNhyV-F1z92ADJIvcSZ39Y9",
  552. "apiToken": api_token,
  553. "proxy": True
  554. }
  555. g_token = self._solve_recaptcha(rc_params)
  556. ACTION_ID = "6033ac5e6e4ac04f59a4b74a9c5dd312876dd46bd9"
  557. js_script = f"""
  558. (async function() {{
  559. const url = "{page_url}";
  560. const groupId = "{form_group_id}";
  561. const locationId = "{location_id}";
  562. const selectedDate = "{selected_date}";
  563. const selectedTime = "{selected_time.time}";
  564. const selectedLabel = "{selected_label}";
  565. const captchaToken = "{g_token}";
  566. const actionId = "{ACTION_ID}";
  567. let castleToken = '';
  568. try {{
  569. const castleModule = window.__webpack_require__(93773);
  570. if (castleModule && castleModule.createRequestToken) {{
  571. castleToken = await castleModule.createRequestToken();
  572. console.log('Castle token obtained:', castleToken);
  573. }}
  574. }} catch(e) {{
  575. console.error('Failed to get Castle token:', e);
  576. }}
  577. let routerStateTree = '';
  578. try {{
  579. const routerModule = window.__webpack_require__(11807);
  580. const routerState = routerModule.getCurrentAppRouterState();
  581. if (routerState && routerState.tree) {{
  582. const prepareModule = window.__webpack_require__(16378);
  583. routerStateTree = prepareModule.prepareFlightRouterStateForRequest(routerState.tree);
  584. console.log('Router state tree obtained');
  585. }}
  586. }} catch(e) {{
  587. console.error('Failed to get router state:', e);
  588. }}
  589. const formData = new FormData();
  590. formData.append('1_formGroupId', groupId);
  591. formData.append('1_lang', 'en-us');
  592. formData.append('1_process', 'APPOINTMENT');
  593. formData.append('1_location', locationId);
  594. formData.append('1_date', selectedDate);
  595. formData.append('1_time', selectedTime);
  596. formData.append('1_appointmentLabel', selectedLabel);
  597. formData.append('1_castleRequestToken', castleToken);
  598. formData.append('1_captchaToken', captchaToken);
  599. formData.append('0', '[{{"status":"IDLE"}},"$K1"]');
  600. const headers = {{
  601. 'Accept': 'text/x-component',
  602. 'Next-Action': actionId,
  603. 'Next-Router-State-Tree': routerStateTree,
  604. }};
  605. const response = await fetch(url, {{
  606. method: 'POST',
  607. headers: headers,
  608. body: formData,
  609. credentials: 'include'
  610. }});
  611. const text = await response.text();
  612. const responseHeaders = {{}};
  613. response.headers.forEach((value, key) => {{
  614. responseHeaders[key] = value;
  615. }});
  616. const result = {{
  617. status: response.status,
  618. body: text,
  619. headers: responseHeaders,
  620. url: response.url,
  621. redirected: response.redirected,
  622. ok: response.ok
  623. }};
  624. return result;
  625. }})();
  626. """
  627. self._log("Submitting booking request via JS Fetch...")
  628. self.page.run_js(js_script)
  629. packet = self.page.listen.wait(timeout=15)
  630. if not packet:
  631. raise BizLogicError(message='Listening data failed')
  632. self.page.listen.stop()
  633. self._log(f"URL: {packet.url}")
  634. self._log(f"POST Body: {packet.request.postData}")
  635. self._log(f"POST Stat: {packet.response.status}")
  636. self._log(f"POST head: {packet.response.headers}")
  637. self._log(f"POST Resp: {packet.response.raw_body}")
  638. redirect_location = packet.response.headers.get('location', '') or ''
  639. appointment_confirmation_indicators = [
  640. "order-summary" in redirect_location,
  641. "partner-services" in redirect_location,
  642. "appointment-confirmation" in redirect_location,
  643. ]
  644. if any(appointment_confirmation_indicators):
  645. self._log(f"Booking Success!")
  646. res.success = True
  647. res.book_date = selected_date
  648. res.book_time = selected_time.time
  649. return res
  650. return res
  def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
      """Pick a random matching slot and submit the booking form in the browser.

      Args:
          slot_info: Query result whose ``availability`` entries expose a
              ``date`` and a list of ``times`` (each with ``time`` and ``label``).
          user_inputs: Optional constraints. Keys read here:
              ``expected_start_date`` / ``expected_end_date`` (date window passed
              to ``_filter_dates``) and ``support_pta`` (default True; when true,
              slots labelled ``'pta'`` are accepted in addition to ``''``).

      Returns:
          A ``VSBookResult``; ``success`` is True (with ``label``, ``book_date``
          and ``book_time`` filled in) only when a confirmation indicator was
          seen in the page URL or HTML after submitting.

      Raises:
          NotFoundError: no date or no slot satisfies the constraints.
          BizLogicError: the form could not be updated, the submit button was
              not found, or no POST packet was captured.
      """
      res = VSBookResult()
      res.success = False

      # The parameter defaults to None; treat that as "no constraints" instead
      # of failing on ``None.get``.
      user_inputs = user_inputs or {}
      exp_start = user_inputs.get('expected_start_date', '')
      exp_end = user_inputs.get('expected_end_date', '')
      support_pta = user_inputs.get('support_pta', True)

      target_labels = ['']
      if support_pta:
          target_labels.append('pta')

      available_dates_str = [
          da.date.strftime("%Y-%m-%d")
          for da in slot_info.availability if da.date
      ]
      valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
      if not valid_dates_list:
          raise NotFoundError(message="No dates match user constraints")
      valid_dates = set(valid_dates_list)

      all_possible_slots = []
      for da in slot_info.availability:
          if not da.date:
              continue
          date_str = da.date.strftime("%Y-%m-%d")
          if date_str not in valid_dates:
              continue
          for t in da.times:
              if t.label in target_labels:
                  all_possible_slots.append({
                      "date": date_str,
                      "time_obj": t,
                      "label": t.label
                  })
      if not all_possible_slots:
          raise NotFoundError(message="No suitable slot found (after label filtering)")

      selected_slot = random.choice(all_possible_slots)
      selected_date = selected_slot["date"]
      selected_time = selected_slot["time_obj"]
      selected_label = selected_slot["label"]
      self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")

      # Writes the chosen slot into the form's inputs and re-enables the
      # submit button; returns 'form_updated' on success or an error string.
      js_update_form = f"""
      try {{
          const buttons = Array.from(document.querySelectorAll('button[type="submit"]'));
          const submitBtn = buttons.find(btn => {{
              return btn.textContent.trim().toLowerCase().includes('book your appointment');
          }});
          if (!submitBtn) return 'Submit button not found';
          const form = submitBtn.closest('form');
          if (!form) return 'Correct form not found';
          function setReactValue(input, value) {{
              if (!input) return;
              input.value = value;
          }}
          setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
          setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
          setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
          submitBtn.removeAttribute('disabled');
          submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
          return 'form_updated';
      }} catch (e) {{
          return e.toString();
      }}
      """

      self.page.listen.start('/workflow/appointment-booking', method='POST')
      try:
          update_res = self.page.run_js(js_update_form)
          self._log(f"Form update triggered: {update_res}")
          if update_res != 'form_updated':
              raise BizLogicError(message=f"Failed to update form: {update_res}")

          submit_btn = self.page.ele('tag:button@@type=submit@@text():Book your appointment')
          if not submit_btn:
              raise BizLogicError(message="Submit button not found for mouse click")
          self._log("Moving mouse to submit button and clicking")
          self.mouse.human_click_ele(submit_btn)

          packet = self.page.listen.wait(timeout=10)
      finally:
          # Always release the listener, including on the error paths above.
          self.page.listen.stop()
      if not packet:
          raise BizLogicError(message='Listening data failed')

      self._log(f"URL: {packet.url}")
      self._log(f"POST Body: {packet.request.postData}")
      self._log(f"POST Resp: {packet.response.body}")
      self._log("Waiting for Next.js to process the form submission...")

      for _ in range(10):
          try:
              current_page_url = self.page.url
              current_page_html = self.page.html
              appointment_confirmation_indicators = [
                  "order-summary" in current_page_url,
                  "partner-services" in current_page_url,
                  "appointment-confirmation" in current_page_url,
                  "Change my appointment" in current_page_html,
                  "Book a new appointment" in current_page_html,
              ]
              if any(appointment_confirmation_indicators):
                  self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
                  res.success = True
                  res.label = selected_label
                  res.book_date = selected_date
                  res.book_time = selected_time.time
                  break

              toast_ele = self.page.ele('tag:div@role=alert', timeout=0.5)
              if toast_ele:
                  error_msg = toast_ele.text
                  self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
                  break
          except Exception as exc:
              # Reading the page can fail transiently (e.g. while it is
              # navigating); log it and fall through to the pause below so the
              # remaining attempts are not burned instantly.
              self._log(f"Page state check failed, retrying: {exc}")
          time.sleep(0.5)

      if res.success:
          # Best-effort evidence; a screenshot failure must not undo a booking.
          try:
              self._save_screenshot("book_slot_success")
          except Exception as exc:
              self._log(f"Failed to save success screenshot: {exc}")
      return res
  def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
      """Run an HTTP request from inside the browser page via an injected JS ``fetch``.

      Args:
          method: HTTP verb; upper-cased before use.
          url: Target URL, without the entries of ``params``.
          headers: Optional header dict; it is copied, never mutated.
          data: Optional body. A dict is form-url-encoded; anything else is
              passed through as the body unchanged.
          json_data: Optional JSON-serialisable body; takes precedence over
              ``data`` when truthy.
          params: Optional query parameters appended to ``url``.
          retry_count: Internal counter for the 403 refresh-and-retry path.

      Returns:
          A ``BrowserResponse`` when the status code is 200.

      Raises:
          BizLogicError: browser not initialised, network error (status 0) or
              any other unexpected status.
          SessionExpiredOrInvalidError: on HTTP 401 (also marks the instance
              unhealthy).
          PermissionDeniedError: on HTTP 403 once the refresh retries are
              exhausted or the refresh fails.
          RateLimiteddError: on HTTP 429 (also marks the instance unhealthy).
      """
      from urllib.parse import urlencode

      if not self.page:
          raise BizLogicError("Browser not initialized")

      # Build the final URL separately so the 403 retry below can pass the
      # original ``url`` + ``params`` without the query being appended twice.
      request_url = url
      if params:
          separator = '&' if '?' in url else '?'
          request_url = url + separator + urlencode(params)

      fetch_options = {
          "method": method.upper(),
          # Copy: a Content-Type may be added below and must not leak into the
          # caller's dict.
          "headers": dict(headers or {}),
          "credentials": "include",
      }

      # Body handling
      if json_data:
          fetch_options['body'] = json.dumps(json_data)
          fetch_options['headers']['Content-Type'] = 'application/json'
      elif data:
          if isinstance(data, dict):
              fetch_options['body'] = urlencode(data)
              fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
          else:
              fetch_options['body'] = data

      # json.dumps yields a correctly quoted/escaped JS string literal, so a
      # URL containing quotes or backslashes cannot break the script.
      js_script = f"""
      const url = {json.dumps(request_url)};
      const options = {json.dumps(fetch_options)};
      return fetch(url, options)
          .then(async response => {{
              const text = await response.text();
              const headers = {{}};
              response.headers.forEach((value, key) => headers[key] = value);
              return {{
                  status: response.status,
                  body: text,
                  headers: headers,
                  url: response.url
              }};
          }})
          .catch(error => {{
              return {{
                  status: 0,
                  body: error.toString(),
                  headers: {{}},
                  url: url
              }};
          }});
      """
      res_dict = self.page.run_js(js_script, timeout=30)
      resp = BrowserResponse(res_dict)

      if resp.status_code == 200:
          return resp
      elif resp.status_code == 401:
          self.is_healthy = False
          raise SessionExpiredOrInvalidError()
      elif resp.status_code == 403:
          if retry_count < 2:
              self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
              if self._refresh_firewall_session():
                  self._log("Firewall session refreshed. Retrying request...")
                  return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
              else:
                  self._log("Failed to refresh firewall session.")
          raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
      elif resp.status_code == 429:
          self.is_healthy = False
          # Exception name kept exactly as the original code references it.
          raise RateLimiteddError()
      else:
          if resp.status_code == 0:
              # Status 0 is produced by the JS ``catch`` branch above.
              raise BizLogicError(f"Network Error: {resp.text}")
          raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  def _refresh_firewall_session(self) -> bool:
      """Reload the page to trigger the Cloudflare challenge and try to pass it.

      Returns:
          True when the bypasser reports success and the page title does not
          contain "access denied"; False otherwise. Any exception is logged
          and reported as False.
      """
      try:
          self._log("Refreshing page to trigger Cloudflare...")
          self.page.refresh()

          bypasser = CloudflareBypasser(self.page, log=self.config.debug)
          if not bypasser.bypass(max_retry=6):
              return False

          # A "passed" challenge can still land on a block page.
          if "access denied" in self.page.title.lower():
              return False

          time.sleep(2)
          return True
      except Exception as exc:
          self._log(f"Error during firewall refresh: {exc}")
          return False
  def _solve_recaptcha(self, params) -> str:
      """Solve a reCAPTCHA through the CapSolver HTTP API and return the token.

      Args:
          params: Mapping read with ``.get``. Keys used: ``apiToken`` (required
              client key), ``type``, ``page``, ``siteKey``, optional ``action``
              and a truthy ``proxy`` flag that makes the task use
              ``self.config.proxy``.

      Returns:
          The ``gRecaptchaResponse`` value from the task solution.

      Raises:
          NotFoundError: ``apiToken`` is missing.
          BizLogicError: task submission failed, no task id was returned, the
              task was reported as failed, or it was not ready after 20 polls.
      """
      key = params.get("apiToken")
      if not key:
          raise NotFoundError("Api-token required")

      submit_url = "https://api.capsolver.com/createTask"
      task = {
          "type": params.get("type"),
          "websiteURL": params.get("page"),
          "websiteKey": params.get("siteKey"),
      }
      if params.get("action"):
          task["pageAction"] = params.get("action")
      if params.get("proxy"):
          p = self.config.proxy
          task["proxyType"] = p.proto
          task["proxyAddress"] = p.ip
          task["proxyPort"] = p.port
          if p.username:
              task["proxyLogin"] = p.username
              task["proxyPassword"] = p.password
      payload = {"clientKey": key, "task": task}

      import requests as req
      r = req.post(submit_url, json=payload, timeout=20)
      if r.status_code != 200:
          raise BizLogicError(message="Failed to submit capsolver task")

      created = r.json()
      task_id = created.get("taskId")
      if not task_id:
          # Without a task id every poll below would be pointless; fail now
          # instead of waiting out the whole polling window.
          detail = created.get("errorDescription") or created
          raise BizLogicError(message=f"Capsolver did not return a taskId: {detail}")

      max_polls = 20
      for attempt in range(max_polls):
          r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
          if r.status_code == 200:
              d = r.json()
              status = d.get("status")
              if status == "ready":
                  return d["solution"]["gRecaptchaResponse"]
              # NOTE(review): assumes the API signals failure with a non-zero
              # ``errorId`` or ``status == "failed"`` -- confirm against the
              # CapSolver API reference.
              if status == "failed" or d.get("errorId"):
                  detail = d.get("errorDescription") or d.get("errorCode") or status
                  raise BizLogicError(message=f"Capsolver task failed: {detail}")
          if attempt < max_polls - 1:
              # No point sleeping after the final attempt.
              time.sleep(3)
      raise BizLogicError(message="Capsolver task timeout")
  886. def _parse_travel_groups(self, html_content) -> List[Dict]:
  887. groups = []
  888. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  889. js_match = re.search(js_pattern, html_content, re.DOTALL)
  890. if js_match:
  891. json_str = js_match.group(1).replace(r'\"', '"')
  892. data = json.loads(json_str)
  893. for g in data:
  894. groups.append({
  895. 'group_name': g.get('groupName'),
  896. 'group_number': g.get('formGroupId'),
  897. 'location': g.get('vacName'),
  898. 'submitted': g.get('submitted')
  899. })
  900. else:
  901. self._log('Parsed travel group page, but not found travelGroups')
  902. return groups
  903. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  904. slots = []
  905. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  906. match = re.search(pattern, html_content, re.DOTALL)
  907. if match:
  908. json_str = match.group(1).replace(r'\"', '"')
  909. data = json.loads(json_str)
  910. for day in data:
  911. d_str = day.get('day')
  912. for s in day.get('slots', []):
  913. labels = s.get('labels', [])
  914. lbl = None
  915. if 'pta' in labels: lbl = 'pta'
  916. elif 'ptaw' in labels: lbl = 'ptaw'
  917. elif '' in labels: lbl = ''
  918. if lbl is not None:
  919. slots.append({
  920. 'date': d_str,
  921. 'time': s.get('time'),
  922. 'label': lbl
  923. })
  924. return slots
  925. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  926. if not html:
  927. self.is_healthy = False
  928. raise SessionExpiredOrInvalidError()
  929. html_lower = html.lower()
  930. if keyword.lower() not in html_lower:
  931. session_expire_or_invalid_indicators = [
  932. 'redirected automatically' in html_lower,
  933. 'login' in html_lower and 'password' in html_lower,
  934. 'session expired' in html_lower
  935. ]
  936. if any(session_expire_or_invalid_indicators):
  937. self.is_healthy = False
  938. raise SessionExpiredOrInvalidError()
  939. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  940. if not start_str or not end_str:
  941. return dates
  942. valid_dates = []
  943. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  944. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  945. for date_str in dates:
  946. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  947. if s_date <= curr_date <= e_date:
  948. valid_dates.append(date_str)
  949. random.shuffle(valid_dates)
  950. return valid_dates
  # --- Core resource cleanup methods ---
  def cleanup(self):
      """Quit the browser, delete the temporary workspace and stop the tunnel.

      Every step is best-effort: errors from quitting the browser or stopping
      the tunnel are swallowed, and a workspace that still exists after three
      removal attempts only produces a warning log. Attributes are read with
      ``getattr`` so the method is safe on a partially initialised instance
      (it is also invoked from ``__del__``).
      """
      page = getattr(self, 'page', None)
      if page:
          try:
              page.quit(force=True)
          except Exception:
              # Deliberate best-effort: the browser may already be gone.
              pass
      self.page = None

      workspace = getattr(self, 'root_workspace', None)
      if workspace and os.path.exists(workspace):
          for _ in range(3):
              time.sleep(0.2)
              # ignore_errors=True means rmtree never raises, so success has
              # to be detected by checking that the directory is gone.
              shutil.rmtree(workspace, ignore_errors=True)
              if not os.path.exists(workspace):
                  break
              time.sleep(0.5)
          if os.path.exists(workspace):
              self._log(f"[WARN] Failed to fully remove workspace: {workspace}")

      tunnel = getattr(self, 'tunnel', None)
      if tunnel:
          try:
              tunnel.stop()
          except Exception:
              # Deliberate best-effort: nothing more can be done at teardown.
              pass
      self.tunnel = None
  977. def __del__(self):
  978. """
  979. 析构函数:当对象被垃圾回收时自动调用
  980. """
  981. self.cleanup()