tls_plugin2.py 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import queue
  9. import threading
  10. from datetime import datetime
  11. from typing import List, Dict, Optional, Any, Callable
  12. from urllib.parse import urljoin, urlparse, urlencode, parse_qs
  13. from camoufox import NewBrowser
  14. from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError, Page, BrowserContext
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from utils.cloudflare_bypass_for_scraping2 import CloudflareBypasser
  18. def _camoufox_headless_from_env():
  19. """
  20. Ubuntu/无显示器 下通过环境变量选择 Camoufox 模式(与 NewBrowser 一致):
  21. - 未设置 / 0 / false:有头(需真实 DISPLAY 或自行开 Xvfb 并 export DISPLAY=:99)
  22. - 1 / true / yes / headless:Playwright 真无头(无需 X)
  23. - virtual / xvfb:由 Camoufox 起 Xvfb 虚拟屏(需安装 Xvfb,适合要「有界面栈」又无可接显示器的 Linux)
  24. """
  25. v = (os.environ.get("CAMOUFOX_HEADLESS") or "").strip().lower()
  26. if v in ("1", "true", "yes", "headless"):
  27. return True
  28. if v in ("virtual", "xvfb", "vdisplay"):
  29. return "virtual"
  30. return False
  31. class BrowserResponse:
  32. """模拟 requests.Response"""
  33. def __init__(self, result_dict):
  34. result_dict = result_dict or {}
  35. self.status_code = result_dict.get('status', 0)
  36. self.text = result_dict.get('body', '')
  37. self.headers = result_dict.get('headers', {})
  38. self.url = result_dict.get('url', '')
  39. self._json = None
  40. def json(self):
  41. if self._json is None:
  42. if not self.text:
  43. return {}
  44. try:
  45. self._json = json.loads(self.text)
  46. except:
  47. self._json = {}
  48. return self._json
  49. class TlsPlugin(IVSPlg):
  50. """
  51. TLSContact 签证预约插件 (Camoufox 版)
  52. """
  53. def __init__(self, group_id: str):
  54. self.group_id = group_id
  55. self.config: Optional[VSPlgConfig] = None
  56. self.free_config: Dict[str, Any] = {}
  57. self.is_healthy = True
  58. self.logger = None
  59. self.page: Optional[Page] = None
  60. self.browser_ctx: Optional[BrowserContext] = None
  61. self.playwright = None
  62. self.travel_group: Optional[Dict] = None
  63. self.instance_id = uuid.uuid4().hex[:8]
  64. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  65. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  66. if not os.path.exists(self.root_workspace):
  67. os.makedirs(self.root_workspace)
  68. self.session_create_time: float = 0
  69. # Playwright/Camoufox 的 Page 只能在创建它的线程使用;Sentinel 在线程池里建会话、在监控线程里 query。
  70. # 用单条工作线程串行所有浏览器操作,避免跨线程卡死或 silent health_check 失败。
  71. self._pw_cmd_queue: "queue.Queue[Optional[Callable[[], None]]]" = queue.Queue()
  72. self._pw_thread: Optional[threading.Thread] = None
  73. self._pw_worker: Optional[threading.Thread] = None
  74. self._pw_thread_ready = threading.Event()
  75. self._pw_thread_lock = threading.Lock()
  76. def get_group_id(self) -> str:
  77. return self.group_id
  78. def set_log(self, logger: Callable[[str], None]):
  79. self.logger = logger
  80. def _log(self, message):
  81. if self.logger:
  82. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  83. else:
  84. print(f'[TlsPlugin] [{self.group_id}] {message}')
  85. def set_config(self, config: VSPlgConfig):
  86. self.config = config
  87. self.free_config = config.free_config or {}
  88. def _ensure_pw_thread(self):
  89. with self._pw_thread_lock:
  90. if self._pw_thread and self._pw_thread.is_alive():
  91. return
  92. self._pw_thread_ready.clear()
  93. t = threading.Thread(target=self._pw_loop, name=f"camoufox-tls-{self.instance_id}", daemon=True)
  94. self._pw_thread = t
  95. t.start()
  96. if not self._pw_thread_ready.wait(timeout=60):
  97. raise BizLogicError("Camoufox worker thread failed to start")
  98. def _pw_loop(self):
  99. self._pw_worker = threading.current_thread()
  100. self._pw_thread_ready.set()
  101. while True:
  102. work = self._pw_cmd_queue.get()
  103. if work is None:
  104. break
  105. work()
  106. def _run_on_pw_thread(self, fn, *args, **kwargs):
  107. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  108. return fn(*args, **kwargs)
  109. if self._pw_worker is None or not self._pw_thread or not self._pw_thread.is_alive():
  110. self._ensure_pw_thread()
  111. out: List[Any] = [None, None]
  112. done = threading.Event()
  113. def work():
  114. try:
  115. out[1] = fn(*args, **kwargs)
  116. except BaseException as e:
  117. out[0] = e
  118. finally:
  119. done.set()
  120. self._pw_cmd_queue.put(work)
  121. if not done.wait(timeout=600):
  122. self._log("Browser thread operation timed out (600s).")
  123. raise BizLogicError("Browser thread operation timeout")
  124. if out[0] is not None:
  125. raise out[0]
  126. return out[1]
  127. def _stop_pw_thread(self):
  128. with self._pw_thread_lock:
  129. t = self._pw_thread
  130. if not t or not t.is_alive():
  131. self._pw_thread = None
  132. self._pw_worker = None
  133. return
  134. self._pw_cmd_queue.put(None)
  135. t.join(timeout=20)
  136. with self._pw_thread_lock:
  137. self._pw_thread = None
  138. self._pw_worker = None
  139. def keep_alive(self):
  140. if self.page is None:
  141. return
  142. def _work():
  143. try:
  144. resp = self._perform_request("GET", self.page.url, retry_count=1)
  145. self._check_page_is_session_expired_or_invalid('Book your appointment', html = resp.text)
  146. except SessionExpiredOrInvalidError as e:
  147. self.is_healthy = False
  148. except Exception as e:
  149. pass
  150. try:
  151. self._run_on_pw_thread(_work)
  152. except Exception:
  153. pass
  154. def _health_check_impl(self) -> bool:
  155. if not self.is_healthy:
  156. return False
  157. if self.page is None:
  158. return False
  159. try:
  160. v = self.page.evaluate("1")
  161. if v != 1:
  162. return False
  163. except:
  164. return False
  165. if self.config.session_max_life > 0:
  166. current_time = time.time()
  167. elapsed_time = current_time - self.session_create_time
  168. if elapsed_time > self.config.session_max_life:
  169. self._log(f"Session expired.")
  170. return False
  171. return True
  172. def health_check(self) -> bool:
  173. if not self.is_healthy or self.page is None:
  174. return False
  175. try:
  176. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  177. return self._health_check_impl()
  178. return self._run_on_pw_thread(self._health_check_impl)
  179. except Exception:
  180. return False
  181. def _save_screenshot(self, name_prefix):
  182. try:
  183. timestamp = int(time.time())
  184. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  185. save_path = os.path.join("data", filename)
  186. os.makedirs("data", exist_ok=True)
  187. self.page.screenshot(path=save_path, full_page=True)
  188. self._log(f"Screenshot saved to {save_path}")
  189. except Exception as e:
  190. self._log(f"Failed to save screenshot: {e}")
  191. def create_session(self):
  192. self._ensure_pw_thread()
  193. try:
  194. self._run_on_pw_thread(self._create_session_inner)
  195. except Exception:
  196. self._stop_pw_thread()
  197. raise
  198. def _create_session_inner(self):
  199. """
  200. 全浏览器会话创建:过盾 -> JS注入登录 -> 状态机自动路由导航 -> 到达目标页
  201. 必须在同一条 Camoufox/Playwright 工作线程中执行(Playwright 非线程安全)。
  202. """
  203. self._log(f"Initializing Session (ID: {self.instance_id})...")
  204. proxy_cfg = None
  205. if self.config.proxy and self.config.proxy.ip:
  206. p = self.config.proxy
  207. if p.username and p.password:
  208. proxy_cfg = {
  209. "server": f"{p.proto}://{p.ip}:{p.port}",
  210. "username": p.username,
  211. "password": p.password,
  212. }
  213. else:
  214. proxy_cfg = {"server": f"{p.proto}://{p.ip}:{p.port}"}
  215. else:
  216. self._log("[WARN] No proxy configured!")
  217. try:
  218. self.playwright = sync_playwright().start()
  219. headless_opt = _camoufox_headless_from_env()
  220. self._log(f"Camoufox headless={headless_opt!r} (env CAMOUFOX_HEADLESS)")
  221. self.browser_ctx = NewBrowser(
  222. self.playwright,
  223. persistent_context=True,
  224. headless=headless_opt,
  225. user_data_dir=self.user_data_path,
  226. proxy=proxy_cfg,
  227. window=(1920, 1080),
  228. )
  229. self.page = self.browser_ctx.pages[0] if self.browser_ctx.pages else self.browser_ctx.new_page()
  230. # --- 初始化访问与过盾 ---
  231. tls_url = self.free_config.get('tls_url', '')
  232. self._log(f"Navigating: {tls_url}")
  233. self.page.goto(tls_url, wait_until="domcontentloaded")
  234. time.sleep(5)
  235. cf_bypasser = CloudflareBypasser(self.page, log=True)
  236. if not cf_bypasser.bypass(max_retry=15):
  237. raise BizLogicError("Cloudflare bypass timeout")
  238. time.sleep(3)
  239. cf_bypasser.handle_waiting_room()
  240. # --- 状态机导航循环 ---
  241. max_steps = 20
  242. session_created = False
  243. has_submitted_login = False
  244. for step in range(max_steps):
  245. current_url = self.page.url
  246. self._log(f"--- [Router Step {step+1}] Current URL: {current_url} ---")
  247. # 状态 1:到达终极目标页面 (成功退出条件)
  248. if "appointment-booking" in current_url or self.page.locator("button:has-text('Book your appointment')").first.count():
  249. btn_selector = "button:has-text('Book your appointment')"
  250. if self._is_selector_visible(btn_selector, timeout=10000):
  251. self.session_create_time = time.time()
  252. self._log("✅ Login & Navigation Success! Reached appointment-booking.")
  253. session_created = True
  254. break
  255. # 状态 2:遇到没有申请人的拦截页 (致命错误退出条件)
  256. page_content = self.page.content()
  257. no_applicant_indicators = [
  258. "Add a new applicant" in page_content,
  259. "You have not yet added an applicant" in page_content,
  260. "applicants-information" in current_url
  261. ]
  262. if any(no_applicant_indicators):
  263. raise BizLogicError(message="No applicant added. Cannot proceed to booking.")
  264. # 状态 3:首页/登录入口页 -> 需要点击进入登录
  265. if self.page.locator("a[href*='login']").first.count() and not self.page.locator("label:has-text('Email')").first.count():
  266. self._log("State: Login Portal. Clicking login link...")
  267. try:
  268. self.page.locator("a[href*='login']").first.click(timeout=5000)
  269. time.sleep(3)
  270. continue
  271. except Exception:
  272. pass
  273. # 状态 4:真正的登录表单页
  274. if self.page.locator("label:has-text('Email')").first.count() and not has_submitted_login:
  275. self._log("State: Login Form. Processing credentials and Captcha...")
  276. recaptchav2_token = ""
  277. if self.page.locator(".g-recaptcha").first.count() or self.page.locator("//iframe[contains(@src, 'recaptcha')]").first.count():
  278. try:
  279. rec_iframe = self.page.locator("//iframe[contains(@src, 'recaptcha')]").first
  280. rec_iframe_src = rec_iframe.get_attribute('src') or ""
  281. rec_parsed = urlparse(rec_iframe_src)
  282. rec_params = parse_qs(rec_parsed.query)
  283. rec_sitekey = rec_params.get("k", [None])[0]
  284. rec_size = rec_params.get("size", [None])[0]
  285. if 'normal' == rec_size and rec_sitekey:
  286. self._log(f"Solving ReCaptcha sitekey={rec_sitekey}...")
  287. rc_params = {
  288. "type": "ReCaptchaV2TaskProxyLess",
  289. "page": current_url,
  290. "siteKey": rec_sitekey,
  291. "apiToken": self.free_config.get("capsolver_key", "")
  292. }
  293. recaptchav2_token = self._solve_recaptcha(rc_params)
  294. except Exception as e:
  295. self._log(f"ReCaptcha extraction failed: {e}")
  296. username = self.config.account.username
  297. password = self.config.account.password
  298. self._type_into_first_visible(
  299. selectors=[
  300. "input[name='email']",
  301. "input[type='email']",
  302. "input#email",
  303. "input[autocomplete='username']",
  304. "label:has-text('Email') + input",
  305. ],
  306. text=username,
  307. field_name="Email",
  308. )
  309. time.sleep(random.uniform(0.5, 1.2))
  310. self._type_into_first_visible(
  311. selectors=[
  312. "input[name='password']",
  313. "input[type='password']",
  314. "input#password",
  315. "input[autocomplete='current-password']",
  316. "label:has-text('Password') + input",
  317. ],
  318. text=password,
  319. field_name="Password",
  320. )
  321. # 注入 Token
  322. if recaptchav2_token:
  323. inject_js = f"var g = document.getElementById('g-recaptcha-response'); if(g) {{ g.value = '{recaptchav2_token}'; }}"
  324. try:
  325. self.page.evaluate(f"() => {{ {inject_js} }}")
  326. self._log("ReCaptcha token injected")
  327. except Exception:
  328. pass
  329. time.sleep(random.uniform(0.5, 1.0))
  330. self._log("Submitting Login...")
  331. time.sleep(random.uniform(0.3, 0.8))
  332. self.page.locator("button:has-text('Login')").first.click(timeout=10000)
  333. has_submitted_login = True
  334. time.sleep(3)
  335. continue
  336. # 状态 5:Travel Groups 页面
  337. if "travel-groups" in current_url:
  338. self._log("State: Travel Groups. Selecting targeted group...")
  339. groups = self._parse_travel_groups(self.page.content())
  340. location = self.free_config.get('location')
  341. self.travel_group = next((g for g in groups if location in g['location']), None)
  342. if not self.travel_group:
  343. self._save_screenshot("group_not_found")
  344. raise NotFoundError(f"Group not found for {location}")
  345. formgroup_id = self.travel_group.get('group_number')
  346. btn_selector = f'button[name="formGroupId"][value="{formgroup_id}"]'
  347. select_buttons = self.page.locator(btn_selector)
  348. if select_buttons.count():
  349. # 取最后一个可见的按钮
  350. select_btn = None
  351. for i in range(select_buttons.count() - 1, -1, -1):
  352. btn = select_buttons.nth(i)
  353. try:
  354. if btn.is_visible(timeout=1000):
  355. select_btn = btn
  356. break
  357. except Exception:
  358. continue
  359. if select_btn:
  360. time.sleep(random.uniform(0.5, 1.2))
  361. select_btn.click(timeout=10000)
  362. self._log(f"Clicked select button for group {formgroup_id}")
  363. time.sleep(3)
  364. continue
  365. else:
  366. self._log("[WARN] Select button found but not visible.")
  367. else:
  368. self._log(f"[WARN] Wait timeout for group button {formgroup_id}")
  369. # 状态 6:中间过渡页,需点击 "Book Appointment" 继续往下走
  370. if self.page.locator('#book-appointment-btn').first.count():
  371. self._log("State: Intermediate Dashboard. Clicking Book Appointment button...")
  372. try:
  373. self.page.locator('#book-appointment-btn').first.click(timeout=10000)
  374. time.sleep(3)
  375. continue
  376. except Exception:
  377. pass
  378. # 状态 7:登录失败校验 或 未知加载状态
  379. if "login-actions" in current_url and has_submitted_login:
  380. self._log("Waiting on login-actions... (Might be authenticating or invalid credentials)")
  381. time.sleep(2)
  382. try:
  383. if self.page.locator("text='Invalid username or password'").first.count():
  384. raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
  385. except Exception:
  386. pass
  387. continue
  388. # 兜底:未匹配到明确状态,等待页面渲染或重定向
  389. self._log("State: Transitioning or Unknown. Waiting 2 seconds...")
  390. time.sleep(2)
  391. # 如果循环耗尽还没到达目标
  392. if not session_created:
  393. raise BizLogicError(f"Failed to reach appointment-booking after {max_steps} navigation steps. Stuck at: {self.page.url}")
  394. except Exception as e:
  395. self._log(f"Session Create Error: {e}")
  396. if self.config.debug:
  397. self._save_screenshot("create_session_except")
  398. self._cleanup_failed_session()
  399. raise e
  400. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  401. return self._run_on_pw_thread(self._query_impl, apt_type)
  402. def _day_block_locator_candidates(self):
  403. # 与 Drission 版 `//div[p and div//button[contains(@data-testid, "slot")]]` 对齐(子 div 下含 slot 按钮)
  404. yield self.page.locator(
  405. "xpath=//div[./p and ./div//button[contains(@data-testid, 'slot')]]"
  406. )
  407. # 结构略变:任意后代 button 带 slot
  408. yield self.page.locator(
  409. "xpath=//div[./p and .//button[contains(@data-testid, 'slot ')]]"
  410. )
  411. # 仅要求有 p 与 slot 类按钮
  412. yield self.page.locator(
  413. "xpath=//div[.//p and .//button[contains(@data-testid, 'slot')]]"
  414. )
  415. # Playwright 原生 :has
  416. yield self.page.locator("div").filter(
  417. has=self.page.locator("p")
  418. ).filter(
  419. has=self.page.locator("button[data-testid*='slot']")
  420. )
  421. def _extract_slots_from_calendar_dom(
  422. self, target_year: int, target_month_num: int
  423. ) -> List[Dict[str, Any]]:
  424. """多策略定位「日期块 + 可点时段按钮」,与页面结构差异/Camoufox 兼容。"""
  425. all_slots: List[Dict[str, Any]] = []
  426. day_blocks = None
  427. for loc in self._day_block_locator_candidates():
  428. try:
  429. n = loc.count()
  430. except Exception:
  431. continue
  432. if n > 0:
  433. day_blocks = loc
  434. self._log(f"使用日历块选择器,匹配到 {n} 个 day_blocks")
  435. break
  436. if day_blocks is None:
  437. # 不依赖 day_block 外壳:直接扫可用按钮,再向祖先找日期
  438. return self._extract_slots_from_available_buttons_only(
  439. target_year, target_month_num
  440. )
  441. for i in range(day_blocks.count()):
  442. block = day_blocks.nth(i)
  443. p_ele = block.locator("p").first
  444. if not p_ele.count():
  445. continue
  446. day_match = re.search(r"\d+", p_ele.inner_text())
  447. if not day_match:
  448. continue
  449. day_str = day_match.group()
  450. try:
  451. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  452. except ValueError:
  453. continue
  454. available_btns = block.locator("button[data-testid^='btn-available-slot']")
  455. for j in range(available_btns.count()):
  456. btn = available_btns.nth(j)
  457. btn_html = btn.inner_html()
  458. time_match = re.search(r"\d{2}:\d{2}", btn_html)
  459. if not time_match:
  460. continue
  461. time_str = time_match.group()
  462. test_id = btn.get_attribute("data-testid") or ""
  463. if "prime" in test_id and "weekend" in test_id:
  464. lbl = "ptaw"
  465. elif "prime" in test_id:
  466. lbl = "pta"
  467. else:
  468. lbl = ""
  469. all_slots.append(
  470. {"date": full_date, "time": time_str, "label": lbl}
  471. )
  472. if all_slots:
  473. return all_slots
  474. return self._extract_slots_from_available_buttons_only(
  475. target_year, target_month_num
  476. )
  477. def _extract_slots_from_available_buttons_only(
  478. self, target_year: int, target_month_num: int
  479. ) -> List[Dict[str, Any]]:
  480. """当整块 DOM 选不中时,用可用按钮反查日期行。"""
  481. all_slots: List[Dict[str, Any]] = []
  482. btns = self.page.locator("button[data-testid^='btn-available-slot']")
  483. n = btns.count()
  484. if n == 0:
  485. return []
  486. self._log(f"按可用按钮回查日期,共 {n} 个 btn-available-slot")
  487. for j in range(n):
  488. btn = btns.nth(j)
  489. row = btn.locator("xpath=./ancestor::div[.//p][1]")
  490. p_ele = row.locator("p").first
  491. if not p_ele.count():
  492. continue
  493. day_match = re.search(r"\d+", p_ele.inner_text())
  494. if not day_match:
  495. continue
  496. day_str = day_match.group()
  497. try:
  498. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  499. except ValueError:
  500. continue
  501. btn_html = btn.inner_html()
  502. time_match = re.search(r"\d{2}:\d{2}", btn_html)
  503. if not time_match:
  504. continue
  505. time_str = time_match.group()
  506. test_id = btn.get_attribute("data-testid") or ""
  507. if "prime" in test_id and "weekend" in test_id:
  508. lbl = "ptaw"
  509. elif "prime" in test_id:
  510. lbl = "pta"
  511. else:
  512. lbl = ""
  513. all_slots.append({"date": full_date, "time": time_str, "label": lbl})
  514. return all_slots
  515. def _query_impl(self, apt_type: AppointmentType) -> VSQueryResult:
  516. res = VSQueryResult()
  517. res.success = False
  518. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  519. target_date_obj = datetime.strptime(interest_month, "%m-%Y")
  520. target_month_text = target_date_obj.strftime("%B %Y")
  521. target_year = target_date_obj.year
  522. target_month_num = target_date_obj.month
  523. slots = []
  524. current_selected_ele = self.page.locator('[data-testid="btn-current-month-available"]').first
  525. current_month_text = current_selected_ele.inner_text().strip() if current_selected_ele.count() else ""
  526. is_on_target_month = (current_month_text.lower() == target_month_text.lower())
  527. if not is_on_target_month:
  528. self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
  529. reached_target = False
  530. for step in range(12):
  531. current_ele = self.page.locator('[data-testid="btn-current-month-available"]').first
  532. if current_ele.count() and current_ele.inner_text().strip().lower() == target_month_text.lower():
  533. self._log(f"✅ Successfully navigated to target month: '{target_month_text}'!")
  534. reached_target = True
  535. break
  536. next_btn = self.page.locator('[data-testid="btn-next-month-available"]').first
  537. if next_btn.count():
  538. next_btn.click(timeout=5000)
  539. time.sleep(2.5)
  540. else:
  541. self._log("⚠️ Reached the end of the calendar or 'Next Month' is disabled.")
  542. break
  543. if not reached_target:
  544. self._log(f"❌ Could not navigate to target month: {target_month_text}. Stop parsing.")
  545. res.success = False
  546. res.availability_status = AvailabilityStatus.NoneAvailable
  547. return res
  548. self._log("Extracting slots from DOM using robust data-testid features...")
  549. slots = self._extract_slots_from_calendar_dom(target_year, target_month_num)
  550. else:
  551. self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
  552. resp = self._perform_request("GET", self.page.url, retry_count=1)
  553. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  554. slots = self._parse_appointment_slots(resp.text)
  555. if slots:
  556. res.success = True
  557. earliest_date = slots[0]["date"]
  558. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  559. res.availability_status = AvailabilityStatus.Available
  560. res.earliest_date = earliest_dt
  561. date_map: dict[datetime, list[TimeSlot]] = {}
  562. for s in slots:
  563. date_str = s["date"]
  564. dt = datetime.strptime(date_str, "%Y-%m-%d")
  565. date_map.setdefault(dt, []).append(
  566. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  567. )
  568. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  569. self._log(f"Slot Found! -> {slots}")
  570. else:
  571. self._log("No slots available.")
  572. res.success = False
  573. res.availability_status = AvailabilityStatus.NoneAvailable
  574. return res
  575. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  576. return self._run_on_pw_thread(self._book_impl, slot_info, user_inputs)
  577. def _book_impl(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  578. if user_inputs is None:
  579. user_inputs = {}
  580. res = VSBookResult()
  581. res.success = False
  582. exp_start = user_inputs.get('expected_start_date', '')
  583. exp_end = user_inputs.get('expected_end_date', '')
  584. support_pta = user_inputs.get('support_pta', True)
  585. target_labels = ['']
  586. if support_pta:
  587. target_labels.append('pta')
  588. available_dates_str =[
  589. da.date.strftime("%Y-%m-%d")
  590. for da in slot_info.availability if da.date
  591. ]
  592. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  593. if not valid_dates_list:
  594. raise NotFoundError(message="No dates match user constraints")
  595. all_possible_slots =[]
  596. for da in slot_info.availability:
  597. if not da.date:
  598. continue
  599. date_str = da.date.strftime("%Y-%m-%d")
  600. if date_str in valid_dates_list:
  601. for t in da.times:
  602. if t.label in target_labels:
  603. all_possible_slots.append({
  604. "date": date_str,
  605. "time_obj": t,
  606. "label": t.label
  607. })
  608. if not all_possible_slots:
  609. raise NotFoundError(message="No suitable slot found (after label filtering)")
  610. selected_slot = random.choice(all_possible_slots)
  611. selected_date = selected_slot["date"]
  612. selected_time = selected_slot["time_obj"]
  613. selected_label = selected_slot["label"]
  614. self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
  615. # 随机选择预订模式 - Mode 1 (鼠标移动 + JS更新 + 点击) 或 Mode 2 (直接 JS 更新 + 点击)
  616. book_mode = random.choice([1, 2])
  617. self._log(f"Using booking mode: {book_mode}")
  618. if book_mode == 1:
  619. # Mode 1: 模拟真实用户行为 - 先移动鼠标到随机位置
  620. rand_x = random.randint(300, 800)
  621. rand_y = random.randint(400, 700)
  622. self._log(f"Mode 1: Moving mouse to ({rand_x}, {rand_y}) and clicking")
  623. # Playwright 中不直接支持 HumanMouse,但可以通过 hover 和 click 来实现
  624. dummy_locator = self.page.locator(f"xpath=//*[@id='dummy_{random.randint(1000, 9999)}']")
  625. try:
  626. # 如果虚拟定位器存在就点击(通常不会存在),否则只是触发 mousemove 事件
  627. dummy_locator.first.click(timeout=500)
  628. except Exception:
  629. pass
  630. js_update_form = f"""
  631. try {{
  632. const buttons = Array.from(document.querySelectorAll('button[type="submit"]'));
  633. const submitBtn = buttons.find(btn => {{
  634. return btn.textContent.trim().toLowerCase().includes('book your appointment');
  635. }});
  636. if (!submitBtn) return 'Submit button not found';
  637. const form = submitBtn.closest('form');
  638. if (!form) return 'Correct form not found';
  639. function setReactValue(input, value) {{
  640. if (!input) return;
  641. input.value = value;
  642. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  643. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  644. }}
  645. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  646. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  647. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  648. submitBtn.removeAttribute('disabled');
  649. submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
  650. return 'form_updated';
  651. }} catch (e) {{
  652. return e.toString();
  653. }}
  654. """
  655. update_res = self.page.evaluate(f"() => {{ {js_update_form} }}")
  656. self._log(f"Mode 1: Form update triggered: {update_res}")
  657. if update_res != 'form_updated':
  658. raise BizLogicError(message=f"Failed to update form in Mode 1: {update_res}")
  659. # 通过按钮定位器点击
  660. submit_btn = self.page.locator("button:has-text('Book your appointment')").first
  661. if not submit_btn.count():
  662. raise BizLogicError(message="Submit button not found for Mode 1")
  663. self._log("Mode 1: Moving mouse to submit button and clicking")
  664. time.sleep(random.uniform(0.2, 0.5))
  665. submit_btn.click(timeout=10000)
  666. inject_res = 'clicked'
  667. else:
  668. # Mode 2: 直接 JS 注入和点击 (更快但可能被检测)
  669. js_inject_and_click = f"""
  670. try {{
  671. const buttons = Array.from(document.querySelectorAll('button[type="submit"]'));
  672. const submitBtn = buttons.find(btn => {{
  673. return btn.textContent.trim().toLowerCase().includes('book your appointment');
  674. }});
  675. if (!submitBtn) return 'Submit button not found';
  676. const form = submitBtn.closest('form');
  677. if (!form) return 'Correct form not found';
  678. function setReactValue(input, value) {{
  679. if (!input) return;
  680. input.value = value;
  681. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  682. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  683. }}
  684. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  685. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  686. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  687. submitBtn.removeAttribute('disabled');
  688. submitBtn.click();
  689. return 'clicked';
  690. }} catch (e) {{
  691. return e.toString();
  692. }}
  693. """
  694. inject_res = self.page.evaluate(f"() => {{ {js_inject_and_click} }}")
  695. self._log(f"Mode 2: Form submission triggered: {inject_res}")
  696. if inject_res != 'clicked':
  697. raise BizLogicError(message="Failed to inject form or click the submit button")
  698. self._log("Waiting for Next.js to process the form submission...")
  699. for _ in range(10):
  700. try:
  701. current_page_url = self.page.url
  702. current_page_html = self.page.content()
  703. appointment_confirmation_indicators = [
  704. "order-summary" in current_page_url,
  705. "partner-services" in current_page_url,
  706. "appointment-confirmation" in current_page_url,
  707. "Change my appointment" in current_page_html,
  708. "Book a new appointment" in current_page_html,
  709. ]
  710. if any(appointment_confirmation_indicators):
  711. self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
  712. res.success = True
  713. res.label = selected_label
  714. res.book_date = selected_date
  715. res.book_time = selected_time.time
  716. self._save_screenshot("book_slot_success")
  717. break
  718. toast_selector = '[role=\"alert\"]'
  719. toast_ele = self.page.locator(toast_selector).first
  720. if toast_ele.count():
  721. error_msg = toast_ele.inner_text()
  722. self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
  723. break
  724. time.sleep(0.5)
  725. except Exception:
  726. pass
  727. return res
  728. def _get_proxy_url(self):
  729. # 构造代理
  730. proxy_url = ""
  731. if self.config.proxy.ip:
  732. s = self.config.proxy
  733. if s.username:
  734. proxy_url = f"{s.proto}://{s.username}:{s.password}@{s.ip}:{s.port}"
  735. else:
  736. proxy_url = f"{s.proto}://{s.ip}:{s.port}"
  737. return proxy_url
  def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
      """
      Execute an HTTP request from inside the browser page via an injected JS ``fetch``.

      Args:
          method: HTTP verb; upper-cased before use.
          url: Target URL. ``params`` (if given) are appended as a query string.
          headers: Optional request headers. The dict is copied, never mutated.
          data: Optional request body. A dict is form-urlencoded; any other
              value is embedded unchanged in the fetch options.
          json_data: Optional payload serialised as JSON; takes precedence
              over ``data``.
          params: Optional query parameters (anything ``urlencode`` accepts).
          retry_count: Number of 403-triggered retries already performed
              (at most 2 are attempted).

      Returns:
          A ``BrowserResponse`` when the server answers HTTP 200.

      Raises:
          BizLogicError: Browser page missing, network failure (status 0),
              or any status not handled below.
          SessionExpiredOrInvalidError: HTTP 401.
          PermissionDeniedError: HTTP 403 that could not be resolved by
              refreshing the firewall session.
          RateLimiteddError: HTTP 429.
      """
      from urllib.parse import urlencode

      if not self.page:
          raise BizLogicError("Browser not initialized")

      if params:
          separator = '&' if '?' in url else '?'
          url += separator + urlencode(params)

      fetch_options = {
          "method": method.upper(),
          # Copy so the Content-Type assignment below never leaks into the
          # caller's dict.
          "headers": dict(headers or {}),
          "credentials": "include",
      }

      # Body handling
      if json_data:
          fetch_options['body'] = json.dumps(json_data)
          fetch_options['headers']['Content-Type'] = 'application/json'
      elif data:
          if isinstance(data, dict):
              fetch_options['body'] = urlencode(data)
              fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
          else:
              fetch_options['body'] = data

      # json.dumps() emits a correctly escaped JS string literal, so quotes or
      # backslashes in the URL cannot break (or inject into) the script.
      js_script = f"""
          const url = {json.dumps(url)};
          const options = {json.dumps(fetch_options)};
          return fetch(url, options)
              .then(async response => {{
                  const text = await response.text();
                  const headers = {{}};
                  response.headers.forEach((value, key) => headers[key] = value);
                  return {{
                      status: response.status,
                      body: text,
                      headers: headers,
                      url: response.url
                  }};
              }})
              .catch(error => {{
                  return {{
                      status: 0,
                      body: error.toString(),
                      headers: {{}},
                      url: url
                  }};
              }});
      """
      res_dict = self.page.evaluate(f"() => {{ {js_script} }}")
      resp = BrowserResponse(res_dict)

      if resp.status_code == 200:
          return resp
      elif resp.status_code == 401:
          self.is_healthy = False
          raise SessionExpiredOrInvalidError()
      elif resp.status_code == 403:
          if retry_count < 2:
              self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
              if self._refresh_firewall_session():
                  self._log("Firewall session refreshed. Retrying request...")
                  # `url` already contains the encoded params; passing them
                  # again would duplicate the query string on every retry.
                  return self._perform_request(method, url, headers, data, json_data, None, retry_count + 1)
              else:
                  self._log("Failed to refresh firewall session.")
          raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
      elif resp.status_code == 429:
          self.is_healthy = False
          raise RateLimiteddError()
      else:
          # Status 0 is produced by the JS .catch() branch above.
          if resp.status_code == 0:
              raise BizLogicError(f"Network Error: {resp.text}")
          raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  813. def _refresh_firewall_session(self) -> bool:
  814. """
  815. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  816. """
  817. try:
  818. # 1. 刷新当前页面 (通常 Dashboard 页)
  819. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  820. self._log("Refreshing page to trigger Cloudflare...")
  821. self.page.reload(wait_until="domcontentloaded")
  822. # 2. 调用 CloudflareBypasser
  823. cf = CloudflareBypasser(self.page, log=self.config.debug)
  824. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  825. success = cf.bypass(max_retry=10)
  826. if success:
  827. # 再次确认页面是否正常加载 (非 403 页面)
  828. title = self.page.title().lower()
  829. if "access denied" in title:
  830. return False
  831. # 等待 DOM 稍微稳定
  832. time.sleep(2)
  833. return True
  834. return False
  835. except Exception as e:
  836. self._log(f"Error during firewall refresh: {e}")
  837. return False
  def _solve_recaptcha(self, params) -> str:
      """Solve a reCAPTCHA through the Capsolver HTTP API and return the token.

      Args:
          params: Mapping with ``apiToken`` (Capsolver client key), ``type``
              (task type), ``page`` (website URL), ``siteKey`` and an
              optional ``action``.

      Returns:
          The ``gRecaptchaResponse`` token from the task solution.

      Raises:
          NotFoundError: ``apiToken`` is missing or empty.
          BizLogicError: Task submission failed, no task id was returned,
              the task was reported as failed, or it did not become ready
              within 20 polls.
      """
      key = params.get("apiToken")
      if not key:
          raise NotFoundError("Api-token required")

      submit_url = "https://api.capsolver.com/createTask"
      task = {
          "type": params.get("type"),
          "websiteURL": params.get("page"),
          "websiteKey": params.get("siteKey"),
      }
      if params.get("action"):
          task["pageAction"] = params.get("action")
      # NOTE: no proxy settings are forwarded to Capsolver. With Camoufox the
      # task normally runs in proxy-less mode; sharing the browser proxy is
      # usually unnecessary unless risk control is extremely strict.
      payload = {"clientKey": key, "task": task}

      import requests as req  # local import to avoid name confusion

      r = req.post(submit_url, json=payload, timeout=20)
      if r.status_code != 200:
          raise BizLogicError(message="Failed to submit capsolver task")

      created = r.json()
      task_id = created.get("taskId")
      if not task_id:
          # Without a task id every poll below would fail; report the API
          # error right away instead of waiting for the timeout.
          reason = created.get("errorDescription") or created.get("errorCode")
          raise BizLogicError(message=f"Capsolver did not return a task id: {reason}")

      for _ in range(20):
          r = req.post(
              "https://api.capsolver.com/getTaskResult",
              json={"clientKey": key, "taskId": task_id},
              timeout=20,
          )
          if r.status_code == 200:
              d = r.json()
              status = d.get("status")
              if status == "ready":
                  return d["solution"]["gRecaptchaResponse"]
              if status == "failed" or d.get("errorId"):
                  # A failed task never becomes ready; stop polling.
                  reason = d.get("errorDescription") or d.get("errorCode")
                  raise BizLogicError(message=f"Capsolver task failed: {reason}")
          time.sleep(3)
      raise BizLogicError(message="Capsolver task timeout")
  874. def _parse_travel_groups(self, html_content) -> List[Dict]:
  875. groups = []
  876. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  877. js_match = re.search(js_pattern, html_content, re.DOTALL)
  878. if js_match:
  879. json_str = js_match.group(1).replace(r'\"', '"')
  880. data = json.loads(json_str)
  881. for g in data:
  882. groups.append({
  883. 'group_name': g.get('groupName'),
  884. 'group_number': g.get('formGroupId'),
  885. 'location': g.get('vacName')
  886. })
  887. else:
  888. self._log('Parsed travel group page, but not found travelGroups')
  889. return groups
  890. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  891. slots = []
  892. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  893. match = re.search(pattern, html_content, re.DOTALL)
  894. if match:
  895. json_str = match.group(1).replace(r'\"', '"')
  896. data = json.loads(json_str)
  897. for day in data:
  898. d_str = day.get('day')
  899. for s in day.get('slots', []):
  900. labels = s.get('labels', [])
  901. lbl = ""
  902. # 简化逻辑:TLS label 列表
  903. if 'pta' in labels: lbl = 'pta'
  904. elif 'ptaw' in labels: lbl = 'ptaw'
  905. elif '' in labels or not labels: lbl = ''
  906. slots.append({
  907. 'date': d_str,
  908. 'time': s.get('time'),
  909. 'label': lbl
  910. })
  911. return slots
  912. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  913. if not html:
  914. self.is_healthy = False
  915. raise SessionExpiredOrInvalidError()
  916. html_lower = html.lower()
  917. if keyword.lower() not in html_lower:
  918. session_expire_or_invalid_indicators = [
  919. 'redirected automatically' in html_lower,
  920. 'login' in html_lower and 'password' in html_lower,
  921. 'session expired' in html_lower
  922. ]
  923. if any(session_expire_or_invalid_indicators):
  924. self.is_healthy = False
  925. raise SessionExpiredOrInvalidError()
  926. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  927. if not start_str or not end_str:
  928. return dates
  929. valid_dates = []
  930. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  931. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  932. for date_str in dates:
  933. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  934. if s_date <= curr_date <= e_date:
  935. valid_dates.append(date_str)
  936. random.shuffle(valid_dates)
  937. return valid_dates
  938. def _is_selector_visible(self, selector: str, timeout: int = 10000) -> bool:
  939. try:
  940. self.page.wait_for_selector(selector, state="visible", timeout=timeout)
  941. return True
  942. except PlaywrightTimeoutError:
  943. return False
  944. def _human_type(self, text: str):
  945. for ch in text:
  946. self.page.keyboard.type(ch)
  947. time.sleep(random.uniform(0.03, 0.12))
  948. def _type_into_first_visible(self, selectors: List[str], text: str, field_name: str):
  949. last_err = None
  950. for selector in selectors:
  951. try:
  952. locator = self.page.locator(selector).first
  953. locator.wait_for(state="visible", timeout=3000)
  954. locator.click(timeout=3000)
  955. time.sleep(random.uniform(0.2, 0.6))
  956. locator.fill("")
  957. self._human_type(text)
  958. return
  959. except Exception as e:
  960. last_err = e
  961. continue
  962. raise BizLogicError(message=f"Can't find visible {field_name} input. Last error: {last_err}")
  963. def _close_playwright(self):
  964. if self.page:
  965. try:
  966. self.page.close()
  967. except Exception:
  968. pass
  969. self.page = None
  970. if self.browser_ctx:
  971. try:
  972. self.browser_ctx.close()
  973. except Exception:
  974. pass
  975. self.browser_ctx = None
  976. if self.playwright:
  977. try:
  978. self.playwright.stop()
  979. except Exception:
  980. pass
  981. self.playwright = None
  982. def _rmtree_workspace(self):
  983. if os.path.exists(self.root_workspace):
  984. for _ in range(3):
  985. try:
  986. time.sleep(0.2)
  987. shutil.rmtree(self.root_workspace, ignore_errors=True)
  988. break
  989. except Exception as e:
  990. self._log(f"Cleanup retry: {e}")
  991. time.sleep(0.5)
  992. if os.path.exists(self.root_workspace):
  993. self._log(f"[WARN] Failed to fully remove workspace: {self.root_workspace}")
  994. def _cleanup_failed_session(self):
  995. """create_session 在工作线程内失败时调用;外层会 _stop_pw_thread。"""
  996. self._close_playwright()
  997. self._rmtree_workspace()
  998. # --- 资源清理核心方法 ---
  def cleanup(self):
      """
      Destroy the browser and remove the temporary workspace.

      Three cases are handled:
      - called on the worker thread itself: close and remove directly;
      - a worker is set and ``_pw_thread`` is alive: close Playwright via
        ``_run_on_pw_thread`` (falling back to a direct close if that
        raises), remove the workspace, then stop the thread;
      - otherwise: close and remove directly.
      """
      worker = getattr(self, "_pw_worker", None)

      if worker is not None and threading.current_thread() is worker:
          # Already on the worker thread: tear down in place.
          self._close_playwright()
          self._rmtree_workspace()
          return

      thread_running = worker is not None and self._pw_thread and self._pw_thread.is_alive()
      if not thread_running:
          self._close_playwright()
          self._rmtree_workspace()
          return

      try:
          self._run_on_pw_thread(self._close_playwright)
      except Exception:
          # Dispatching to the worker failed; close from this thread instead.
          self._close_playwright()
      self._rmtree_workspace()
      self._stop_pw_thread()
  1019. def __del__(self):
  1020. """
  1021. 析构函数:当对象被垃圾回收时自动调用
  1022. """
  1023. self.cleanup()
  class TlsPlugin2(TlsPlugin):
      """Compatibility alias for factories that load `TlsPlugin2` by module name."""