tls_plugin2.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import queue
  9. import threading
  10. from datetime import datetime
  11. from typing import List, Dict, Optional, Any, Callable
  12. from urllib.parse import urljoin, urlparse, urlencode
  13. from camoufox import NewBrowser
  14. from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError, Page, BrowserContext
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from utils.cloudflare_bypass_for_scraping2 import CloudflareBypasser
  18. def _camoufox_headless_from_env():
  19. """
  20. Ubuntu/无显示器 下通过环境变量选择 Camoufox 模式(与 NewBrowser 一致):
  21. - 未设置 / 0 / false:有头(需真实 DISPLAY 或自行开 Xvfb 并 export DISPLAY=:99)
  22. - 1 / true / yes / headless:Playwright 真无头(无需 X)
  23. - virtual / xvfb:由 Camoufox 起 Xvfb 虚拟屏(需安装 Xvfb,适合要「有界面栈」又无可接显示器的 Linux)
  24. """
  25. v = (os.environ.get("CAMOUFOX_HEADLESS") or "").strip().lower()
  26. if v in ("1", "true", "yes", "headless"):
  27. return True
  28. if v in ("virtual", "xvfb", "vdisplay"):
  29. return "virtual"
  30. return False
  class BrowserResponse:
      """Small stand-in for ``requests.Response`` built from an in-page fetch result.

      ``result_dict`` is the plain dict returned by the injected JS fetch
      (keys ``status``, ``body``, ``headers``, ``url``); ``None`` or missing
      keys fall back to ``0`` / ``''`` / ``{}`` / ``''``.
      """

      def __init__(self, result_dict):
          result_dict = result_dict or {}
          self.status_code = result_dict.get('status', 0)
          self.text = result_dict.get('body', '')
          self.headers = result_dict.get('headers', {})
          self.url = result_dict.get('url', '')
          # Parsed body, cached by json() after the first successful/failed parse.
          self._json = None

      def json(self):
          """Return the body parsed as JSON, or ``{}`` when it is empty or invalid."""
          if self._json is None:
              if not self.text:
                  return {}
              try:
                  self._json = json.loads(self.text)
              except (ValueError, TypeError):
                  # ValueError covers json.JSONDecodeError; TypeError covers a
                  # non-string body. Anything else (e.g. KeyboardInterrupt) is
                  # no longer swallowed as it was by the former bare ``except``.
                  self._json = {}
          return self._json
  49. class TlsPlugin(IVSPlg):
  50. """
  51. TLSContact 签证预约插件 (Camoufox 版)
  52. """
  53. def __init__(self, group_id: str):
  54. self.group_id = group_id
  55. self.config: Optional[VSPlgConfig] = None
  56. self.free_config: Dict[str, Any] = {}
  57. self.is_healthy = True
  58. self.logger = None
  59. self.page: Optional[Page] = None
  60. self.browser_ctx: Optional[BrowserContext] = None
  61. self.playwright = None
  62. self.travel_group: Optional[Dict] = None
  63. self.instance_id = uuid.uuid4().hex[:8]
  64. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  65. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  66. if not os.path.exists(self.root_workspace):
  67. os.makedirs(self.root_workspace)
  68. self.session_create_time: float = 0
  69. # Playwright/Camoufox 的 Page 只能在创建它的线程使用;Sentinel 在线程池里建会话、在监控线程里 query。
  70. # 用单条工作线程串行所有浏览器操作,避免跨线程卡死或 silent health_check 失败。
  71. self._pw_cmd_queue: "queue.Queue[Optional[Callable[[], None]]]" = queue.Queue()
  72. self._pw_thread: Optional[threading.Thread] = None
  73. self._pw_worker: Optional[threading.Thread] = None
  74. self._pw_thread_ready = threading.Event()
  75. self._pw_thread_lock = threading.Lock()
  76. def get_group_id(self) -> str:
  77. return self.group_id
  78. def set_log(self, logger: Callable[[str], None]):
  79. self.logger = logger
  80. def _log(self, message):
  81. if self.logger:
  82. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  83. else:
  84. print(f'[TlsPlugin] [{self.group_id}] {message}')
  85. def set_config(self, config: VSPlgConfig):
  86. self.config = config
  87. self.free_config = config.free_config or {}
  88. def _ensure_pw_thread(self):
  89. with self._pw_thread_lock:
  90. if self._pw_thread and self._pw_thread.is_alive():
  91. return
  92. self._pw_thread_ready.clear()
  93. t = threading.Thread(target=self._pw_loop, name=f"camoufox-tls-{self.instance_id}", daemon=True)
  94. self._pw_thread = t
  95. t.start()
  96. if not self._pw_thread_ready.wait(timeout=60):
  97. raise BizLogicError("Camoufox worker thread failed to start")
  98. def _pw_loop(self):
  99. self._pw_worker = threading.current_thread()
  100. self._pw_thread_ready.set()
  101. while True:
  102. work = self._pw_cmd_queue.get()
  103. if work is None:
  104. break
  105. work()
  106. def _run_on_pw_thread(self, fn, *args, **kwargs):
  107. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  108. return fn(*args, **kwargs)
  109. if self._pw_worker is None or not self._pw_thread or not self._pw_thread.is_alive():
  110. self._ensure_pw_thread()
  111. out: List[Any] = [None, None]
  112. done = threading.Event()
  113. def work():
  114. try:
  115. out[1] = fn(*args, **kwargs)
  116. except BaseException as e:
  117. out[0] = e
  118. finally:
  119. done.set()
  120. self._pw_cmd_queue.put(work)
  121. if not done.wait(timeout=600):
  122. self._log("Browser thread operation timed out (600s).")
  123. raise BizLogicError("Browser thread operation timeout")
  124. if out[0] is not None:
  125. raise out[0]
  126. return out[1]
  127. def _stop_pw_thread(self):
  128. with self._pw_thread_lock:
  129. t = self._pw_thread
  130. if not t or not t.is_alive():
  131. self._pw_thread = None
  132. self._pw_worker = None
  133. return
  134. self._pw_cmd_queue.put(None)
  135. t.join(timeout=20)
  136. with self._pw_thread_lock:
  137. self._pw_thread = None
  138. self._pw_worker = None
  139. def keep_alive(self):
  140. if self.page is None:
  141. return
  142. def _work():
  143. try:
  144. resp = self._perform_request("GET", self.page.url, retry_count=1)
  145. self._check_page_is_session_expired_or_invalid('Book your appointment', html = resp.text)
  146. except SessionExpiredOrInvalidError as e:
  147. self.is_healthy = False
  148. except Exception as e:
  149. pass
  150. try:
  151. self._run_on_pw_thread(_work)
  152. except Exception:
  153. pass
  154. def _health_check_impl(self) -> bool:
  155. if not self.is_healthy:
  156. return False
  157. if self.page is None:
  158. return False
  159. try:
  160. v = self.page.evaluate("1")
  161. if v != 1:
  162. return False
  163. except:
  164. return False
  165. if self.config.session_max_life > 0:
  166. current_time = time.time()
  167. elapsed_time = current_time - self.session_create_time
  168. if elapsed_time > self.config.session_max_life * 60:
  169. self._log(f"Session expired.")
  170. return False
  171. return True
  172. def health_check(self) -> bool:
  173. if not self.is_healthy or self.page is None:
  174. return False
  175. try:
  176. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  177. return self._health_check_impl()
  178. return self._run_on_pw_thread(self._health_check_impl)
  179. except Exception:
  180. return False
  181. def _save_screenshot(self, name_prefix):
  182. try:
  183. timestamp = int(time.time())
  184. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  185. save_path = os.path.join("data", filename)
  186. os.makedirs("data", exist_ok=True)
  187. self.page.screenshot(path=save_path, full_page=False)
  188. self._log(f"Screenshot saved to {save_path}")
  189. except Exception as e:
  190. self._log(f"Failed to save screenshot: {e}")
  def create_session(self):
      """Create the browser session on the worker thread.

      Starts the worker if necessary and runs ``_create_session_inner`` on it.
      If that fails, the worker thread is stopped and the original exception
      is re-raised.
      """
      self._ensure_pw_thread()
      try:
          self._run_on_pw_thread(self._create_session_inner)
      except Exception:
          # Do not leave a worker thread around for a session that never came up.
          self._stop_pw_thread()
          raise
  def _create_session_inner(self):
      """Create a logged-in session: pass Cloudflare, log in, open the booking page.

      Must run on the single Camoufox/Playwright worker thread (Playwright is
      not thread-safe).

      Steps: launch a persistent Camoufox context (optionally behind the
      configured proxy), open ``free_config['tls_url']``, run the Cloudflare
      bypasser, fill and submit the login form, then click
      ``#book-appointment-btn`` and wait for the "Book your appointment" button.
      On success ``session_create_time`` is set.

      Raises:
          BizLogicError: missing ``tls_url``, Cloudflare bypass timeout, an
              expected element not appearing, or login still on a
              login/auth URL after submit.
          Exception: anything raised by Playwright/Camoufox. In every failure
              case the error is logged, a screenshot is taken when
              ``config.debug`` is set, ``_cleanup_failed_session`` is called,
              and the exception is re-raised.
      """
      self._log(f"Initializing Session (ID: {self.instance_id})...")

      proxy_cfg = None
      proxy = self.config.proxy
      if proxy and proxy.ip:
          proxy_cfg = {"server": f"{proxy.scheme}://{proxy.ip}:{proxy.port}"}
          # Credentials are only attached when both parts are configured.
          if proxy.username and proxy.password:
              proxy_cfg["username"] = proxy.username
              proxy_cfg["password"] = proxy.password
      else:
          self._log("[WARN] No proxy configured!")

      try:
          # Fail fast on a missing URL instead of launching a browser first.
          tls_url = self.free_config.get('tls_url', '')
          if not tls_url:
              raise BizLogicError(message="free_config['tls_url'] is not configured")

          self.playwright = sync_playwright().start()
          headless_opt = _camoufox_headless_from_env()
          self._log(f"Camoufox headless={headless_opt!r} (env CAMOUFOX_HEADLESS)")
          self.browser_ctx = NewBrowser(
              self.playwright,
              persistent_context=True,
              headless=headless_opt,
              user_data_dir=self.user_data_path,
              proxy=proxy_cfg,
              window=(1920, 1080),
          )
          # Reuse the first page of the context when it already has one.
          self.page = self.browser_ctx.pages[0] if self.browser_ctx.pages else self.browser_ctx.new_page()

          self._log(f"Navigating: {tls_url}")
          self.page.goto(tls_url, wait_until="domcontentloaded")
          time.sleep(5)

          cf_bypasser = CloudflareBypasser(self.page, log=True)
          if not cf_bypasser.bypass(max_retry=15):
              raise BizLogicError("Cloudflare bypass timeout")
          time.sleep(3)

          # The login form may only appear after following the "login" link.
          btn_selector = "button:has-text('Login')"
          if not self._is_selector_visible(btn_selector, timeout=3000):
              self.page.locator("a[href*='login']").first.click(timeout=5000)
              time.sleep(3)
          if not self._is_selector_visible(btn_selector, timeout=10000):
              raise BizLogicError(message=f"Can't find selector={btn_selector}")
          time.sleep(random.uniform(0.5, 1))

          # NOTE: reCAPTCHA v2 solving / token injection is currently disabled.

          username = self.config.account.username
          password = self.config.account.password
          self._type_into_first_visible(
              selectors=[
                  "input[name='email']",
                  "input[type='email']",
                  "input#email",
                  "input[autocomplete='username']",
                  "label:has-text('Email') + input",
              ],
              text=username,
              field_name="Email",
          )
          time.sleep(random.uniform(0.5, 1.2))
          self._type_into_first_visible(
              selectors=[
                  "input[name='password']",
                  "input[type='password']",
                  "input#password",
                  "input[autocomplete='current-password']",
                  "label:has-text('Password') + input",
              ],
              text=password,
              field_name="Password",
          )

          self._log("Submitting Login...")
          time.sleep(random.uniform(0.3, 0.8))
          self.page.locator("button:has-text('Login')").first.click(timeout=10000)

          self._log("Waiting for redirect...")
          self.page.wait_for_function(
              "() => !window.location.href.includes('login-actions')",
              timeout=45000,
          )
          time.sleep(3)
          if "login-actions" in self.page.url or "auth" in self.page.url:
              raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
          self.page.wait_for_load_state("domcontentloaded", timeout=15000)
          time.sleep(5)

          # NOTE: travel-group selection and the "no applicant added" check are
          # currently disabled; the flow goes straight to the booking button.

          btn_selector = '#book-appointment-btn'
          self._log(f"Waiting for selector={btn_selector} to render...")
          if not self._is_selector_visible(btn_selector, timeout=15000):
              raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
          self.page.locator(btn_selector).first.click(timeout=10000)
          time.sleep(3)

          btn_selector = "button:has-text('Book your appointment')"
          if not self._is_selector_visible(btn_selector, timeout=10000):
              raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")

          self.session_create_time = time.time()
          self._log("✅ Login & Navigation Success!")
      except Exception as e:
          self._log(f"Session Create Error: {e}")
          if self.config.debug:
              self._save_screenshot("create_session_except")
          self._cleanup_failed_session()
          # Bare raise keeps the original traceback intact.
          raise
  def query(self, apt_type: AppointmentType) -> VSQueryResult:
      """Query slot availability; delegates to ``_query_impl`` on the browser thread."""
      result = self._run_on_pw_thread(self._query_impl, apt_type)
      return result
  def _day_block_locator_candidates(self):
      """Yield candidate locators for calendar "day block" containers, strictest first.

      Each candidate matches ``div`` elements that contain a ``p`` and a
      button whose ``data-testid`` contains ``slot``.
      """
      xpath_variants = (
          # Mirrors the Drission version: direct child p, slot button under a child div.
          "xpath=//div[./p and ./div//button[contains(@data-testid, 'slot')]]",
          # Slightly changed structure: slot button as any descendant.
          "xpath=//div[./p and .//button[contains(@data-testid, 'slot')]]",
          # Only requires some p and some slot button below the div.
          "xpath=//div[.//p and .//button[contains(@data-testid, 'slot')]]",
      )
      for selector in xpath_variants:
          yield self.page.locator(selector)
      # Playwright-native equivalent using chained has-filters.
      divs_with_paragraph = self.page.locator("div").filter(
          has=self.page.locator("p")
      )
      yield divs_with_paragraph.filter(
          has=self.page.locator("button[data-testid*='slot']")
      )
  def _extract_slots_from_calendar_dom(
      self, target_year: int, target_month_num: int
  ) -> List[Dict[str, Any]]:
      """Extract available slots from the calendar DOM using several locator strategies.

      Tries each locator from ``_day_block_locator_candidates`` until one
      matches, then reads the day number from the block's first ``p`` and the
      time from each ``btn-available-slot`` button inside it. Falls back to
      ``_extract_slots_from_available_buttons_only`` when no day block matches
      or no slot could be parsed.

      Args:
          target_year: Year used to build each slot's date.
          target_month_num: Month (1-12) used to build each slot's date.

      Returns:
          A list of ``{"date": "YYYY-MM-DD", "time": "HH:MM", "label": str}``
          dicts; ``label`` is ``"ptaw"``, ``"pta"`` or ``""``.
      """
      day_blocks = None
      for loc in self._day_block_locator_candidates():
          try:
              n = loc.count()
          except Exception:
              # A selector the engine rejects just means "try the next strategy".
              continue
          if n > 0:
              day_blocks = loc
              self._log(f"使用日历块选择器,匹配到 {n} 个 day_blocks")
              break
      if day_blocks is None:
          # No day-block wrapper matched: scan the available buttons directly
          # and look up the date from their ancestors.
          return self._extract_slots_from_available_buttons_only(
              target_year, target_month_num
          )

      all_slots: List[Dict[str, Any]] = []
      for i in range(day_blocks.count()):
          block = day_blocks.nth(i)
          p_ele = block.locator("p").first
          if not p_ele.count():
              continue
          full_date = self._slot_date_from_day_text(
              target_year, target_month_num, p_ele.inner_text()
          )
          if full_date is None:
              continue
          available_btns = block.locator("button[data-testid^='btn-available-slot']")
          for j in range(available_btns.count()):
              slot = self._slot_dict_from_button(available_btns.nth(j), full_date)
              if slot is not None:
                  all_slots.append(slot)
      if all_slots:
          return all_slots
      return self._extract_slots_from_available_buttons_only(
          target_year, target_month_num
      )

  def _extract_slots_from_available_buttons_only(
      self, target_year: int, target_month_num: int
  ) -> List[Dict[str, Any]]:
      """Fallback extraction: start from each available button and find its date row.

      For every ``btn-available-slot`` button, the nearest ancestor ``div``
      containing a ``p`` supplies the day number. Returns the same slot dict
      shape as ``_extract_slots_from_calendar_dom`` (empty list when there are
      no such buttons).
      """
      all_slots: List[Dict[str, Any]] = []
      btns = self.page.locator("button[data-testid^='btn-available-slot']")
      n = btns.count()
      if n == 0:
          return []
      self._log(f"按可用按钮回查日期,共 {n} 个 btn-available-slot")
      for j in range(n):
          btn = btns.nth(j)
          row = btn.locator("xpath=./ancestor::div[.//p][1]")
          p_ele = row.locator("p").first
          if not p_ele.count():
              continue
          full_date = self._slot_date_from_day_text(
              target_year, target_month_num, p_ele.inner_text()
          )
          if full_date is None:
              continue
          slot = self._slot_dict_from_button(btn, full_date)
          if slot is not None:
              all_slots.append(slot)
      return all_slots

  @staticmethod
  def _slot_date_from_day_text(target_year: int, target_month_num: int, day_text: str) -> Optional[str]:
      """Return ``YYYY-MM-DD`` for the first number in ``day_text``, or ``None`` if it is not a real date."""
      day_match = re.search(r"\d+", day_text)
      if not day_match:
          return None
      day = int(day_match.group())
      try:
          # Validates the calendar date (e.g. rejects day 0 or day 45), so callers
          # can later parse the string with strptime without crashing.
          datetime(target_year, target_month_num, day)
      except ValueError:
          return None
      return f"{target_year}-{target_month_num:02d}-{day:02d}"

  @staticmethod
  def _slot_dict_from_button(btn, full_date: str) -> Optional[Dict[str, Any]]:
      """Build a slot dict from one available-slot button, or ``None`` if it shows no ``HH:MM`` time."""
      time_match = re.search(r"\d{2}:\d{2}", btn.inner_html())
      if not time_match:
          return None
      test_id = btn.get_attribute("data-testid") or ""
      # The label is derived from markers embedded in the button's data-testid.
      if "prime" in test_id and "weekend" in test_id:
          lbl = "ptaw"
      elif "prime" in test_id:
          lbl = "pta"
      else:
          lbl = ""
      return {"date": full_date, "time": time_match.group(), "label": lbl}
  def _query_impl(self, apt_type: AppointmentType) -> VSQueryResult:
      """Look up available slots for the configured month; runs on the browser thread.

      The month comes from ``free_config['interest_month']`` (``MM-YYYY``,
      defaulting to the current month). If the calendar is not already showing
      that month, the page is navigated to it and slots are read from the DOM,
      with an HTML re-fetch as a fallback; otherwise the page is re-fetched
      and parsed directly. Slots are then filtered by
      ``free_config['target_labels']`` (default ``["", "pta"]``).

      Args:
          apt_type: Not used by this method.

      Returns:
          A ``VSQueryResult`` with ``success``/``availability_status`` set, and
          ``earliest_date`` and ``availability`` filled in when slots were found.
      """
      res = VSQueryResult()
      res.success = False
      interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
      target_date_obj = datetime.strptime(interest_month, "%m-%Y")
      target_month_text = target_date_obj.strftime("%B %Y")
      target_year = target_date_obj.year
      target_month_num = target_date_obj.month
      all_slots = []

      current_selected_ele = self.page.locator('[data-testid="btn-current-month-available"]').first
      current_month_text = current_selected_ele.inner_text().strip() if current_selected_ele.count() else ""
      is_on_target_month = (current_month_text.lower() == target_month_text.lower())

      if not is_on_target_month:
          self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
          # Either jump via a direct link to the month or step forward with "next".
          for _ in range(12):
              target_btn = self.page.locator(f"a[href*='month={interest_month}']").first
              if target_btn.count():
                  target_btn.click(timeout=5000)
                  time.sleep(3)
                  break
              next_btn = self.page.locator('[data-testid="btn-next-month-available"]').first
              if next_btn.count():
                  next_btn.click(timeout=5000)
                  time.sleep(2)
              else:
                  self._log("Warning: Cannot find target month or 'Next Month' button.")
                  break
          try:
              self.page.wait_for_load_state("networkidle", timeout=20000)
          except PlaywrightTimeoutError:
              # Network never went idle: settle for the DOM being loaded.
              try:
                  self.page.wait_for_load_state("domcontentloaded", timeout=10000)
              except PlaywrightTimeoutError:
                  pass
          time.sleep(0.8)
          self._log("Extracting slots from DOM using robust data-testid features...")
          all_slots = self._extract_slots_from_calendar_dom(
              target_year, target_month_num
          )
          if not all_slots:
              n_slot_btns = self.page.locator("[data-testid*='slot']").count()
              n_avail = self.page.locator("button[data-testid^='btn-available-slot']").count()
              self._log(
                  f"DOM 日历未解析到槽位: [data-testid*=\"slot\"]={n_slot_btns}, "
                  f"btn-available-slot={n_avail},回退为页面 HTML 内嵌 JSON 解析"
              )
              try:
                  resp = self._perform_request("GET", self.page.url, retry_count=1)
                  self._check_page_is_session_expired_or_invalid("Book your appointment", resp.text)
                  all_slots = self._parse_appointment_slots(resp.text)
              except Exception as ex:
                  self._log(f"HTML 回退解析失败: {ex}")
      else:
          self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
          resp = self._perform_request("GET", self.page.url, retry_count=1)
          self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
          all_slots = self._parse_appointment_slots(resp.text)

      target_labels = self.free_config.get("target_labels", ["", "pta"])
      slots = [s for s in all_slots if s.get("label") in target_labels]
      if not slots:
          self._log("No slots available.")
          res.success = False
          res.availability_status = AvailabilityStatus.NoneAvailable
          return res

      # Group times by date, keeping first-seen date order.
      date_map: Dict[datetime, List[TimeSlot]] = {}
      for s in slots:
          dt = datetime.strptime(s["date"], "%Y-%m-%d")
          date_map.setdefault(dt, []).append(
              TimeSlot(time=s["time"], label=str(s.get("label", "")))
          )
      res.success = True
      res.availability_status = AvailabilityStatus.Available
      # Use the true minimum rather than assuming the first slot is the earliest.
      res.earliest_date = min(date_map)
      res.availability = [
          DateAvailability(date=day, times=day_times) for day, day_times in date_map.items()
      ]
      self._log(f"Slot Found! -> {slots}")
      return res
  def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
      """Book a slot; delegates to ``_book_impl`` on the browser thread."""
      outcome = self._run_on_pw_thread(self._book_impl, slot_info, user_inputs)
      return outcome
  def _book_impl(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
      """Pick a matching slot at random, submit the booking form and wait for the outcome.

      Runs on the browser thread.

      Args:
          slot_info: Query result whose ``availability`` lists dates and times.
          user_inputs: Optional dict with ``expected_start_date``,
              ``expected_end_date`` (passed to ``_filter_dates``) and
              ``support_pta`` (default ``True``; also accept ``"pta"`` slots).

      Returns:
          A ``VSBookResult``; ``success`` is ``True`` (with ``label``,
          ``book_date``, ``book_time`` set) only when a confirmation page was
          detected within the polling window.

      Raises:
          NotFoundError: no date or slot satisfies the user's constraints.
          BizLogicError: the form could not be filled in or submitted.
      """
      if user_inputs is None:
          user_inputs = {}
      res = VSBookResult()
      res.success = False
      exp_start = user_inputs.get('expected_start_date', '')
      exp_end = user_inputs.get('expected_end_date', '')
      support_pta = user_inputs.get('support_pta', True)
      target_labels = ['']
      if support_pta:
          target_labels.append('pta')

      # A query result without availability is treated as "no dates".
      availability = slot_info.availability or []
      available_dates_str = [
          da.date.strftime("%Y-%m-%d")
          for da in availability if da.date
      ]
      valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
      if not valid_dates_list:
          raise NotFoundError(message="No dates match user constraints")

      all_possible_slots = []
      for da in availability:
          if not da.date:
              continue
          date_str = da.date.strftime("%Y-%m-%d")
          if date_str not in valid_dates_list:
              continue
          for t in da.times:
              if t.label in target_labels:
                  all_possible_slots.append({
                      "date": date_str,
                      "time_obj": t,
                      "label": t.label
                  })
      if not all_possible_slots:
          raise NotFoundError(message="No suitable slot found (after label filtering)")

      selected_slot = random.choice(all_possible_slots)
      selected_date = selected_slot["date"]
      selected_time = selected_slot["time_obj"]
      selected_label = selected_slot["label"]
      self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")

      # json.dumps yields properly escaped JS string literals, so a quote or
      # backslash in a value can no longer break (or inject into) the script.
      js_date = json.dumps(str(selected_date))
      js_time = json.dumps(str(selected_time.time))
      js_label = json.dumps(str(selected_label))
      js_inject_and_click = f"""
          try {{
              const form = document.querySelector('form');
              if (!form) return 'Form not found';
              function setReactValue(input, value) {{
                  if (!input) return;
                  input.value = value;
                  input.dispatchEvent(new Event('input', {{ bubbles: true }}));
                  input.dispatchEvent(new Event('change', {{ bubbles: true }}));
              }}
              setReactValue(form.querySelector('input[name="date"]'), {js_date});
              setReactValue(form.querySelector('input[name="time"]'), {js_time});
              setReactValue(form.querySelector('input[name="appointmentLabel"]'), {js_label});
              const submitBtn = form.querySelector('button[type="submit"]');
              if (submitBtn) {{
                  submitBtn.removeAttribute('disabled');
                  submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
                  submitBtn.click();
                  return 'clicked';
              }} else {{
                  return 'Submit button not found';
              }}
          }} catch (e) {{
              return e.toString();
          }}
      """
      inject_res = self.page.evaluate(f"() => {{ {js_inject_and_click} }}")
      self._log(f"Form submission triggered: {inject_res}")
      if inject_res != 'clicked':
          raise BizLogicError(message="Failed to inject form or click the submit button")

      self._log("Waiting for Next.js to process the form submission...")
      for _ in range(10):
          try:
              current_page_url = self.page.url
              current_page_html = self.page.content()
              appointment_confirmation_indicators = [
                  "order-summary" in current_page_url,
                  "partner-services" in current_page_url,
                  "appointment-confirmation" in current_page_url,
                  "Change my appointment" in current_page_html,
                  "Book a new appointment" in current_page_html,
              ]
              if any(appointment_confirmation_indicators):
                  self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
                  res.success = True
                  res.label = selected_label
                  res.book_date = selected_date
                  res.book_time = selected_time.time
                  self._save_screenshot("book_slot_success")
                  break
              toast_ele = self.page.locator('[role="alert"]').first
              if toast_ele.count():
                  error_msg = toast_ele.inner_text()
                  self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
                  break
          except Exception as e:
              # The page may be mid-navigation; log and poll again.
              self._log(f"Error while polling booking result (will retry): {e}")
          # Sleep on every non-final iteration, including after an exception,
          # so a failing page read cannot turn the loop into a busy spin.
          time.sleep(0.5)
      else:
          self._log("No booking confirmation or error popup detected within the polling window.")
      return res
  659. def _get_proxy_url(self):
  660. # 构造代理
  661. proxy_url = ""
  662. if self.config.proxy.ip:
  663. s = self.config.proxy
  664. if s.username:
  665. proxy_url = f"{s.scheme}://{s.username}:{s.password}@{s.ip}:{s.port}"
  666. else:
  667. proxy_url = f"{s.scheme}://{s.ip}:{s.port}"
  668. return proxy_url
  def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
      """Run an HTTP request as a JS ``fetch`` inside the browser page.

      Args:
          method: HTTP method name (upper-cased before use).
          url: Target URL.
          headers: Optional request headers; the caller's dict is not modified.
          data: Optional body; a dict is form-encoded, anything else is sent as is.
          json_data: Optional JSON body (takes precedence over ``data``).
          params: Optional query parameters appended to ``url``.
          retry_count: Number of 403 retries already used; a Cloudflare
              refresh + retry is attempted only while this is below 2.

      Returns:
          A ``BrowserResponse`` for HTTP 200.

      Raises:
          BizLogicError: no page, network error (status 0) or any other status.
          SessionExpiredOrInvalidError: HTTP 401 (also marks the plugin unhealthy).
          PermissionDeniedError: HTTP 403 that could not be recovered.
          RateLimiteddError: HTTP 429 (also marks the plugin unhealthy).
      """
      if not self.page:
          raise BizLogicError("Browser not initialized")
      if params:
          separator = '&' if '?' in url else '?'
          url += separator + urlencode(params)
      fetch_options = {
          "method": method.upper(),
          # Copy so adding Content-Type below never mutates the caller's dict.
          "headers": dict(headers or {}),
          "credentials": "include"
      }
      # Body handling
      if json_data:
          fetch_options['body'] = json.dumps(json_data)
          fetch_options['headers']['Content-Type'] = 'application/json'
      elif data:
          if isinstance(data, dict):
              fetch_options['body'] = urlencode(data)
              fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
          else:
              fetch_options['body'] = data
      # json.dumps(url) produces an escaped JS string literal, so quotes or
      # backslashes in the URL cannot break the injected script.
      js_script = f"""
          const url = {json.dumps(url)};
          const options = {json.dumps(fetch_options)};
          return fetch(url, options)
              .then(async response => {{
                  const text = await response.text();
                  const headers = {{}};
                  response.headers.forEach((value, key) => headers[key] = value);
                  return {{
                      status: response.status,
                      body: text,
                      headers: headers,
                      url: response.url
                  }};
              }})
              .catch(error => {{
                  return {{
                      status: 0,
                      body: error.toString(),
                      headers: {{}},
                      url: url
                  }};
              }});
      """
      res_dict = self.page.evaluate(f"() => {{ {js_script} }}")
      resp = BrowserResponse(res_dict)
      if resp.status_code == 200:
          return resp
      if resp.status_code == 401:
          self.is_healthy = False
          raise SessionExpiredOrInvalidError()
      if resp.status_code == 403:
          if retry_count < 2:
              self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
              if self._refresh_firewall_session():
                  self._log("Firewall session refreshed. Retrying request...")
                  # ``url`` already carries the query string, so params must not
                  # be passed again or they would be appended a second time.
                  return self._perform_request(method, url, headers, data, json_data, None, retry_count + 1)
              self._log("Failed to refresh firewall session.")
          raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
      if resp.status_code == 429:
          self.is_healthy = False
          raise RateLimiteddError()
      if resp.status_code == 0:
          raise BizLogicError(f"Network Error: {resp.text}")
      raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  744. def _refresh_firewall_session(self) -> bool:
  745. """
  746. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  747. """
  748. try:
  749. # 1. 刷新当前页面 (通常 Dashboard 页)
  750. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  751. self._log("Refreshing page to trigger Cloudflare...")
  752. self.page.reload(wait_until="domcontentloaded")
  753. # 2. 调用 CloudflareBypasser
  754. cf = CloudflareBypasser(self.page, log=self.config.debug)
  755. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  756. success = cf.bypass(max_retry=10)
  757. if success:
  758. # 再次确认页面是否正常加载 (非 403 页面)
  759. title = self.page.title().lower()
  760. if "access denied" in title:
  761. return False
  762. # 等待 DOM 稍微稳定
  763. time.sleep(2)
  764. return True
  765. return False
  766. except Exception as e:
  767. self._log(f"Error during firewall refresh: {e}")
  768. return False
  def _solve_recaptcha(self, params) -> str:
      """Solve a reCAPTCHA through the Capsolver HTTP API.

      Args:
          params: Mapping read with ``.get``. ``apiToken`` is required;
              ``type``, ``page`` and ``siteKey`` describe the task; ``action``
              is optional and sent as ``pageAction``.

      Returns:
          The ``gRecaptchaResponse`` token from the task solution.

      Raises:
          NotFoundError: ``apiToken`` is missing or empty.
          BizLogicError: The task could not be created, Capsolver reported
              the task as failed, or no result arrived within 20 polls.
      """
      key = params.get("apiToken")
      if not key:
          raise NotFoundError("Api-token required")

      submit_url = "https://api.capsolver.com/createTask"
      task = {
          "type": params.get("type"),
          "websiteURL": params.get("page"),
          "websiteKey": params.get("siteKey"),
      }
      action = params.get("action")
      if action:
          task["pageAction"] = action
      # NOTE: params.get("proxy") is intentionally not forwarded; the task
      # runs without the caller's proxy.

      payload = {"clientKey": key, "task": task}
      import requests as req  # local import under an alias to avoid name confusion

      r = req.post(submit_url, json=payload, timeout=20)
      if r.status_code != 200:
          raise BizLogicError(message="Failed to submit capsolver task")
      task_id = r.json().get("taskId")
      if not task_id:
          # Without a task id every poll would fail; abort now instead of
          # sleeping through the whole polling window.
          raise BizLogicError(message="Failed to submit capsolver task")

      for _ in range(20):
          r = req.post(
              "https://api.capsolver.com/getTaskResult",
              json={"clientKey": key, "taskId": task_id},
              timeout=20,
          )
          if r.status_code == 200:
              d = r.json()
              status = d.get("status")
              if status == "ready":
                  return d["solution"]["gRecaptchaResponse"]
              if status == "failed":
                  # A failed task never becomes ready; stop polling.
                  reason = d.get("errorDescription") or d.get("errorCode")
                  raise BizLogicError(message=f"Capsolver task failed: {reason}")
          time.sleep(3)
      raise BizLogicError(message="Capsolver task timeout")
  805. def _parse_travel_groups(self, html_content) -> List[Dict]:
  806. groups = []
  807. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  808. js_match = re.search(js_pattern, html_content, re.DOTALL)
  809. if js_match:
  810. json_str = js_match.group(1).replace(r'\"', '"')
  811. data = json.loads(json_str)
  812. for g in data:
  813. groups.append({
  814. 'group_name': g.get('groupName'),
  815. 'group_number': g.get('formGroupId'),
  816. 'location': g.get('vacName')
  817. })
  818. else:
  819. self._log('Parsed travel group page, but not found travelGroups')
  820. return groups
  821. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  822. slots = []
  823. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  824. match = re.search(pattern, html_content, re.DOTALL)
  825. if match:
  826. json_str = match.group(1).replace(r'\"', '"')
  827. data = json.loads(json_str)
  828. for day in data:
  829. d_str = day.get('day')
  830. for s in day.get('slots', []):
  831. labels = s.get('labels', [])
  832. lbl = ""
  833. # 简化逻辑:TLS label 列表
  834. if 'pta' in labels: lbl = 'pta'
  835. elif 'ptaw' in labels: lbl = 'ptaw'
  836. elif '' in labels or not labels: lbl = ''
  837. slots.append({
  838. 'date': d_str,
  839. 'time': s.get('time'),
  840. 'label': lbl
  841. })
  842. return slots
  843. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  844. if not html:
  845. self.is_healthy = False
  846. raise SessionExpiredOrInvalidError()
  847. html_lower = html.lower()
  848. if keyword.lower() not in html_lower:
  849. session_expire_or_invalid_indicators = [
  850. 'redirected automatically' in html_lower,
  851. 'login' in html_lower and 'password' in html_lower,
  852. 'session expired' in html_lower
  853. ]
  854. if any(session_expire_or_invalid_indicators):
  855. self.is_healthy = False
  856. raise SessionExpiredOrInvalidError()
  857. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  858. if not start_str or not end_str:
  859. return dates
  860. valid_dates = []
  861. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  862. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  863. for date_str in dates:
  864. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  865. if s_date <= curr_date <= e_date:
  866. valid_dates.append(date_str)
  867. random.shuffle(valid_dates)
  868. return valid_dates
  869. def _is_selector_visible(self, selector: str, timeout: int = 10000) -> bool:
  870. try:
  871. self.page.wait_for_selector(selector, state="visible", timeout=timeout)
  872. return True
  873. except PlaywrightTimeoutError:
  874. return False
  875. def _human_type(self, text: str):
  876. for ch in text:
  877. self.page.keyboard.type(ch)
  878. time.sleep(random.uniform(0.03, 0.12))
  879. def _type_into_first_visible(self, selectors: List[str], text: str, field_name: str):
  880. last_err = None
  881. for selector in selectors:
  882. try:
  883. locator = self.page.locator(selector).first
  884. locator.wait_for(state="visible", timeout=3000)
  885. locator.click(timeout=3000)
  886. time.sleep(random.uniform(0.2, 0.6))
  887. locator.fill("")
  888. self._human_type(text)
  889. return
  890. except Exception as e:
  891. last_err = e
  892. continue
  893. raise BizLogicError(message=f"Can't find visible {field_name} input. Last error: {last_err}")
  894. def _close_playwright(self):
  895. if self.page:
  896. try:
  897. self.page.close()
  898. except Exception:
  899. pass
  900. self.page = None
  901. if self.browser_ctx:
  902. try:
  903. self.browser_ctx.close()
  904. except Exception:
  905. pass
  906. self.browser_ctx = None
  907. if self.playwright:
  908. try:
  909. self.playwright.stop()
  910. except Exception:
  911. pass
  912. self.playwright = None
  913. def _rmtree_workspace(self):
  914. if os.path.exists(self.root_workspace):
  915. for _ in range(3):
  916. try:
  917. time.sleep(0.2)
  918. shutil.rmtree(self.root_workspace, ignore_errors=True)
  919. break
  920. except Exception as e:
  921. self._log(f"Cleanup retry: {e}")
  922. time.sleep(0.5)
  923. if os.path.exists(self.root_workspace):
  924. self._log(f"[WARN] Failed to fully remove workspace: {self.root_workspace}")
  925. def _cleanup_failed_session(self):
  926. """create_session 在工作线程内失败时调用;外层会 _stop_pw_thread。"""
  927. self._close_playwright()
  928. self._rmtree_workspace()
  929. # --- 资源清理核心方法 ---
  def cleanup(self):
      """Shut down the browser and delete the temporary workspace.

      When called from another thread while ``_pw_worker`` is set and
      ``_pw_thread`` is alive, ``_close_playwright`` is handed to
      ``_run_on_pw_thread`` (presumably so it runs on the worker thread; it
      is called directly if that hand-off raises). The workspace is then
      removed and the worker thread stopped.

      In every other case (no worker, called on the worker itself, or the
      thread is no longer alive) Playwright is closed and the workspace
      removed directly on the calling thread.
      """
      worker = getattr(self, "_pw_worker", None)
      must_hand_off = (
          worker is not None
          and threading.current_thread() is not worker
          and self._pw_thread
          and self._pw_thread.is_alive()
      )

      if not must_hand_off:
          self._close_playwright()
          self._rmtree_workspace()
          return

      try:
          self._run_on_pw_thread(self._close_playwright)
      except Exception:
          # Fall back to closing from this thread rather than leaking the browser.
          self._close_playwright()
      self._rmtree_workspace()
      self._stop_pw_thread()
  950. def __del__(self):
  951. """
  952. 析构函数:当对象被垃圾回收时自动调用
  953. """
  954. self.cleanup()
  class TlsPlugin2(TlsPlugin):
      """Alias of ``TlsPlugin`` for factories that load ``TlsPlugin2`` by module name."""