tls_plugin2.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import queue
  9. import threading
  10. from datetime import datetime
  11. from typing import List, Dict, Optional, Any, Callable
  12. from urllib.parse import urljoin, urlparse, urlencode
  13. from camoufox import NewBrowser
  14. from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError, Page, BrowserContext
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from utils.cloudflare_bypass_for_scraping2 import CloudflareBypasser
  18. class BrowserResponse:
  19. """模拟 requests.Response"""
  20. def __init__(self, result_dict):
  21. result_dict = result_dict or {}
  22. self.status_code = result_dict.get('status', 0)
  23. self.text = result_dict.get('body', '')
  24. self.headers = result_dict.get('headers', {})
  25. self.url = result_dict.get('url', '')
  26. self._json = None
  27. def json(self):
  28. if self._json is None:
  29. if not self.text:
  30. return {}
  31. try:
  32. self._json = json.loads(self.text)
  33. except:
  34. self._json = {}
  35. return self._json
  36. class TlsPlugin(IVSPlg):
  37. """
  38. TLSContact 签证预约插件 (Camoufox 版)
  39. """
  40. def __init__(self, group_id: str):
  41. self.group_id = group_id
  42. self.config: Optional[VSPlgConfig] = None
  43. self.free_config: Dict[str, Any] = {}
  44. self.is_healthy = True
  45. self.logger = None
  46. self.page: Optional[Page] = None
  47. self.browser_ctx: Optional[BrowserContext] = None
  48. self.playwright = None
  49. self.travel_group: Optional[Dict] = None
  50. self.instance_id = uuid.uuid4().hex[:8]
  51. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  52. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  53. if not os.path.exists(self.root_workspace):
  54. os.makedirs(self.root_workspace)
  55. self.session_create_time: float = 0
  56. # Playwright/Camoufox 的 Page 只能在创建它的线程使用;Sentinel 在线程池里建会话、在监控线程里 query。
  57. # 用单条工作线程串行所有浏览器操作,避免跨线程卡死或 silent health_check 失败。
  58. self._pw_cmd_queue: "queue.Queue[Optional[Callable[[], None]]]" = queue.Queue()
  59. self._pw_thread: Optional[threading.Thread] = None
  60. self._pw_worker: Optional[threading.Thread] = None
  61. self._pw_thread_ready = threading.Event()
  62. self._pw_thread_lock = threading.Lock()
  63. def get_group_id(self) -> str:
  64. return self.group_id
  65. def set_log(self, logger: Callable[[str], None]):
  66. self.logger = logger
  67. def _log(self, message):
  68. if self.logger:
  69. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  70. else:
  71. print(f'[TlsPlugin] [{self.group_id}] {message}')
  72. def set_config(self, config: VSPlgConfig):
  73. self.config = config
  74. self.free_config = config.free_config or {}
  75. def _ensure_pw_thread(self):
  76. with self._pw_thread_lock:
  77. if self._pw_thread and self._pw_thread.is_alive():
  78. return
  79. self._pw_thread_ready.clear()
  80. t = threading.Thread(target=self._pw_loop, name=f"camoufox-tls-{self.instance_id}", daemon=True)
  81. self._pw_thread = t
  82. t.start()
  83. if not self._pw_thread_ready.wait(timeout=60):
  84. raise BizLogicError("Camoufox worker thread failed to start")
  85. def _pw_loop(self):
  86. self._pw_worker = threading.current_thread()
  87. self._pw_thread_ready.set()
  88. while True:
  89. work = self._pw_cmd_queue.get()
  90. if work is None:
  91. break
  92. work()
  93. def _run_on_pw_thread(self, fn, *args, **kwargs):
  94. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  95. return fn(*args, **kwargs)
  96. if self._pw_worker is None or not self._pw_thread or not self._pw_thread.is_alive():
  97. self._ensure_pw_thread()
  98. out: List[Any] = [None, None]
  99. done = threading.Event()
  100. def work():
  101. try:
  102. out[1] = fn(*args, **kwargs)
  103. except BaseException as e:
  104. out[0] = e
  105. finally:
  106. done.set()
  107. self._pw_cmd_queue.put(work)
  108. if not done.wait(timeout=600):
  109. self._log("Browser thread operation timed out (600s).")
  110. raise BizLogicError("Browser thread operation timeout")
  111. if out[0] is not None:
  112. raise out[0]
  113. return out[1]
  114. def _stop_pw_thread(self):
  115. with self._pw_thread_lock:
  116. t = self._pw_thread
  117. if not t or not t.is_alive():
  118. self._pw_thread = None
  119. self._pw_worker = None
  120. return
  121. self._pw_cmd_queue.put(None)
  122. t.join(timeout=20)
  123. with self._pw_thread_lock:
  124. self._pw_thread = None
  125. self._pw_worker = None
  126. def keep_alive(self):
  127. if self.page is None:
  128. return
  129. def _work():
  130. try:
  131. resp = self._perform_request("GET", self.page.url, retry_count=1)
  132. self._check_page_is_session_expired_or_invalid('Book your appointment', html = resp.text)
  133. except SessionExpiredOrInvalidError as e:
  134. self.is_healthy = False
  135. except Exception as e:
  136. pass
  137. try:
  138. self._run_on_pw_thread(_work)
  139. except Exception:
  140. pass
  141. def _health_check_impl(self) -> bool:
  142. if not self.is_healthy:
  143. return False
  144. if self.page is None:
  145. return False
  146. try:
  147. v = self.page.evaluate("1")
  148. if v != 1:
  149. return False
  150. except:
  151. return False
  152. if self.config.session_max_life > 0:
  153. current_time = time.time()
  154. elapsed_time = current_time - self.session_create_time
  155. if elapsed_time > self.config.session_max_life * 60:
  156. self._log(f"Session expired.")
  157. return False
  158. return True
  159. def health_check(self) -> bool:
  160. if not self.is_healthy or self.page is None:
  161. return False
  162. try:
  163. if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
  164. return self._health_check_impl()
  165. return self._run_on_pw_thread(self._health_check_impl)
  166. except Exception:
  167. return False
  168. def _save_screenshot(self, name_prefix):
  169. try:
  170. timestamp = int(time.time())
  171. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  172. save_path = os.path.join("data", filename)
  173. os.makedirs("data", exist_ok=True)
  174. self.page.screenshot(path=save_path, full_page=False)
  175. self._log(f"Screenshot saved to {save_path}")
  176. except Exception as e:
  177. self._log(f"Failed to save screenshot: {e}")
  178. def create_session(self):
  179. self._ensure_pw_thread()
  180. try:
  181. self._run_on_pw_thread(self._create_session_inner)
  182. except Exception:
  183. self._stop_pw_thread()
  184. raise
  185. def _create_session_inner(self):
  186. """
  187. 全浏览器会话创建:过盾 -> JS注入登录 -> 原生跳转
  188. 必须在同一条 Camoufox/Playwright 工作线程中执行(Playwright 非线程安全)。
  189. """
  190. self._log(f"Initializing Session (ID: {self.instance_id})...")
  191. proxy_cfg = None
  192. if self.config.proxy and self.config.proxy.ip:
  193. p = self.config.proxy
  194. if p.username and p.password:
  195. proxy_cfg = {
  196. "server": f"{p.scheme}://{p.ip}:{p.port}",
  197. "username": p.username,
  198. "password": p.password,
  199. }
  200. else:
  201. proxy_cfg = {"server": f"{p.scheme}://{p.ip}:{p.port}"}
  202. else:
  203. self._log("[WARN] No proxy configured!")
  204. try:
  205. self.playwright = sync_playwright().start()
  206. self.browser_ctx = NewBrowser(
  207. self.playwright,
  208. persistent_context=True,
  209. headless=False,
  210. user_data_dir=self.user_data_path,
  211. proxy=proxy_cfg,
  212. window=(1920, 1080),
  213. )
  214. self.page = self.browser_ctx.pages[0] if self.browser_ctx.pages else self.browser_ctx.new_page()
  215. tls_url = self.free_config.get('tls_url', '')
  216. self._log(f"Navigating: {tls_url}")
  217. self.page.goto(tls_url, wait_until="domcontentloaded")
  218. time.sleep(5)
  219. cf_bypasser = CloudflareBypasser(self.page, log=True)
  220. if not cf_bypasser.bypass(max_retry=15):
  221. raise BizLogicError("Cloudflare bypass timeout")
  222. time.sleep(3)
  223. btn_selector = "button:has-text('Login')"
  224. if not self._is_selector_visible(btn_selector, timeout=3000):
  225. self.page.locator("a[href*='login']").first.click(timeout=5000)
  226. time.sleep(3)
  227. if not self._is_selector_visible(btn_selector, timeout=10000):
  228. raise BizLogicError(message=f"Can't find selector={btn_selector}")
  229. time.sleep(random.uniform(0.5, 1))
  230. # recaptchav2_token = ""
  231. # if self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]'):
  232. # self._log("Solving ReCaptcha...")
  233. # rc_params = {
  234. # "type": "ReCaptchaV2TaskProxyLess",
  235. # "page": self.page.url,
  236. # "siteKey": "6LcDpXcfAAAAAM7wOEsF_38DNsL20tTvPTKxpyn0",
  237. # "apiToken": self.free_config.get("capsolver_key", "")
  238. # }
  239. # recaptchav2_token = self._solve_recaptcha(rc_params)
  240. username = self.config.account.username
  241. password = self.config.account.password
  242. self._type_into_first_visible(
  243. selectors=[
  244. "input[name='email']",
  245. "input[type='email']",
  246. "input#email",
  247. "input[autocomplete='username']",
  248. "label:has-text('Email') + input",
  249. ],
  250. text=username,
  251. field_name="Email",
  252. )
  253. time.sleep(random.uniform(0.5, 1.2))
  254. self._type_into_first_visible(
  255. selectors=[
  256. "input[name='password']",
  257. "input[type='password']",
  258. "input#password",
  259. "input[autocomplete='current-password']",
  260. "label:has-text('Password') + input",
  261. ],
  262. text=password,
  263. field_name="Password",
  264. )
  265. # if recaptchav2_token:
  266. # inject_recaptchav2_token_js = f"""
  267. # var g = document.getElementById('g-recaptcha-response');
  268. # if(g) {{ g.value = "{recaptchav2_token}"; }}
  269. # """
  270. # self._log("Inject ReCaptchaV2 Token via JS...")
  271. # self.page.run_js(inject_recaptchav2_token_js)
  272. # time.sleep(random.uniform(0.5, 1.0))
  273. self._log("Submitting Login...")
  274. time.sleep(random.uniform(0.3, 0.8))
  275. self.page.locator("button:has-text('Login')").first.click(timeout=10000)
  276. self._log("Waiting for redirect...")
  277. self.page.wait_for_function(
  278. "() => !window.location.href.includes('login-actions')",
  279. timeout=45000,
  280. )
  281. time.sleep(3)
  282. if "login-actions" in self.page.url or "auth" in self.page.url:
  283. raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
  284. self.page.wait_for_load_state("domcontentloaded", timeout=15000)
  285. time.sleep(5)
  286. # groups = self._parse_travel_groups(self.page.html)
  287. # location = self.free_config.get('location')
  288. # for g in groups:
  289. # if g['location'] == location:
  290. # self.travel_group = g
  291. # break
  292. # if not self.travel_group:
  293. # self._save_screenshot("group_not_found")
  294. # raise NotFoundError(f"Group not found for {location}")
  295. # formgroup_id = self.travel_group.get('group_number')
  296. # btn_selector = f'tag:button@@name=formGroupId@@value={formgroup_id}'
  297. # self._log(f"Waiting for visible button to render: {formgroup_id}...")
  298. # self.page.wait.eles_loaded(btn_selector, timeout=15)
  299. # buttons = self.page.eles(btn_selector)
  300. # select_btn = None
  301. # for btn in reversed(buttons):
  302. # try:
  303. # w, h = btn.rect.size
  304. # if w > 0 and h > 0:
  305. # select_btn = btn
  306. # break
  307. # except Exception:
  308. # continue
  309. # if not select_btn:
  310. # self._save_screenshot("visible_button_not_found")
  311. # raise BizLogicError(f"Can't find any visible Select button for group {formgroup_id}")
  312. # time.sleep(random.uniform(0.5, 1.2))
  313. # self.mouse.human_click_ele(select_btn)
  314. # self._log("Waiting for url redirect...")
  315. # self.page.wait.url_change('travel-groups', exclude=True, timeout=45)
  316. # time.sleep(2)
  317. # if "travel-groups" in self.page.url or "auth" in self.page.url:
  318. # raise BizLogicError(message="Redirect to service-level Failed!")
  319. # no_applicant_indicators = [
  320. # "Add a new applicant" in self.page.html,
  321. # "You have not yet added an applicant. Please click the button below to add one." in self.page.html,
  322. # "applicants-information" in self.page.url
  323. # ]
  324. # if any(no_applicant_indicators):
  325. # raise BizLogicError(message=f"No applicant added")
  326. btn_selector = '#book-appointment-btn'
  327. self._log(f"Waiting for selector={btn_selector} to render...")
  328. if not self._is_selector_visible(btn_selector, timeout=15000):
  329. raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
  330. self.page.locator(btn_selector).first.click(timeout=10000)
  331. time.sleep(3)
  332. # self._log("Waiting for url redirect...")
  333. # self.page.wait.url_change('service-level', exclude=True, timeout=45)
  334. # time.sleep(2)
  335. # if "service-level" in self.page.url or "auth" in self.page.url:
  336. # raise BizLogicError(message="Redirect to appointment-booking Failed!")
  337. btn_selector = "button:has-text('Book your appointment')"
  338. if not self._is_selector_visible(btn_selector, timeout=10000):
  339. raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
  340. self.session_create_time = time.time()
  341. self._log(f"✅ Login & Navigation Success!")
  342. except Exception as e:
  343. self._log(f"Session Create Error: {e}")
  344. if self.config.debug:
  345. self._save_screenshot("create_session_except")
  346. self._cleanup_failed_session()
  347. raise e
  348. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  349. return self._run_on_pw_thread(self._query_impl, apt_type)
  350. def _day_block_locator_candidates(self):
  351. # 与 Drission 版 `//div[p and div//button[contains(@data-testid, "slot")]]` 对齐(子 div 下含 slot 按钮)
  352. yield self.page.locator(
  353. "xpath=//div[./p and ./div//button[contains(@data-testid, 'slot')]]"
  354. )
  355. # 结构略变:任意后代 button 带 slot
  356. yield self.page.locator(
  357. "xpath=//div[./p and .//button[contains(@data-testid, 'slot')]]"
  358. )
  359. # 仅要求有 p 与 slot 类按钮
  360. yield self.page.locator(
  361. "xpath=//div[.//p and .//button[contains(@data-testid, 'slot')]]"
  362. )
  363. # Playwright 原生 :has
  364. yield self.page.locator("div").filter(
  365. has=self.page.locator("p")
  366. ).filter(
  367. has=self.page.locator("button[data-testid*='slot']")
  368. )
  369. def _extract_slots_from_calendar_dom(
  370. self, target_year: int, target_month_num: int
  371. ) -> List[Dict[str, Any]]:
  372. """多策略定位「日期块 + 可点时段按钮」,与页面结构差异/Camoufox 兼容。"""
  373. all_slots: List[Dict[str, Any]] = []
  374. day_blocks = None
  375. for loc in self._day_block_locator_candidates():
  376. try:
  377. n = loc.count()
  378. except Exception:
  379. continue
  380. if n > 0:
  381. day_blocks = loc
  382. self._log(f"使用日历块选择器,匹配到 {n} 个 day_blocks")
  383. break
  384. if day_blocks is None:
  385. # 不依赖 day_block 外壳:直接扫可用按钮,再向祖先找日期
  386. return self._extract_slots_from_available_buttons_only(
  387. target_year, target_month_num
  388. )
  389. for i in range(day_blocks.count()):
  390. block = day_blocks.nth(i)
  391. p_ele = block.locator("p").first
  392. if not p_ele.count():
  393. continue
  394. day_match = re.search(r"\d+", p_ele.inner_text())
  395. if not day_match:
  396. continue
  397. day_str = day_match.group()
  398. try:
  399. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  400. except ValueError:
  401. continue
  402. available_btns = block.locator("button[data-testid^='btn-available-slot']")
  403. for j in range(available_btns.count()):
  404. btn = available_btns.nth(j)
  405. btn_html = btn.inner_html()
  406. time_match = re.search(r"\d{2}:\d{2}", btn_html)
  407. if not time_match:
  408. continue
  409. time_str = time_match.group()
  410. test_id = btn.get_attribute("data-testid") or ""
  411. if "prime" in test_id and "weekend" in test_id:
  412. lbl = "ptaw"
  413. elif "prime" in test_id:
  414. lbl = "pta"
  415. else:
  416. lbl = ""
  417. all_slots.append(
  418. {"date": full_date, "time": time_str, "label": lbl}
  419. )
  420. if all_slots:
  421. return all_slots
  422. return self._extract_slots_from_available_buttons_only(
  423. target_year, target_month_num
  424. )
  425. def _extract_slots_from_available_buttons_only(
  426. self, target_year: int, target_month_num: int
  427. ) -> List[Dict[str, Any]]:
  428. """当整块 DOM 选不中时,用可用按钮反查日期行。"""
  429. all_slots: List[Dict[str, Any]] = []
  430. btns = self.page.locator("button[data-testid^='btn-available-slot']")
  431. n = btns.count()
  432. if n == 0:
  433. return []
  434. self._log(f"按可用按钮回查日期,共 {n} 个 btn-available-slot")
  435. for j in range(n):
  436. btn = btns.nth(j)
  437. row = btn.locator("xpath=./ancestor::div[.//p][1]")
  438. p_ele = row.locator("p").first
  439. if not p_ele.count():
  440. continue
  441. day_match = re.search(r"\d+", p_ele.inner_text())
  442. if not day_match:
  443. continue
  444. day_str = day_match.group()
  445. try:
  446. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  447. except ValueError:
  448. continue
  449. btn_html = btn.inner_html()
  450. time_match = re.search(r"\d{2}:\d{2}", btn_html)
  451. if not time_match:
  452. continue
  453. time_str = time_match.group()
  454. test_id = btn.get_attribute("data-testid") or ""
  455. if "prime" in test_id and "weekend" in test_id:
  456. lbl = "ptaw"
  457. elif "prime" in test_id:
  458. lbl = "pta"
  459. else:
  460. lbl = ""
  461. all_slots.append({"date": full_date, "time": time_str, "label": lbl})
  462. return all_slots
  463. def _query_impl(self, apt_type: AppointmentType) -> VSQueryResult:
  464. res = VSQueryResult()
  465. res.success = False
  466. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  467. target_date_obj = datetime.strptime(interest_month, "%m-%Y")
  468. target_month_text = target_date_obj.strftime("%B %Y")
  469. target_year = target_date_obj.year
  470. target_month_num = target_date_obj.month
  471. slots = []
  472. all_slots = []
  473. current_selected_ele = self.page.locator('[data-testid="btn-current-month-available"]').first
  474. current_month_text = current_selected_ele.inner_text().strip() if current_selected_ele.count() else ""
  475. is_on_target_month = (current_month_text.lower() == target_month_text.lower())
  476. if not is_on_target_month:
  477. self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
  478. for _ in range(12):
  479. target_btn_xpath = f'xpath://a[contains(@href, "month={interest_month}")]'
  480. target_btn = self.page.locator(f"a[href*='month={interest_month}']").first
  481. if target_btn.count():
  482. target_btn.click(timeout=5000)
  483. time.sleep(3)
  484. break
  485. next_btn = self.page.locator('[data-testid="btn-next-month-available"]').first
  486. if next_btn.count():
  487. next_btn.click(timeout=5000)
  488. time.sleep(2)
  489. else:
  490. self._log("Warning: Cannot find target month or 'Next Month' button.")
  491. break
  492. try:
  493. self.page.wait_for_load_state("networkidle", timeout=20000)
  494. except PlaywrightTimeoutError:
  495. try:
  496. self.page.wait_for_load_state("domcontentloaded", timeout=10000)
  497. except PlaywrightTimeoutError:
  498. pass
  499. time.sleep(0.8)
  500. self._log("Extracting slots from DOM using robust data-testid features...")
  501. all_slots = self._extract_slots_from_calendar_dom(
  502. target_year, target_month_num
  503. )
  504. if not all_slots:
  505. n_slot_btns = self.page.locator("[data-testid*='slot']").count()
  506. n_avail = self.page.locator("button[data-testid^='btn-available-slot']").count()
  507. self._log(
  508. f"DOM 日历未解析到槽位: [data-testid*=\"slot\"]={n_slot_btns}, "
  509. f"btn-available-slot={n_avail},回退为页面 HTML 内嵌 JSON 解析"
  510. )
  511. try:
  512. resp = self._perform_request("GET", self.page.url, retry_count=1)
  513. self._check_page_is_session_expired_or_invalid("Book your appointment", resp.text)
  514. all_slots = self._parse_appointment_slots(resp.text)
  515. except Exception as ex:
  516. self._log(f"HTML 回退解析失败: {ex}")
  517. else:
  518. self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
  519. resp = self._perform_request("GET", self.page.url, retry_count=1)
  520. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  521. all_slots = self._parse_appointment_slots(resp.text)
  522. target_labels = self.free_config.get("target_labels", ["", "pta"])
  523. slots = [s for s in all_slots if s.get("label") in target_labels]
  524. if slots:
  525. res.success = True
  526. earliest_date = slots[0]["date"]
  527. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  528. res.availability_status = AvailabilityStatus.Available
  529. res.earliest_date = earliest_dt
  530. date_map: dict[datetime, list[TimeSlot]] = {}
  531. for s in slots:
  532. date_str = s["date"]
  533. dt = datetime.strptime(date_str, "%Y-%m-%d")
  534. date_map.setdefault(dt, []).append(
  535. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  536. )
  537. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  538. self._log(f"Slot Found! -> {slots}")
  539. else:
  540. self._log("No slots available.")
  541. res.success = False
  542. res.availability_status = AvailabilityStatus.NoneAvailable
  543. return res
  544. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  545. return self._run_on_pw_thread(self._book_impl, slot_info, user_inputs)
  546. def _book_impl(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  547. if user_inputs is None:
  548. user_inputs = {}
  549. res = VSBookResult()
  550. res.success = False
  551. exp_start = user_inputs.get('expected_start_date', '')
  552. exp_end = user_inputs.get('expected_end_date', '')
  553. support_pta = user_inputs.get('support_pta', True)
  554. target_labels = ['']
  555. if support_pta:
  556. target_labels.append('pta')
  557. available_dates_str =[
  558. da.date.strftime("%Y-%m-%d")
  559. for da in slot_info.availability if da.date
  560. ]
  561. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  562. if not valid_dates_list:
  563. raise NotFoundError(message="No dates match user constraints")
  564. all_possible_slots =[]
  565. for da in slot_info.availability:
  566. if not da.date:
  567. continue
  568. date_str = da.date.strftime("%Y-%m-%d")
  569. if date_str in valid_dates_list:
  570. for t in da.times:
  571. if t.label in target_labels:
  572. all_possible_slots.append({
  573. "date": date_str,
  574. "time_obj": t,
  575. "label": t.label
  576. })
  577. if not all_possible_slots:
  578. raise NotFoundError(message="No suitable slot found (after label filtering)")
  579. selected_slot = random.choice(all_possible_slots)
  580. selected_date = selected_slot["date"]
  581. selected_time = selected_slot["time_obj"]
  582. selected_label = selected_slot["label"]
  583. self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
  584. js_inject_and_click = f"""
  585. try {{
  586. const form = document.querySelector('form');
  587. if (!form) return 'Form not found';
  588. function setReactValue(input, value) {{
  589. if (!input) return;
  590. input.value = value;
  591. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  592. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  593. }}
  594. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  595. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  596. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  597. const submitBtn = form.querySelector('button[type="submit"]');
  598. if (submitBtn) {{
  599. submitBtn.removeAttribute('disabled');
  600. submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
  601. submitBtn.click();
  602. return 'clicked';
  603. }} else {{
  604. return 'Submit button not found';
  605. }}
  606. }} catch (e) {{
  607. return e.toString();
  608. }}
  609. """
  610. inject_res = self.page.evaluate(f"() => {{ {js_inject_and_click} }}")
  611. self._log(f"Form submission triggered: {inject_res}")
  612. if inject_res != 'clicked':
  613. raise BizLogicError(message="Failed to inject form or click the submit button")
  614. self._log("Waiting for Next.js to process the form submission...")
  615. for _ in range(10):
  616. try:
  617. current_page_url = self.page.url
  618. current_page_html = self.page.content()
  619. appointment_confirmation_indicators = [
  620. "order-summary" in current_page_url,
  621. "partner-services" in current_page_url,
  622. "appointment-confirmation" in current_page_url,
  623. "Change my appointment" in current_page_html,
  624. "Book a new appointment" in current_page_html,
  625. ]
  626. if any(appointment_confirmation_indicators):
  627. self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
  628. res.success = True
  629. res.label = selected_label
  630. res.book_date = selected_date
  631. res.book_time = selected_time.time
  632. self._save_screenshot("book_slot_success")
  633. break
  634. toast_selector = '[role=\"alert\"]'
  635. toast_ele = self.page.locator(toast_selector).first
  636. if toast_ele.count():
  637. error_msg = toast_ele.inner_text()
  638. self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
  639. break
  640. time.sleep(0.5)
  641. except Exception:
  642. pass
  643. return res
  644. def _get_proxy_url(self):
  645. # 构造代理
  646. proxy_url = ""
  647. if self.config.proxy.ip:
  648. s = self.config.proxy
  649. if s.username:
  650. proxy_url = f"{s.scheme}://{s.username}:{s.password}@{s.ip}:{s.port}"
  651. else:
  652. proxy_url = f"{s.scheme}://{s.ip}:{s.port}"
  653. return proxy_url
  654. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  655. """
  656. 在浏览器上下文中注入 JS 执行 Fetch
  657. """
  658. if not self.page:
  659. raise BizLogicError("Browser not initialized")
  660. if params:
  661. from urllib.parse import urlencode
  662. if '?' in url:
  663. url += '&' + urlencode(params)
  664. else:
  665. url += '?' + urlencode(params)
  666. fetch_options = {
  667. "method": method.upper(),
  668. "headers": headers or {},
  669. "credentials": "include"
  670. }
  671. # Body 处理
  672. if json_data:
  673. fetch_options['body'] = json.dumps(json_data)
  674. fetch_options['headers']['Content-Type'] = 'application/json'
  675. elif data:
  676. if isinstance(data, dict):
  677. from urllib.parse import urlencode
  678. fetch_options['body'] = urlencode(data)
  679. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  680. else:
  681. fetch_options['body'] = data
  682. js_script = f"""
  683. const url = "{url}";
  684. const options = {json.dumps(fetch_options)};
  685. return fetch(url, options)
  686. .then(async response => {{
  687. const text = await response.text();
  688. const headers = {{}};
  689. response.headers.forEach((value, key) => headers[key] = value);
  690. return {{
  691. status: response.status,
  692. body: text,
  693. headers: headers,
  694. url: response.url
  695. }};
  696. }})
  697. .catch(error => {{
  698. return {{
  699. status: 0,
  700. body: error.toString(),
  701. headers: {{}},
  702. url: url
  703. }};
  704. }});
  705. """
  706. res_dict = self.page.evaluate(f"() => {{ {js_script} }}")
  707. resp = BrowserResponse(res_dict)
  708. if resp.status_code == 200:
  709. return resp
  710. elif resp.status_code == 401:
  711. self.is_healthy = False
  712. raise SessionExpiredOrInvalidError()
  713. elif resp.status_code == 403:
  714. if retry_count < 2:
  715. self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
  716. if self._refresh_firewall_session():
  717. self._log("Firewall session refreshed. Retrying request...")
  718. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  719. else:
  720. self._log("Failed to refresh firewall session.")
  721. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  722. elif resp.status_code == 429:
  723. self.is_healthy = False
  724. raise RateLimiteddError()
  725. else:
  726. if resp.status_code == 0:
  727. raise BizLogicError(f"Network Error: {resp.text}")
  728. raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  729. def _refresh_firewall_session(self) -> bool:
  730. """
  731. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  732. """
  733. try:
  734. # 1. 刷新当前页面 (通常 Dashboard 页)
  735. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  736. self._log("Refreshing page to trigger Cloudflare...")
  737. self.page.reload(wait_until="domcontentloaded")
  738. # 2. 调用 CloudflareBypasser
  739. cf = CloudflareBypasser(self.page, log=self.config.debug)
  740. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  741. success = cf.bypass(max_retry=10)
  742. if success:
  743. # 再次确认页面是否正常加载 (非 403 页面)
  744. title = self.page.title().lower()
  745. if "access denied" in title:
  746. return False
  747. # 等待 DOM 稍微稳定
  748. time.sleep(2)
  749. return True
  750. return False
  751. except Exception as e:
  752. self._log(f"Error during firewall refresh: {e}")
  753. return False
  754. def _solve_recaptcha(self, params) -> str:
  755. """调用 VSCloudApi 解决 ReCaptcha"""
  756. key = params.get("apiToken")
  757. if not key: raise NotFoundError("Api-token required")
  758. submit_url = "https://api.capsolver.com/createTask"
  759. task = {
  760. "type": params.get("type"),
  761. "websiteURL": params.get("page"),
  762. "websiteKey": params.get("siteKey"),
  763. }
  764. if params.get("action"):
  765. task["pageAction"] = params.get("action")
  766. # if params.get("proxy"):
  767. # p = urlparse(params.get("proxy"))
  768. # task["proxyType"] = p.scheme
  769. # task["proxyAddress"] = p.hostname
  770. # task["proxyPort"] = p.port
  771. # if p.username:
  772. # task["proxyLogin"] = p.username
  773. # task["proxyPassword"] = p.password
  774. # 注意:使用 Camoufox 后,通常是 ProxyLess 模式
  775. # 除非你想让 Capsolver 也用同样的代理(通常不需要,除非风控极严)
  776. payload = {"clientKey": key, "task": task}
  777. import requests as req # 局部引用,避免混淆
  778. r = req.post(submit_url, json=payload, timeout=20)
  779. if r.status_code != 200:
  780. raise BizLogicError(message="Failed to submit capsolver task")
  781. task_id = r.json().get("taskId")
  782. for _ in range(20):
  783. r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
  784. if r.status_code == 200:
  785. d = r.json()
  786. if d.get("status") == "ready":
  787. return d["solution"]["gRecaptchaResponse"]
  788. time.sleep(3)
  789. raise BizLogicError(message="Capsolver task timeout")
  790. def _parse_travel_groups(self, html_content) -> List[Dict]:
  791. groups = []
  792. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  793. js_match = re.search(js_pattern, html_content, re.DOTALL)
  794. if js_match:
  795. json_str = js_match.group(1).replace(r'\"', '"')
  796. data = json.loads(json_str)
  797. for g in data:
  798. groups.append({
  799. 'group_name': g.get('groupName'),
  800. 'group_number': g.get('formGroupId'),
  801. 'location': g.get('vacName')
  802. })
  803. else:
  804. self._log('Parsed travel group page, but not found travelGroups')
  805. return groups
  806. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  807. slots = []
  808. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  809. match = re.search(pattern, html_content, re.DOTALL)
  810. if match:
  811. json_str = match.group(1).replace(r'\"', '"')
  812. data = json.loads(json_str)
  813. for day in data:
  814. d_str = day.get('day')
  815. for s in day.get('slots', []):
  816. labels = s.get('labels', [])
  817. lbl = ""
  818. # 简化逻辑:TLS label 列表
  819. if 'pta' in labels: lbl = 'pta'
  820. elif 'ptaw' in labels: lbl = 'ptaw'
  821. elif '' in labels or not labels: lbl = ''
  822. slots.append({
  823. 'date': d_str,
  824. 'time': s.get('time'),
  825. 'label': lbl
  826. })
  827. return slots
  828. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  829. if not html:
  830. self.is_healthy = False
  831. raise SessionExpiredOrInvalidError()
  832. html_lower = html.lower()
  833. if keyword.lower() not in html_lower:
  834. session_expire_or_invalid_indicators = [
  835. 'redirected automatically' in html_lower,
  836. 'login' in html_lower and 'password' in html_lower,
  837. 'session expired' in html_lower
  838. ]
  839. if any(session_expire_or_invalid_indicators):
  840. self.is_healthy = False
  841. raise SessionExpiredOrInvalidError()
  842. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  843. if not start_str or not end_str:
  844. return dates
  845. valid_dates = []
  846. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  847. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  848. for date_str in dates:
  849. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  850. if s_date <= curr_date <= e_date:
  851. valid_dates.append(date_str)
  852. random.shuffle(valid_dates)
  853. return valid_dates
  854. def _is_selector_visible(self, selector: str, timeout: int = 10000) -> bool:
  855. try:
  856. self.page.wait_for_selector(selector, state="visible", timeout=timeout)
  857. return True
  858. except PlaywrightTimeoutError:
  859. return False
  860. def _human_type(self, text: str):
  861. for ch in text:
  862. self.page.keyboard.type(ch)
  863. time.sleep(random.uniform(0.03, 0.12))
  864. def _type_into_first_visible(self, selectors: List[str], text: str, field_name: str):
  865. last_err = None
  866. for selector in selectors:
  867. try:
  868. locator = self.page.locator(selector).first
  869. locator.wait_for(state="visible", timeout=3000)
  870. locator.click(timeout=3000)
  871. time.sleep(random.uniform(0.2, 0.6))
  872. locator.fill("")
  873. self._human_type(text)
  874. return
  875. except Exception as e:
  876. last_err = e
  877. continue
  878. raise BizLogicError(message=f"Can't find visible {field_name} input. Last error: {last_err}")
  879. def _close_playwright(self):
  880. if self.page:
  881. try:
  882. self.page.close()
  883. except Exception:
  884. pass
  885. self.page = None
  886. if self.browser_ctx:
  887. try:
  888. self.browser_ctx.close()
  889. except Exception:
  890. pass
  891. self.browser_ctx = None
  892. if self.playwright:
  893. try:
  894. self.playwright.stop()
  895. except Exception:
  896. pass
  897. self.playwright = None
  898. def _rmtree_workspace(self):
  899. if os.path.exists(self.root_workspace):
  900. for _ in range(3):
  901. try:
  902. time.sleep(0.2)
  903. shutil.rmtree(self.root_workspace, ignore_errors=True)
  904. break
  905. except Exception as e:
  906. self._log(f"Cleanup retry: {e}")
  907. time.sleep(0.5)
  908. if os.path.exists(self.root_workspace):
  909. self._log(f"[WARN] Failed to fully remove workspace: {self.root_workspace}")
  910. def _cleanup_failed_session(self):
  911. """create_session 在工作线程内失败时调用;外层会 _stop_pw_thread。"""
  912. self._close_playwright()
  913. self._rmtree_workspace()
  914. # --- 资源清理核心方法 ---
  915. def cleanup(self):
  916. """
  917. 销毁浏览器并彻底删除临时文件
  918. """
  919. w = getattr(self, "_pw_worker", None)
  920. on_worker = w is not None and threading.current_thread() is w
  921. if on_worker:
  922. self._close_playwright()
  923. self._rmtree_workspace()
  924. return
  925. if w is not None and self._pw_thread and self._pw_thread.is_alive():
  926. try:
  927. self._run_on_pw_thread(self._close_playwright)
  928. except Exception:
  929. self._close_playwright()
  930. self._rmtree_workspace()
  931. self._stop_pw_thread()
  932. else:
  933. self._close_playwright()
  934. self._rmtree_workspace()
  935. def __del__(self):
  936. """
  937. 析构函数:当对象被垃圾回收时自动调用
  938. """
  939. self.cleanup()
  940. class TlsPlugin2(TlsPlugin):
  941. """兼容工厂按模块名加载 `TlsPlugin2` 的场景。"""
  942. pass