tls_plugin.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import socket
  9. from datetime import datetime
  10. from typing import List, Dict, Optional, Any, Callable
  11. from urllib.parse import urljoin, urlparse, urlencode, parse_qs
  12. # DrissionPage 核心
  13. from DrissionPage import ChromiumPage, ChromiumOptions
  14. from vs_plg import IVSPlg
  15. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  16. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  17. from toolkit.proxy_tunnel import ProxyTunnel
  18. from utils.mouse import HumanMouse
  19. from utils.keyboard import HumanKeyboard
  20. from utils.fingerprint_utils import FingerprintGenerator
  21. class BrowserResponse:
  22. """模拟 requests.Response"""
  23. def __init__(self, result_dict):
  24. result_dict = result_dict or {}
  25. self.status_code = result_dict.get('status', 0)
  26. self.text = result_dict.get('body', '')
  27. self.headers = result_dict.get('headers', {})
  28. self.url = result_dict.get('url', '')
  29. self._json = None
  30. def json(self):
  31. if self._json is None:
  32. if not self.text:
  33. return {}
  34. try:
  35. self._json = json.loads(self.text)
  36. except:
  37. self._json = {}
  38. return self._json
  39. class TlsPlugin(IVSPlg):
  40. """
  41. TLSContact 签证预约插件 (DrissionPage 版)
  42. """
  43. def __init__(self, group_id: str):
  44. self.group_id = group_id
  45. self.config: Optional[VSPlgConfig] = None
  46. self.free_config: Dict[str, Any] = {}
  47. self.is_healthy = True
  48. self.logger = None
  49. self.mouse = None
  50. self.keyboard = None
  51. self.page: Optional[ChromiumPage] = None
  52. self.travel_group: Optional[Dict] = None
  53. self.instance_id = uuid.uuid4().hex[:8]
  54. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  55. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  56. if not os.path.exists(self.root_workspace):
  57. os.makedirs(self.root_workspace)
  58. self.tunnel = None
  59. self.session_create_time: float = 0
  60. def get_group_id(self) -> str:
  61. return self.group_id
  62. def set_log(self, logger: Callable[[str], None]):
  63. self.logger = logger
  64. def _log(self, message):
  65. if self.logger:
  66. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  67. else:
  68. print(f'[TlsPlugin] [{self.group_id}] {message}')
  69. def set_config(self, config: VSPlgConfig):
  70. self.config = config
  71. self.free_config = config.free_config or {}
  72. def keep_alive(self):
  73. try:
  74. resp = self._perform_request("GET", self.page.url, retry_count=1)
  75. self._check_page_is_session_expired_or_invalid('Book your appointment', html = resp.text)
  76. except SessionExpiredOrInvalidError as e:
  77. self.is_healthy = False
  78. except Exception as e:
  79. pass
  80. def health_check(self) -> bool:
  81. if not self.is_healthy:
  82. return False
  83. if self.page is None:
  84. return False
  85. try:
  86. if not self.page.run_js("return 1;"):
  87. return False
  88. except:
  89. return False
  90. if self.config.session_max_life > 0:
  91. current_time = time.time()
  92. elapsed_time = current_time - self.session_create_time
  93. if elapsed_time > self.config.session_max_life:
  94. self._log(f"Session expired.")
  95. return False
  96. return True
  97. def _save_screenshot(self, name_prefix):
  98. try:
  99. timestamp = int(time.time())
  100. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  101. save_path = os.path.join("data", filename)
  102. os.makedirs("data", exist_ok=True)
  103. self.page.get_screenshot(path=save_path, full_page=False)
  104. self._log(f"Screenshot saved to {save_path}")
  105. except Exception as e:
  106. self._log(f"Failed to save screenshot: {e}")
  107. def create_session(self):
  108. """
  109. 全浏览器会话创建:过盾 -> JS注入登录 -> 状态机自动路由导航 -> 到达目标页
  110. """
  111. self._log(f"Initializing Session (ID: {self.instance_id})...")
  112. co = ChromiumOptions()
  113. def get_free_port():
  114. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  115. s.bind(('', 0))
  116. return s.getsockname()[1]
  117. debug_port = get_free_port()
  118. self._log(f"Assigned Debug Port: {debug_port}")
  119. co.set_local_port(debug_port)
  120. co.set_user_data_path(self.user_data_path)
  121. chrome_path = os.getenv("CHROME_BIN")
  122. if chrome_path and os.path.exists(chrome_path):
  123. co.set_paths(browser_path=chrome_path)
  124. if self.config.proxy and self.config.proxy.ip:
  125. p = self.config.proxy
  126. if p.username and p.password:
  127. self._log(f"Starting Proxy Tunnel for {p.ip}...")
  128. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  129. local_proxy = self.tunnel.start()
  130. self._log(f"Tunnel started at {local_proxy}")
  131. co.set_argument(f'--proxy-server={local_proxy}')
  132. else:
  133. proxy_str = f"{p.proto}://{p.ip}:{p.port}"
  134. co.set_argument(f'--proxy-server={proxy_str}')
  135. else:
  136. self._log("[WARN] No proxy configured!")
  137. fingerprint_gen = FingerprintGenerator()
  138. specific_fp = fingerprint_gen.generate(self.config.account.username)
  139. self._log(f'browser fingerprint={specific_fp}')
  140. co.headless(False)
  141. co.set_argument('--no-sandbox')
  142. co.set_argument('--disable-dev-shm-usage')
  143. co.set_argument('--window-size=1920,1080')
  144. co.set_argument('--disable-blink-features=AutomationControlled')
  145. co.set_argument(f"--fingerprint={specific_fp.get('seed')}")
  146. co.set_argument(f"--fingerprint-platform={specific_fp.get('platform')}")
  147. co.set_argument(f"--fingerprint-brand={specific_fp.get('brand')}")
  148. try:
  149. self.page = ChromiumPage(co)
  150. # --- 预检指纹信息 ---
  151. if self.config.debug:
  152. self.page.get('https://example.com')
  153. js_script = """
  154. function getFingerprint() {
  155. let webglVendor = 'Unknown';
  156. let webglRenderer = 'Unknown';
  157. try {
  158. let canvas = document.createElement('canvas');
  159. let gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
  160. if (gl) {
  161. let debugInfo = gl.getExtension('WEBGL_debug_renderer_info');
  162. if (debugInfo) {
  163. webglVendor = gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL);
  164. webglRenderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL);
  165. }
  166. }
  167. } catch(e) {}
  168. return {
  169. "User-Agent": navigator.userAgent,
  170. "Platform": navigator.userAgentData ? navigator.userAgentData.platform : navigator.platform,
  171. "Brands": navigator.userAgentData ? navigator.userAgentData.brands.map(b => b.brand).join(', ') : 'Not Supported',
  172. "CPU Cores": navigator.hardwareConcurrency,
  173. "Language": navigator.language,
  174. "Timezone": Intl.DateTimeFormat().resolvedOptions().timeZone,
  175. "WebGL Vendor": webglVendor,
  176. "WebGL Renderer": webglRenderer
  177. };
  178. }
  179. return getFingerprint();
  180. """
  181. fp_data = self.page.run_js(js_script)
  182. self._log("================ 预检浏览器指纹数据 ================")
  183. self._log(json.dumps(fp_data, indent=4, ensure_ascii=False))
  184. self._log("====================================================")
  185. # --- 初始化访问与过盾 ---
  186. tls_url = self.free_config.get('tls_url', '')
  187. self._log(f"Navigating: {tls_url}")
  188. self.page.get(tls_url)
  189. time.sleep(5)
  190. cf_bypasser = CloudflareBypasser(self.page, log=True)
  191. if not cf_bypasser.bypass(max_retry=15):
  192. raise BizLogicError("Cloudflare bypass timeout")
  193. time.sleep(3)
  194. cf_bypasser.handle_waiting_room()
  195. # --- 初始化人类行为模拟工具 ---
  196. self._log("Init humanize tools...")
  197. self.mouse = HumanMouse(self.page, debug=True)
  198. self.keyboard = HumanKeyboard(self.page)
  199. viewport_width = self.page.rect.viewport_size[0]
  200. viewport_height = self.page.rect.viewport_size[1]
  201. init_x = random.randint(10, viewport_width - 10)
  202. init_y = random.randint(10, viewport_height - 10)
  203. self.mouse.move(init_x, init_y)
  204. max_steps = 20
  205. session_created = False
  206. has_submitted_login = False
  207. for step in range(max_steps):
  208. self.page.wait.load_start()
  209. current_url = self.page.url
  210. self._log(f"--- [Router Step {step+1}] Current URL: {current_url} ---")
  211. # 状态 1:到达终极目标页面 (成功退出条件)
  212. if "appointment-booking" in current_url or self.page.ele('tag:button@text():Book your appointment', timeout=1):
  213. btn_selector = 'tag:button@text():Book your appointment'
  214. if self.page.wait.ele_displayed(btn_selector, timeout=10):
  215. self.session_create_time = time.time()
  216. self._log("✅ Login & Navigation Success! Reached appointment-booking.")
  217. session_created = True
  218. break
  219. # 状态 2:遇到没有申请人的拦截页 (致命错误退出条件)
  220. no_applicant_indicators = [
  221. "Add a new applicant" in self.page.html,
  222. "You have not yet added an applicant" in self.page.html,
  223. "applicants-information" in current_url
  224. ]
  225. if any(no_applicant_indicators):
  226. raise BizLogicError(message="No applicant added. Cannot proceed to booking.")
  227. # 状态 3:首页/登录入口页 -> 需要点击进入登录
  228. if self.page.ele("tag:a@@href:login", timeout=1) and not self.page.ele('tag:label@@text():Email', timeout=1):
  229. self._log("State: Login Portal. Clicking login link...")
  230. login_link = self.page.ele("tag:a@@href:login")
  231. self.mouse.human_click_ele(login_link)
  232. time.sleep(3)
  233. continue
  234. # 状态 4:真正的登录表单页
  235. if self.page.ele('tag:label@@text():Email', timeout=1) and not has_submitted_login:
  236. self._log("State: Login Form. Processing credentials and Captcha...")
  237. recaptchav2_token = ""
  238. if self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]'):
  239. rec_iframe = self.page.ele('xpath://iframe[contains(@src, "recaptcha")]')
  240. rec_iframe_src = rec_iframe.attr('src')
  241. rec_parsed = urlparse(rec_iframe_src)
  242. rec_params = parse_qs(rec_parsed.query)
  243. rec_sitekey = rec_params.get("k", [None])[0]
  244. rec_size = rec_params.get("size", [None])[0]
  245. if 'normal' == rec_size:
  246. self._log(f"Solving ReCaptcha sitekey={rec_sitekey}...")
  247. rc_params = {
  248. "type": "ReCaptchaV2TaskProxyLess",
  249. "page": current_url,
  250. "siteKey": rec_sitekey,
  251. "apiToken": self.free_config.get("capsolver_key", "")
  252. }
  253. recaptchav2_token = self._solve_recaptcha(rc_params)
  254. username = self.config.account.username
  255. password = self.config.account.password
  256. input_ele = self.page.ele('tag:label@@text():Email').next()
  257. self.mouse.human_click_ele(input_ele)
  258. time.sleep(random.uniform(0.2, 0.6))
  259. self.keyboard.type_text(username, humanize=True)
  260. time.sleep(random.uniform(0.5, 1.2))
  261. input_ele = self.page.ele('tag:label@@text():Password').next()
  262. self.mouse.human_click_ele(input_ele)
  263. time.sleep(random.uniform(0.2, 0.6))
  264. self.keyboard.type_text(password, humanize=True)
  265. # 注入 Token
  266. if recaptchav2_token:
  267. inject_js = f"var g = document.getElementById('g-recaptcha-response'); if(g) {{ g.value = '{recaptchav2_token}'; }}"
  268. self.page.run_js(inject_js)
  269. time.sleep(random.uniform(0.5, 1.0))
  270. self._log("Submitting Login...")
  271. login_btn = self.page.ele('tag:button@@text():Login')
  272. self.mouse.human_click_ele(login_btn)
  273. has_submitted_login = True
  274. time.sleep(3)
  275. continue
  276. # 状态 5:Travel Groups 页面
  277. if "travel-groups" in current_url:
  278. self._log("State: Travel Groups. Selecting targeted group...")
  279. groups = self._parse_travel_groups(self.page.html)
  280. location = self.free_config.get('location')
  281. self.travel_group = next((g for g in groups if location in g['location']), None)
  282. if not self.travel_group:
  283. self._save_screenshot("group_not_found")
  284. raise NotFoundError(f"Group not found for {location}")
  285. formgroup_id = self.travel_group.get('group_number')
  286. btn_selector = f'tag:button@@name=formGroupId@@value={formgroup_id}'
  287. if self.page.wait.eles_loaded(btn_selector, timeout=10):
  288. buttons = self.page.eles(btn_selector)
  289. select_btn = next((btn for btn in reversed(buttons) if btn.rect.size[0] > 0 and btn.rect.size[1] > 0), None)
  290. if select_btn:
  291. time.sleep(random.uniform(0.5, 1.2))
  292. self.mouse.human_click_ele(select_btn)
  293. time.sleep(3)
  294. continue
  295. else:
  296. self._log("[WARN] Select button found but not visible.")
  297. else:
  298. self._log(f"[WARN] Wait timeout for group button {formgroup_id}")
  299. # 状态 6:中间过渡页,需点击 "Book Appointment" 继续往下走
  300. if self.page.ele('#book-appointment-btn', timeout=1):
  301. self._log("State: Intermediate Dashboard. Clicking Book Appointment button...")
  302. self.mouse.human_click_ele(self.page.ele('#book-appointment-btn'))
  303. time.sleep(3)
  304. continue
  305. # 状态 7:登录失败校验 或 未知加载状态
  306. if "login-actions" in current_url and has_submitted_login:
  307. self._log("Waiting on login-actions... (Might be authenticating or invalid credentials)")
  308. time.sleep(2)
  309. if self.page.ele('text:Invalid username or password', timeout=1): # 假设网页上有错误提示
  310. raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
  311. continue
  312. # 兜底:未匹配到明确状态,等待页面渲染或重定向
  313. self._log("State: Transitioning or Unknown. Waiting 2 seconds...")
  314. time.sleep(2)
  315. # 如果循环耗尽还没到达目标
  316. if not session_created:
  317. raise BizLogicError(f"Failed to reach appointment-booking after {max_steps} navigation steps. Stuck at: {self.page.url}")
  318. except Exception as e:
  319. self._log(f"Session Create Error: {e}")
  320. if self.config.debug:
  321. self._save_screenshot("create_session_except")
  322. self.cleanup()
  323. raise e
  324. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  325. res = VSQueryResult()
  326. res.success = False
  327. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  328. target_date_obj = datetime.strptime(interest_month, "%m-%Y")
  329. target_month_text = target_date_obj.strftime("%B %Y")
  330. target_year = target_date_obj.year
  331. target_month_num = target_date_obj.month
  332. slots = []
  333. current_selected_ele = self.page.ele('@data-testid=btn-current-month-available')
  334. current_month_text = current_selected_ele.text.strip() if current_selected_ele else ""
  335. is_on_target_month = (current_month_text.lower() == target_month_text.lower())
  336. if not is_on_target_month:
  337. self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
  338. reached_target = False
  339. for step in range(12):
  340. current_ele = self.page.ele('@data-testid=btn-current-month-available', timeout=2)
  341. if current_ele and current_ele.text.strip().lower() == target_month_text.lower():
  342. self._log(f"✅ Successfully navigated to target month: '{target_month_text}'!")
  343. reached_target = True
  344. break
  345. next_btn = self.page.ele('@data-testid=btn-next-month-available', timeout=2)
  346. if next_btn and next_btn.tag.lower() == 'button':
  347. self._log(f"Clicking next month: {next_btn.text.strip()} ...")
  348. next_btn.click(by_js=True)
  349. time.sleep(2.5)
  350. else:
  351. self._log("⚠️ Reached the end of the calendar or 'Next Month' is disabled.")
  352. break
  353. if not reached_target:
  354. self._log(f"❌ Could not navigate to target month: {target_month_text}. Stop parsing.")
  355. res.success = False
  356. res.availability_status = AvailabilityStatus.NoneAvailable
  357. return res
  358. self._log("Extracting slots from DOM using robust data-testid features...")
  359. slots = self._scan_dom_for_slots(target_year, target_month_num)
  360. else:
  361. self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
  362. resp = self._perform_request("GET", self.page.url, retry_count=1)
  363. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  364. slots = self._parse_appointment_slots(resp.text)
  365. if slots:
  366. res.success = True
  367. earliest_date = slots[0]["date"]
  368. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  369. res.availability_status = AvailabilityStatus.Available
  370. res.earliest_date = earliest_dt
  371. date_map: dict[datetime, list[TimeSlot]] = {}
  372. for s in slots:
  373. date_str = s["date"]
  374. dt = datetime.strptime(date_str, "%Y-%m-%d")
  375. date_map.setdefault(dt, []).append(
  376. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  377. )
  378. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  379. self._log(f"Slot Found! -> {slots}")
  380. else:
  381. self._log("No slots available.")
  382. res.success = False
  383. res.availability_status = AvailabilityStatus.NoneAvailable
  384. return res
  385. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  386. res = VSBookResult()
  387. res.success = False
  388. exp_start = user_inputs.get('expected_start_date', '')
  389. exp_end = user_inputs.get('expected_end_date', '')
  390. support_pta = user_inputs.get('support_pta', True)
  391. target_labels = ['']
  392. if support_pta:
  393. target_labels.append('pta')
  394. available_dates_str =[
  395. da.date.strftime("%Y-%m-%d")
  396. for da in slot_info.availability if da.date
  397. ]
  398. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  399. if not valid_dates_list:
  400. raise NotFoundError(message="No dates match user constraints")
  401. all_possible_slots =[]
  402. for da in slot_info.availability:
  403. if not da.date:
  404. continue
  405. date_str = da.date.strftime("%Y-%m-%d")
  406. if date_str in valid_dates_list:
  407. for t in da.times:
  408. if t.label in target_labels:
  409. all_possible_slots.append({
  410. "date": date_str,
  411. "time_obj": t,
  412. "label": t.label
  413. })
  414. if not all_possible_slots:
  415. raise NotFoundError(message="No suitable slot found (after label filtering)")
  416. selected_slot = random.choice(all_possible_slots)
  417. selected_date = selected_slot["date"]
  418. selected_time = selected_slot["time_obj"]
  419. selected_label = selected_slot["label"]
  420. self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
  421. # ================== 新增:随机选择预订模式 ==================
  422. book_mode = random.choice([1, 2])
  423. self._log(f"Using booking mode: {book_mode}")
  424. if book_mode == 1:
  425. rand_x = random.randint(300, 800)
  426. rand_y = random.randint(400, 700)
  427. self._log(f"Mode 1: Moving mouse to ({rand_x}, {rand_y}) and clicking, select slot")
  428. self.mouse.click(rand_x, rand_y, humanize=True)
  429. js_update_form = f"""
  430. try {{
  431. const form = document.querySelector('form');
  432. if (!form) return 'Form not found';
  433. function setReactValue(input, value) {{
  434. if (!input) return;
  435. input.value = value;
  436. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  437. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  438. }}
  439. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  440. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  441. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  442. const submitBtn = form.querySelector('button[type="submit"]');
  443. if (submitBtn) {{
  444. submitBtn.removeAttribute('disabled');
  445. submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
  446. return 'form_updated';
  447. }} else {{
  448. return 'Submit button not found';
  449. }}
  450. }} catch (e) {{
  451. return e.toString();
  452. }}
  453. """
  454. update_res = self.page.run_js(js_update_form)
  455. self._log(f"Mode 1: Form update triggered: {update_res}")
  456. if update_res != 'form_updated':
  457. raise BizLogicError(message=f"Failed to update form in Mode 1: {update_res}")
  458. submit_btn = self.page.ele('tag:button@@type=submit')
  459. if not submit_btn:
  460. raise BizLogicError(message="Submit button not found for mouse click")
  461. self._log("Mode 1: Moving mouse to submit button and clicking")
  462. self.mouse.human_click_ele(submit_btn)
  463. inject_res = 'clicked'
  464. else:
  465. js_inject_and_click = f"""
  466. try {{
  467. const form = document.querySelector('form');
  468. if (!form) return 'Form not found';
  469. function setReactValue(input, value) {{
  470. if (!input) return;
  471. input.value = value;
  472. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  473. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  474. }}
  475. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  476. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  477. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  478. const submitBtn = form.querySelector('button[type="submit"]');
  479. if (submitBtn) {{
  480. submitBtn.removeAttribute('disabled');
  481. submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
  482. submitBtn.click();
  483. return 'clicked';
  484. }} else {{
  485. return 'Submit button not found';
  486. }}
  487. }} catch (e) {{
  488. return e.toString();
  489. }}
  490. """
  491. inject_res = self.page.run_js(js_inject_and_click)
  492. self._log(f"Mode 2: Form submission triggered: {inject_res}")
  493. if inject_res != 'clicked':
  494. raise BizLogicError(message="Failed to inject form or click the submit button")
  495. self._log("Waiting for Next.js to process the form submission...")
  496. for _ in range(10):
  497. try:
  498. current_page_url = self.page.url
  499. current_page_html = self.page.html
  500. appointment_confirmation_indicators = [
  501. "order-summary" in current_page_url,
  502. "partner-services" in current_page_url,
  503. "appointment-confirmation" in current_page_url,
  504. "Change my appointment" in current_page_html,
  505. "Book a new appointment" in current_page_html,
  506. ]
  507. if any(appointment_confirmation_indicators):
  508. self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
  509. res.success = True
  510. res.label = selected_label
  511. res.book_date = selected_date
  512. res.book_time = selected_time.time
  513. self._save_screenshot("book_slot_success")
  514. break
  515. toast_selector = 'tag:div@role=alert'
  516. toast_ele = self.page.ele(toast_selector, timeout=0.5)
  517. if toast_ele:
  518. error_msg = toast_ele.text
  519. self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
  520. break
  521. time.sleep(0.5)
  522. except Exception:
  523. pass
  524. return res
  525. def _scan_dom_for_slots(self, target_year: int, target_month_num: int) -> list[dict]:
  526. """
  527. DOM-based slot scanning — 结合区块结构与类名/属性推断标签
  528. """
  529. slots = []
  530. day_blocks_xpath = '//div[p and div//button[contains(@data-testid, "slot")]]'
  531. day_blocks = self.page.eles(f'xpath:{day_blocks_xpath}')
  532. for block in day_blocks:
  533. p_ele = block.ele('tag:p')
  534. if not p_ele: continue
  535. day_match = re.search(r'\d+', p_ele.text)
  536. if not day_match: continue
  537. day_str = day_match.group()
  538. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  539. btn_selectors = [
  540. 'xpath:.//button[starts-with(@data-testid, "btn-available-slot")]',
  541. 'xpath:.//button[contains(@class, "available")]',
  542. 'xpath:.//div[contains(@class, "time-slot") and not(contains(@class, "unavailable"))]'
  543. ]
  544. available_elements = []
  545. for sel in btn_selectors:
  546. elements = block.eles(sel)
  547. if elements:
  548. available_elements.extend(elements)
  549. break
  550. seen_times = set()
  551. for el in available_elements:
  552. classes = (el.attr("class") or "").lower()
  553. test_id = (el.attr("data-testid") or "").lower()
  554. combined_attrs = f"{classes} {test_id}"
  555. if "disabled" in combined_attrs or "unavailable" in combined_attrs:
  556. continue
  557. time_match = re.search(r'\d{2}:\d{2}', el.html)
  558. if not time_match: continue
  559. time_str = time_match.group()
  560. if time_str in seen_times:
  561. continue
  562. seen_times.add(time_str)
  563. label = ''
  564. if 'prime' in combined_attrs and 'weekend' in combined_attrs:
  565. label = 'ptaw'
  566. elif 'prime' in combined_attrs or 'premium' in combined_attrs:
  567. label = 'pta'
  568. elif any(k in combined_attrs for k in ['regular', 'standard', 'default']):
  569. label = ''
  570. else:
  571. label = ''
  572. slots.append({
  573. 'date': full_date,
  574. 'time': time_str,
  575. 'label': label,
  576. 'source': 'dom'
  577. })
  578. return slots
  579. def _get_proxy_url(self):
  580. # 构造代理
  581. proxy_url = ""
  582. if self.config.proxy.ip:
  583. s = self.config.proxy
  584. if s.username:
  585. proxy_url = f"{s.proto}://{s.username}:{s.password}@{s.ip}:{s.port}"
  586. else:
  587. proxy_url = f"{s.proto}://{s.ip}:{s.port}"
  588. return proxy_url
  589. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  590. """
  591. 在浏览器上下文中注入 JS 执行 Fetch
  592. """
  593. if not self.page:
  594. raise BizLogicError("Browser not initialized")
  595. if params:
  596. from urllib.parse import urlencode
  597. if '?' in url:
  598. url += '&' + urlencode(params)
  599. else:
  600. url += '?' + urlencode(params)
  601. fetch_options = {
  602. "method": method.upper(),
  603. "headers": headers or {},
  604. "credentials": "include"
  605. }
  606. # Body 处理
  607. if json_data:
  608. fetch_options['body'] = json.dumps(json_data)
  609. fetch_options['headers']['Content-Type'] = 'application/json'
  610. elif data:
  611. if isinstance(data, dict):
  612. from urllib.parse import urlencode
  613. fetch_options['body'] = urlencode(data)
  614. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  615. else:
  616. fetch_options['body'] = data
  617. js_script = f"""
  618. const url = "{url}";
  619. const options = {json.dumps(fetch_options)};
  620. return fetch(url, options)
  621. .then(async response => {{
  622. const text = await response.text();
  623. const headers = {{}};
  624. response.headers.forEach((value, key) => headers[key] = value);
  625. return {{
  626. status: response.status,
  627. body: text,
  628. headers: headers,
  629. url: response.url
  630. }};
  631. }})
  632. .catch(error => {{
  633. return {{
  634. status: 0,
  635. body: error.toString(),
  636. headers: {{}},
  637. url: url
  638. }};
  639. }});
  640. """
  641. res_dict = self.page.run_js(js_script, timeout=30)
  642. resp = BrowserResponse(res_dict)
  643. if resp.status_code == 200:
  644. return resp
  645. elif resp.status_code == 401:
  646. self.is_healthy = False
  647. raise SessionExpiredOrInvalidError()
  648. elif resp.status_code == 403:
  649. if retry_count < 2:
  650. self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
  651. if self._refresh_firewall_session():
  652. self._log("Firewall session refreshed. Retrying request...")
  653. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  654. else:
  655. self._log("Failed to refresh firewall session.")
  656. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  657. elif resp.status_code == 429:
  658. self.is_healthy = False
  659. raise RateLimiteddError()
  660. else:
  661. if resp.status_code == 0:
  662. raise BizLogicError(f"Network Error: {resp.text}")
  663. raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  664. def _refresh_firewall_session(self) -> bool:
  665. """
  666. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  667. """
  668. try:
  669. # 1. 刷新当前页面 (通常 Dashboard 页)
  670. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  671. self._log("Refreshing page to trigger Cloudflare...")
  672. self.page.refresh()
  673. # 2. 调用 CloudflareBypasser
  674. cf = CloudflareBypasser(self.page, log=self.config.debug)
  675. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  676. success = cf.bypass(max_retry=10)
  677. if success:
  678. # 再次确认页面是否正常加载 (非 403 页面)
  679. title = self.page.title.lower()
  680. if "access denied" in title:
  681. return False
  682. # 等待 DOM 稍微稳定
  683. time.sleep(2)
  684. return True
  685. return False
  686. except Exception as e:
  687. self._log(f"Error during firewall refresh: {e}")
  688. return False
  def _solve_recaptcha(self, params) -> str:
      """Solve a reCAPTCHA through the CapSolver HTTP API and return the token.

      Args:
          params: Mapping that provides ``apiToken`` (CapSolver client key),
              ``type`` (task type), ``page`` (website URL), ``siteKey`` and,
              optionally, ``action`` (forwarded as ``pageAction``).

      Returns:
          The ``gRecaptchaResponse`` token taken from the task solution.

      Raises:
          NotFoundError: ``apiToken`` is missing or empty.
          BizLogicError: the task could not be submitted, the service
              reported an error or a failed task, or no solution was ready
              after 20 polls.
      """
      key = params.get("apiToken")
      if not key:
          raise NotFoundError("Api-token required")

      task = {
          "type": params.get("type"),
          "websiteURL": params.get("page"),
          "websiteKey": params.get("siteKey"),
      }
      if params.get("action"):
          task["pageAction"] = params.get("action")
      # NOTE: no proxy settings are forwarded. With DrissionPage the task is
      # normally run in ProxyLess mode; passing the same proxy to CapSolver is
      # only worth considering when risk control is extremely strict.

      import requests as req  # local import to avoid name confusion

      r = req.post(
          "https://api.capsolver.com/createTask",
          json={"clientKey": key, "task": task},
          timeout=20,
      )
      if r.status_code != 200:
          raise BizLogicError(message="Failed to submit capsolver task")

      created = r.json()
      task_id = created.get("taskId")
      if created.get("errorId") or not task_id:
          # Without a task id there is nothing to poll for; fail right away
          # instead of sending 20 result requests with taskId=None.
          reason = (
              created.get("errorDescription")
              or created.get("errorCode")
              or "no taskId returned"
          )
          raise BizLogicError(message=f"Failed to submit capsolver task: {reason}")

      for _ in range(20):
          r = req.post(
              "https://api.capsolver.com/getTaskResult",
              json={"clientKey": key, "taskId": task_id},
              timeout=20,
          )
          if r.status_code == 200:
              d = r.json()
              status = d.get("status")
              if status == "ready":
                  return d["solution"]["gRecaptchaResponse"]
              if status == "failed" or d.get("errorId"):
                  # A failed task never becomes ready; stop polling now
                  # rather than waiting for the timeout below.
                  reason = (
                      d.get("errorDescription")
                      or d.get("errorCode")
                      or "status=failed"
                  )
                  raise BizLogicError(message=f"Capsolver task failed: {reason}")
          # Non-200 replies are treated as transient and polled again.
          time.sleep(3)
      raise BizLogicError(message="Capsolver task timeout")
  725. def _parse_travel_groups(self, html_content) -> List[Dict]:
  726. groups = []
  727. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  728. js_match = re.search(js_pattern, html_content, re.DOTALL)
  729. if js_match:
  730. json_str = js_match.group(1).replace(r'\"', '"')
  731. data = json.loads(json_str)
  732. for g in data:
  733. groups.append({
  734. 'group_name': g.get('groupName'),
  735. 'group_number': g.get('formGroupId'),
  736. 'location': g.get('vacName')
  737. })
  738. else:
  739. self._log('Parsed travel group page, but not found travelGroups')
  740. return groups
  741. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  742. slots = []
  743. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  744. match = re.search(pattern, html_content, re.DOTALL)
  745. if match:
  746. json_str = match.group(1).replace(r'\"', '"')
  747. data = json.loads(json_str)
  748. for day in data:
  749. d_str = day.get('day')
  750. for s in day.get('slots', []):
  751. labels = s.get('labels', [])
  752. lbl = ""
  753. # 简化逻辑:TLS label 列表
  754. if 'pta' in labels: lbl = 'pta'
  755. elif 'ptaw' in labels: lbl = 'ptaw'
  756. elif '' in labels or not labels: lbl = ''
  757. slots.append({
  758. 'date': d_str,
  759. 'time': s.get('time'),
  760. 'label': lbl
  761. })
  762. return slots
  763. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  764. if not html:
  765. self.is_healthy = False
  766. raise SessionExpiredOrInvalidError()
  767. html_lower = html.lower()
  768. if keyword.lower() not in html_lower:
  769. session_expire_or_invalid_indicators = [
  770. 'redirected automatically' in html_lower,
  771. 'login' in html_lower and 'password' in html_lower,
  772. 'session expired' in html_lower
  773. ]
  774. if any(session_expire_or_invalid_indicators):
  775. self.is_healthy = False
  776. raise SessionExpiredOrInvalidError()
  777. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  778. if not start_str or not end_str:
  779. return dates
  780. valid_dates = []
  781. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  782. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  783. for date_str in dates:
  784. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  785. if s_date <= curr_date <= e_date:
  786. valid_dates.append(date_str)
  787. random.shuffle(valid_dates)
  788. return valid_dates
  789. # --- 资源清理核心方法 ---
  def cleanup(self):
      """Quit the browser, delete the temporary workspace, stop the proxy tunnel.

      Every step is best effort and the method may be called repeatedly.
      Attributes that were never assigned are treated as absent, so it is
      also safe on a partially-initialised instance; this matters because
      ``__del__`` calls this method.
      """
      # 1. Close the browser.
      page = getattr(self, "page", None)
      if page:
          try:
              page.quit()  # terminates the Chrome process
          except Exception:
              pass  # best effort: the browser may already be closed
          self.page = None

      # 2. Delete the workspace. Chrome may need a few hundred milliseconds
      # to release its file locks after quitting, hence the short waits.
      workspace = getattr(self, "root_workspace", None)
      if workspace and os.path.exists(workspace):
          max_attempts = 3
          for attempt in range(1, max_attempts + 1):
              time.sleep(0.2)
              # ignore_errors=True means rmtree never raises, so success has
              # to be judged by checking whether the directory still exists.
              shutil.rmtree(workspace, ignore_errors=True)
              if not os.path.exists(workspace):
                  break
              if attempt < max_attempts:
                  # Typically files still held open (e.g. on Windows); retry.
                  self._log(
                      f"Cleanup retry: workspace still present after attempt {attempt}"
                  )
                  time.sleep(0.5)
          else:
              self._log(f"[WARN] Failed to fully remove workspace: {workspace}")

      # 3. Stop the proxy tunnel.
      tunnel = getattr(self, "tunnel", None)
      if tunnel:
          try:
              tunnel.stop()
          except Exception:
              pass  # best effort: the tunnel may already be stopped
          self.tunnel = None
  821. def __del__(self):
  822. """
  823. 析构函数:当对象被垃圾回收时自动调用
  824. """
  825. self.cleanup()