# tls_plugin.py
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import socket
  9. from datetime import datetime
  10. from typing import List, Dict, Optional, Any, Callable
  11. from urllib.parse import urljoin, urlparse, urlencode
  12. # DrissionPage 核心
  13. from DrissionPage import ChromiumPage, ChromiumOptions
  14. from vs_plg import IVSPlg
  15. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  16. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  17. from toolkit.proxy_tunnel import ProxyTunnel
  18. class BrowserResponse:
  19. """模拟 requests.Response"""
  20. def __init__(self, result_dict):
  21. result_dict = result_dict or {}
  22. self.status_code = result_dict.get('status', 0)
  23. self.text = result_dict.get('body', '')
  24. self.headers = result_dict.get('headers', {})
  25. self.url = result_dict.get('url', '')
  26. self._json = None
  27. def json(self):
  28. if self._json is None:
  29. if not self.text:
  30. return {}
  31. try:
  32. self._json = json.loads(self.text)
  33. except:
  34. self._json = {}
  35. return self._json
  36. class TlsPlugin(IVSPlg):
  37. """
  38. TLSContact 签证预约插件 (DrissionPage 版)
  39. """
  40. def __init__(self, group_id: str):
  41. self.group_id = group_id
  42. self.config: Optional[VSPlgConfig] = None
  43. self.free_config: Dict[str, Any] = {}
  44. self.is_healthy = True
  45. self.logger = None
  46. self.page: Optional[ChromiumPage] = None
  47. self.travel_group: Optional[Dict] = None
  48. self.instance_id = uuid.uuid4().hex[:8]
  49. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  50. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  51. if not os.path.exists(self.root_workspace):
  52. os.makedirs(self.root_workspace)
  53. self.tunnel = None
  54. self.session_create_time: float = 0
  55. def get_group_id(self) -> str:
  56. return self.group_id
  57. def set_log(self, logger: Callable[[str], None]) -> None:
  58. self.logger = logger
  59. def _log(self, message):
  60. if self.logger:
  61. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  62. else:
  63. print(f'[TlsPlugin] [{self.group_id}] {message}')
  64. def set_config(self, config: VSPlgConfig):
  65. self.config = config
  66. self.free_config = config.free_config or {}
  67. def keep_alive(self):
  68. pass
  69. def health_check(self) -> bool:
  70. if not self.is_healthy:
  71. return False
  72. if self.page is None:
  73. return False
  74. try:
  75. if not self.page.run_js("return 1;"):
  76. return False
  77. except:
  78. return False
  79. if self.config.session_max_life > 0:
  80. current_time = time.time()
  81. elapsed_time = current_time - self.session_create_time
  82. if elapsed_time > self.config.session_max_life * 60:
  83. self._log(f"Session expired.")
  84. return False
  85. return True
  86. def _save_screenshot(self, name_prefix):
  87. try:
  88. timestamp = int(time.time())
  89. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  90. save_path = os.path.join("data", filename)
  91. os.makedirs("data", exist_ok=True)
  92. self.page.get_screenshot(path=save_path, full_page=False)
  93. self._log(f"Screenshot saved to {save_path}")
  94. except Exception as e:
  95. self._log(f"Failed to save screenshot: {e}")
  96. def create_session(self):
  97. """
  98. 全浏览器会话创建:过盾 -> JS注入登录 -> 原生跳转
  99. """
  100. self._log(f"Initializing Session (ID: {self.instance_id})...")
  101. co = ChromiumOptions()
  102. def get_free_port():
  103. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  104. s.bind(('', 0))
  105. return s.getsockname()[1]
  106. debug_port = get_free_port()
  107. self._log(f"Assigned Debug Port: {debug_port}")
  108. co.set_local_port(debug_port)
  109. co.set_user_data_path(self.user_data_path)
  110. chrome_path = os.getenv("CHROME_BIN")
  111. if chrome_path and os.path.exists(chrome_path):
  112. co.set_paths(browser_path=chrome_path)
  113. if self.config.proxy and self.config.proxy.ip:
  114. p = self.config.proxy
  115. if p.username and p.password:
  116. self._log(f"Starting Proxy Tunnel for {p.ip}...")
  117. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  118. local_proxy = self.tunnel.start()
  119. self._log(f"Tunnel started at {local_proxy}")
  120. co.set_argument(f'--proxy-server={local_proxy}')
  121. else:
  122. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  123. co.set_argument(f'--proxy-server={proxy_str}')
  124. else:
  125. self._log("[WARN] No proxy configured!")
  126. co.headless(False)
  127. co.set_argument('--no-sandbox')
  128. co.set_argument('--disable-gpu')
  129. co.set_argument('--disable-dev-shm-usage')
  130. co.set_argument('--window-size=1920,1080')
  131. co.set_argument('--disable-blink-features=AutomationControlled')
  132. try:
  133. self.page = ChromiumPage(co)
  134. apt_config = self.free_config.get('apt_config', {})
  135. if not apt_config:
  136. raise NotFoundError("apt_config config missing")
  137. login_url = "https://visas-fr.tlscontact.com/en-us/login"
  138. params = {
  139. "issuerId": apt_config["code"],
  140. "country": apt_config["country"],
  141. "vac": apt_config["code"],
  142. "redirect": f"/en-us/country/{apt_config['country']}/vac/{apt_config['code']}"
  143. }
  144. full_login_url = f"{login_url}?{urlencode(params)}"
  145. self._log(f"Navigating: {full_login_url}")
  146. self.page.get(full_login_url)
  147. cf = CloudflareBypasser(self.page, log=self.config.debug)
  148. if not cf.bypass(max_retry=15):
  149. raise BizLogicError("Cloudflare bypass timeout")
  150. wait_start = time.time()
  151. while True:
  152. html = self.page.html.lower()
  153. cloudflare_waitingroom_indicators = [
  154. "file d'attente" in html,
  155. 'waiting room' in html
  156. ]
  157. if any(cloudflare_waitingroom_indicators):
  158. if time.time() - wait_start > 60 * 60:
  159. raise BizLogicError(message="Cloudflare waiting room timeout (1h).")
  160. self._log("In Waiting Room... Waiting for auto-refresh.")
  161. time.sleep(30)
  162. else:
  163. break
  164. # --- 登录页面检查 ---
  165. if not self.page.ele('#email-input-field'):
  166. self._log("Reloading Login Page...")
  167. self.page.get(full_login_url)
  168. if not self.page.wait.ele_displayed('#email-input-field', timeout=15):
  169. self._save_screenshot("login_load_fail")
  170. raise BizLogicError("Login form not loaded")
  171. # --- JS 注入登录 ---
  172. g_token = ""
  173. if self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]'):
  174. self._log("Solving ReCaptcha...")
  175. rc_params = {
  176. "type": "ReCaptchaV2TaskProxyLess",
  177. "page": self.page.url,
  178. "siteKey": "6LcDpXcfAAAAAM7wOEsF_38DNsL20tTvPTKxpyn0",
  179. "apiToken": self.free_config.get("capsolver_key", "")
  180. }
  181. g_token = self._solve_recaptcha(rc_params)
  182. username = self.config.account.username
  183. password = self.config.account.password
  184. js_login = f"""
  185. var u = document.getElementById('email-input-field');
  186. if(u) {{ u.value = "{username}"; u.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  187. var p = document.getElementById('password-input-field');
  188. if(p) {{ p.value = "{password}"; p.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  189. var g = document.getElementById('g-recaptcha-response');
  190. if(g) {{ g.value = "{g_token}"; }}
  191. var btn = document.getElementById('btn-login');
  192. if(btn) {{ btn.click(); return true; }} else {{ return false; }}
  193. """
  194. self._log("Submitting Login via JS...")
  195. if not self.page.run_js(js_login):
  196. raise BizLogicError("Login button missing")
  197. self._log("Waiting for redirect...")
  198. self.page.wait.url_change('login-actions', exclude=True, timeout=45)
  199. time.sleep(3)
  200. if "login-actions" in self.page.url or "auth" in self.page.url:
  201. err = "Unknown Login Error"
  202. if "Invalid username" in self.page.html:
  203. err = "Invalid Credentials"
  204. self._save_screenshot("login_submit_fail")
  205. raise BizLogicError(f"Login Failed: {err}")
  206. self._log("Waiting for dashboard...")
  207. btn_selector = 'xpath://button[.//span[@data-testid="btn-create-new-travel-group"]]'
  208. if not self.page.wait.ele_displayed(btn_selector, timeout=10):
  209. raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
  210. html_content = self.page.html
  211. groups = self._parse_travel_groups(html_content)
  212. target_city = apt_config['city'].lower()
  213. for g in groups:
  214. if g['location'].lower() == target_city:
  215. self.travel_group = g
  216. break
  217. if not self.travel_group:
  218. self._save_screenshot("group_not_found")
  219. raise NotFoundError(f"Group not found for {target_city}")
  220. formgroup_id = self.travel_group.get('group_number')
  221. btn_selector = f'tag:button@@name=formGroupId@@value={formgroup_id}'
  222. self._log(f"Select group_id={formgroup_id} via JS...")
  223. self.page.ele(btn_selector).click(by_js=True)
  224. self._log("Waiting for url redirect...")
  225. self.page.wait.url_change('travel-groups', exclude=True, timeout=45)
  226. time.sleep(2)
  227. if "travel-groups" in self.page.url or "auth" in self.page.url:
  228. raise BizLogicError(message="Redirect to service-level Failed!")
  229. no_applicant_indicators = [
  230. "Add a new applicant" in self.page.html,
  231. "You have not yet added an applicant. Please click the button below to add one." in self.page.html,
  232. "applicants-information" in self.page.url
  233. ]
  234. if any(no_applicant_indicators):
  235. raise BizLogicError(message=f"No applicant added")
  236. btn_selector = '#book-appointment-btn'
  237. self._log(f"Waiting for selector={btn_selector} to render...")
  238. if not self.page.wait.ele_displayed(btn_selector, timeout=15):
  239. raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
  240. self.page.ele(btn_selector).click(by_js=True)
  241. self._log("Waiting for url redirect...")
  242. self.page.wait.url_change('service-level', exclude=True, timeout=45)
  243. time.sleep(2)
  244. if "service-level" in self.page.url or "auth" in self.page.url:
  245. raise BizLogicError(message="Redirect to appointment-booking Failed!")
  246. btn_selector = 'tag:button@text():Book your appointment'
  247. if not self.page.wait.ele_displayed(btn_selector, timeout=10):
  248. raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
  249. self.session_create_time = time.time()
  250. self._log(f"✅ Login & Navigation Success! Target Group ID: {formgroup_id}")
  251. except Exception as e:
  252. self._log(f"Session Create Error: {e}")
  253. self.cleanup()
  254. raise e
  255. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  256. res = VSQueryResult()
  257. res.success = False
  258. group_num = self.travel_group['group_number']
  259. apt_config = self.free_config.get('apt_config', {})
  260. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  261. target_date_obj = datetime.strptime(interest_month, "%m-%Y")
  262. target_month_text = target_date_obj.strftime("%B %Y")
  263. target_year = target_date_obj.year
  264. target_month_num = target_date_obj.month
  265. slots = []
  266. all_slots = []
  267. current_selected_ele = self.page.ele('@data-testid=btn-current-month-available')
  268. current_month_text = current_selected_ele.text.strip() if current_selected_ele else ""
  269. is_on_target_month = (current_month_text.lower() == target_month_text.lower())
  270. if not is_on_target_month:
  271. self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
  272. for _ in range(12):
  273. target_btn_xpath = f'xpath://a[contains(@href, "month={interest_month}")]'
  274. target_btn = self.page.ele(target_btn_xpath)
  275. if target_btn:
  276. target_btn.click(by_js=True)
  277. time.sleep(3)
  278. break
  279. next_btn = self.page.ele('@data-testid=btn-next-month-available')
  280. if next_btn:
  281. next_btn.click(by_js=True)
  282. time.sleep(2)
  283. else:
  284. self._log("Warning: Cannot find target month or 'Next Month' button.")
  285. break
  286. self._log("Extracting slots from DOM using robust data-testid features...")
  287. day_blocks_xpath = '//div[p and div//button[contains(@data-testid, "slot")]]'
  288. day_blocks = self.page.eles(f'xpath:{day_blocks_xpath}')
  289. for block in day_blocks:
  290. # 1. 提取日期:只要是这个 block 下的 p 标签,必定是 "Mon 01" 这种
  291. p_ele = block.ele('tag:p')
  292. if not p_ele:
  293. continue
  294. # 直接从 p 标签的纯文本里抽取出数字,忽略前面的字母
  295. day_match = re.search(r'\d+', p_ele.text)
  296. if not day_match:
  297. continue
  298. day_str = day_match.group()
  299. full_date = f"{target_year}-{target_month_num:02d}-{int(day_str):02d}"
  300. # 2. 提取可用按钮:利用 data-testid 前缀匹配
  301. # 完美过滤掉 btn-unavailable-slot (灰色的不可用按钮)
  302. available_btns = block.eles('xpath:.//button[starts-with(@data-testid, "btn-available-slot")]')
  303. for btn in available_btns:
  304. # 提取时间:无视内部各种 span 的变动,只要 html 里有 00:00 这种格式就被截取
  305. time_match = re.search(r'\d{2}:\d{2}', btn.html)
  306. if not time_match:
  307. continue
  308. time_str = time_match.group()
  309. # 提取 Label:完全依赖测试工程师留下的 testid
  310. test_id = btn.attr('data-testid') or ""
  311. if 'prime' in test_id and 'weekend' in test_id:
  312. lbl = 'ptaw'
  313. elif 'prime' in test_id:
  314. lbl = 'pta'
  315. else:
  316. lbl = ''
  317. all_slots.append({
  318. 'date': full_date,
  319. 'time': time_str,
  320. 'label': lbl
  321. })
  322. else:
  323. self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
  324. url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking'
  325. params = {
  326. 'location': apt_config["code"],
  327. 'month': interest_month
  328. }
  329. resp = self._perform_request("GET", url, params=params, retry_count=1)
  330. html_content = resp.text
  331. self._check_page_is_session_expired_or_invalid('Book your appointment', html_content)
  332. all_slots = self._parse_appointment_slots(html_content)
  333. target_labels = self.free_config.get("target_labels", ["", "pta"])
  334. slots = [s for s in all_slots if s.get("label") in target_labels]
  335. if slots:
  336. res.success = True
  337. earliest_date = slots[0]["date"]
  338. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  339. res.availability_status = AvailabilityStatus.Available
  340. res.earliest_date = earliest_dt
  341. date_map: dict[datetime, list[TimeSlot]] = {}
  342. for s in slots:
  343. date_str = s["date"]
  344. dt = datetime.strptime(date_str, "%Y-%m-%d")
  345. date_map.setdefault(dt, []).append(
  346. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  347. )
  348. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  349. self._log(f"Slot Found! -> {slots}")
  350. else:
  351. self._log("No slots available.")
  352. res.success = False
  353. res.availability_status = AvailabilityStatus.NoneAvailable
  354. return res
  355. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  356. res = VSBookResult()
  357. res.success = False
  358. exp_start = user_inputs.get('expected_start_date', '')
  359. exp_end = user_inputs.get('expected_end_date', '')
  360. support_pta = user_inputs.get('support_pta', True)
  361. target_labels = ['']
  362. if support_pta:
  363. target_labels.append('pta')
  364. available_dates_str =[
  365. da.date.strftime("%Y-%m-%d")
  366. for da in slot_info.availability if da.date
  367. ]
  368. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  369. if not valid_dates_list:
  370. raise NotFoundError(message="No dates match user constraints")
  371. all_possible_slots =[]
  372. for da in slot_info.availability:
  373. if not da.date:
  374. continue
  375. date_str = da.date.strftime("%Y-%m-%d")
  376. if date_str in valid_dates_list:
  377. for t in da.times:
  378. if t.label in target_labels:
  379. all_possible_slots.append({
  380. "date": date_str,
  381. "time_obj": t,
  382. "label": t.label
  383. })
  384. if not all_possible_slots:
  385. raise NotFoundError(message="No suitable slot found (after label filtering)")
  386. selected_slot = random.choice(all_possible_slots)
  387. selected_date = selected_slot["date"]
  388. selected_time = selected_slot["time_obj"]
  389. selected_label = selected_slot["label"]
  390. self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
  391. js_inject_and_click = f"""
  392. try {{
  393. const form = document.querySelector('form');
  394. if (!form) return 'Form not found';
  395. function setReactValue(input, value) {{
  396. if (!input) return;
  397. input.value = value;
  398. input.dispatchEvent(new Event('input', {{ bubbles: true }}));
  399. input.dispatchEvent(new Event('change', {{ bubbles: true }}));
  400. }}
  401. setReactValue(form.querySelector('input[name="date"]'), '{selected_date}');
  402. setReactValue(form.querySelector('input[name="time"]'), '{selected_time.time}');
  403. setReactValue(form.querySelector('input[name="appointmentLabel"]'), '{selected_label}');
  404. const submitBtn = form.querySelector('button[type="submit"]');
  405. if (submitBtn) {{
  406. submitBtn.removeAttribute('disabled');
  407. submitBtn.classList.remove('opacity-50', 'cursor-not-allowed');
  408. submitBtn.click();
  409. return 'clicked';
  410. }} else {{
  411. return 'Submit button not found';
  412. }}
  413. }} catch (e) {{
  414. return e.toString();
  415. }}
  416. """
  417. inject_res = self.page.run_js(js_inject_and_click)
  418. self._log(f"Form submission triggered: {inject_res}")
  419. if inject_res != 'clicked':
  420. raise BizLogicError(message="Failed to inject form or click the submit button")
  421. self._log("Waiting for Next.js to process the form submission...")
  422. for _ in range(10):
  423. try:
  424. current_page_url = self.page.url
  425. current_page_html = self.page.html
  426. appointment_confirmation_indicators = [
  427. "order-summary" in current_page_url,
  428. "partner-services" in current_page_url,
  429. "appointment-confirmation" in current_page_url,
  430. "Change my appointment" in current_page_html,
  431. "Book a new appointment" in current_page_html,
  432. ]
  433. if any(appointment_confirmation_indicators):
  434. self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
  435. res.success = True
  436. res.label = selected_label
  437. res.book_date = selected_date
  438. res.book_time = selected_time.time
  439. self._save_screenshot("book_slot_success")
  440. break
  441. toast_selector = 'tag:div@role=alert'
  442. toast_ele = self.page.ele(toast_selector, timeout=0.5)
  443. if toast_ele:
  444. error_msg = toast_ele.text
  445. self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
  446. break
  447. time.sleep(0.5)
  448. except Exception:
  449. pass
  450. return res
  451. def _get_proxy_url(self):
  452. # 构造代理
  453. proxy_url = ""
  454. if self.config.proxy.ip:
  455. s = self.config.proxy
  456. if s.username:
  457. proxy_url = f"{s.scheme}://{s.username}:{s.password}@{s.ip}:{s.port}"
  458. else:
  459. proxy_url = f"{s.scheme}://{s.ip}:{s.port}"
  460. return proxy_url
  461. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  462. """
  463. 在浏览器上下文中注入 JS 执行 Fetch
  464. """
  465. if not self.page:
  466. raise BizLogicError("Browser not initialized")
  467. if params:
  468. from urllib.parse import urlencode
  469. if '?' in url:
  470. url += '&' + urlencode(params)
  471. else:
  472. url += '?' + urlencode(params)
  473. fetch_options = {
  474. "method": method.upper(),
  475. "headers": headers or {},
  476. "credentials": "include"
  477. }
  478. # Body 处理
  479. if json_data:
  480. fetch_options['body'] = json.dumps(json_data)
  481. fetch_options['headers']['Content-Type'] = 'application/json'
  482. elif data:
  483. if isinstance(data, dict):
  484. from urllib.parse import urlencode
  485. fetch_options['body'] = urlencode(data)
  486. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  487. else:
  488. fetch_options['body'] = data
  489. js_script = f"""
  490. const url = "{url}";
  491. const options = {json.dumps(fetch_options)};
  492. return fetch(url, options)
  493. .then(async response => {{
  494. const text = await response.text();
  495. const headers = {{}};
  496. response.headers.forEach((value, key) => headers[key] = value);
  497. return {{
  498. status: response.status,
  499. body: text,
  500. headers: headers,
  501. url: response.url
  502. }};
  503. }})
  504. .catch(error => {{
  505. return {{
  506. status: 0,
  507. body: error.toString(),
  508. headers: {{}},
  509. url: url
  510. }};
  511. }});
  512. """
  513. res_dict = self.page.run_js(js_script, timeout=30)
  514. resp = BrowserResponse(res_dict)
  515. if resp.status_code == 200:
  516. return resp
  517. elif resp.status_code == 401:
  518. self.is_healthy = False
  519. raise SessionExpiredOrInvalidError()
  520. elif resp.status_code == 403:
  521. if retry_count < 2:
  522. self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
  523. if self._refresh_firewall_session():
  524. self._log("Firewall session refreshed. Retrying request...")
  525. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  526. else:
  527. self._log("Failed to refresh firewall session.")
  528. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  529. elif resp.status_code == 429:
  530. self.is_healthy = False
  531. raise RateLimiteddError()
  532. else:
  533. if resp.status_code == 0:
  534. raise BizLogicError(f"Network Error: {resp.text}")
  535. raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  536. def _refresh_firewall_session(self) -> bool:
  537. """
  538. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  539. """
  540. try:
  541. # 1. 刷新当前页面 (通常 Dashboard 页)
  542. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  543. self._log("Refreshing page to trigger Cloudflare...")
  544. self.page.refresh()
  545. # 2. 调用 CloudflareBypasser
  546. cf = CloudflareBypasser(self.page, log=self.config.debug)
  547. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  548. success = cf.bypass(max_retry=10)
  549. if success:
  550. # 再次确认页面是否正常加载 (非 403 页面)
  551. title = self.page.title.lower()
  552. if "access denied" in title:
  553. return False
  554. # 等待 DOM 稍微稳定
  555. time.sleep(2)
  556. return True
  557. return False
  558. except Exception as e:
  559. self._log(f"Error during firewall refresh: {e}")
  560. return False
  561. def _solve_recaptcha(self, params) -> str:
  562. """调用 VSCloudApi 解决 ReCaptcha"""
  563. key = params.get("apiToken")
  564. if not key: raise NotFoundError("Api-token required")
  565. submit_url = "https://api.capsolver.com/createTask"
  566. task = {
  567. "type": params.get("type"),
  568. "websiteURL": params.get("page"),
  569. "websiteKey": params.get("siteKey"),
  570. }
  571. if params.get("action"):
  572. task["pageAction"] = params.get("action")
  573. # if params.get("proxy"):
  574. # p = urlparse(params.get("proxy"))
  575. # task["proxyType"] = p.scheme
  576. # task["proxyAddress"] = p.hostname
  577. # task["proxyPort"] = p.port
  578. # if p.username:
  579. # task["proxyLogin"] = p.username
  580. # task["proxyPassword"] = p.password
  581. # 注意:使用 DrissionPage 后,通常是 ProxyLess 模式
  582. # 除非你想让 Capsolver 也用同样的代理(通常不需要,除非风控极严)
  583. payload = {"clientKey": key, "task": task}
  584. import requests as req # 局部引用,避免混淆
  585. r = req.post(submit_url, json=payload, timeout=20)
  586. if r.status_code != 200:
  587. raise BizLogicError(message="Failed to submit capsolver task")
  588. task_id = r.json().get("taskId")
  589. for _ in range(20):
  590. r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
  591. if r.status_code == 200:
  592. d = r.json()
  593. if d.get("status") == "ready":
  594. return d["solution"]["gRecaptchaResponse"]
  595. time.sleep(3)
  596. raise BizLogicError(message="Capsolver task timeout")
  597. def _parse_travel_groups(self, html_content) -> List[Dict]:
  598. groups = []
  599. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  600. js_match = re.search(js_pattern, html_content, re.DOTALL)
  601. if js_match:
  602. json_str = js_match.group(1).replace(r'\"', '"')
  603. data = json.loads(json_str)
  604. for g in data:
  605. groups.append({
  606. 'group_name': g.get('groupName'),
  607. 'group_number': g.get('formGroupId'),
  608. 'location': g.get('vacName')
  609. })
  610. else:
  611. self._log('Parsed travel group page, but not found travelGroups')
  612. return groups
  613. def _parse_appointment_slots(self, html_content) -> List[Dict]:
  614. slots = []
  615. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  616. match = re.search(pattern, html_content, re.DOTALL)
  617. if match:
  618. json_str = match.group(1).replace(r'\"', '"')
  619. data = json.loads(json_str)
  620. for day in data:
  621. d_str = day.get('day')
  622. for s in day.get('slots', []):
  623. labels = s.get('labels', [])
  624. lbl = ""
  625. # 简化逻辑:TLS label 列表
  626. if 'pta' in labels: lbl = 'pta'
  627. elif 'ptaw' in labels: lbl = 'ptaw'
  628. elif '' in labels or not labels: lbl = ''
  629. slots.append({
  630. 'date': d_str,
  631. 'time': s.get('time'),
  632. 'label': lbl
  633. })
  634. return slots
  635. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  636. if not html:
  637. self.is_healthy = False
  638. raise SessionExpiredOrInvalidError()
  639. html_lower = html.lower()
  640. if keyword.lower() not in html_lower:
  641. session_expire_or_invalid_indicators = [
  642. 'redirected automatically' in html_lower,
  643. 'login' in html_lower and 'password' in html_lower,
  644. 'session expired' in html_lower
  645. ]
  646. if any(session_expire_or_invalid_indicators):
  647. self.is_healthy = False
  648. raise SessionExpiredOrInvalidError()
  649. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  650. if not start_str or not end_str:
  651. return dates
  652. valid_dates = []
  653. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  654. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  655. for date_str in dates:
  656. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  657. if s_date <= curr_date <= e_date:
  658. valid_dates.append(date_str)
  659. random.shuffle(valid_dates)
  660. return valid_dates
  661. # --- 资源清理核心方法 ---
  662. def cleanup(self):
  663. """
  664. 销毁浏览器并彻底删除临时文件
  665. """
  666. # 1. 关闭浏览器
  667. if self.page:
  668. try:
  669. self.page.quit() # 这会关闭 Chrome 进程
  670. except Exception:
  671. pass # 忽略已关闭的错误
  672. self.page = None
  673. # 2. 删除文件
  674. # 注意:Chrome 关闭后可能需要几百毫秒释放文件锁,稍微等待
  675. if os.path.exists(self.root_workspace):
  676. for _ in range(3):
  677. try:
  678. time.sleep(0.2)
  679. shutil.rmtree(self.root_workspace, ignore_errors=True)
  680. break
  681. except Exception as e:
  682. # 如果删除失败(通常是Windows文件占用),重试
  683. self._log(f"Cleanup retry: {e}")
  684. time.sleep(0.5)
  685. # 如果依然存在,打印警告(虽然 ignore_errors=True 会掩盖报错,但可以 check exists)
  686. if os.path.exists(self.root_workspace):
  687. self._log(f"[WARN] Failed to fully remove workspace: {self.root_workspace}")
  688. # 3. [新增] 关闭代理隧道
  689. if self.tunnel:
  690. try: self.tunnel.stop()
  691. except: pass
  692. self.tunnel = None
  693. def __del__(self):
  694. """
  695. 析构函数:当对象被垃圾回收时自动调用
  696. """
  697. self.cleanup()