tls_plugin2.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. from datetime import datetime
  7. from typing import List, Dict, Optional, Any, Callable
  8. from urllib.parse import urljoin, urlparse, urlencode
  9. # DrissionPage 核心
  10. from DrissionPage import ChromiumPage, ChromiumOptions
  11. from vs_plg import IVSPlg
  12. from vs_types import VSPlgConfig, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  13. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  14. from toolkit.vs_cloud_api import VSCloudApi
  15. # --- 辅助函数:创建代理插件 ---
  16. def create_proxy_auth_extension(ip, port, username, password, plugin_path="./chrome_proxy_auth_plugin"):
  17. if not os.path.exists(plugin_path):
  18. os.makedirs(plugin_path)
  19. manifest_json = """
  20. {
  21. "version": "1.0.0",
  22. "manifest_version": 2,
  23. "name": "Chrome Proxy Auth Extension",
  24. "permissions": ["proxy", "tabs", "unlimitedStorage", "storage", "<all_urls>", "webRequest", "webRequestBlocking"],
  25. "background": {"scripts": ["background.js"]},
  26. "minimum_chrome_version": "22.0.0"
  27. }
  28. """
  29. background_js = f"""
  30. var config = {{
  31. mode: "fixed_servers",
  32. rules: {{
  33. singleProxy: {{scheme: "http", host: "{ip}", port: parseInt({port})}},
  34. bypassList: ["localhost"]
  35. }}
  36. }};
  37. chrome.proxy.settings.set({{value: config, scope: "regular"}}, function() {{}});
  38. function callbackFn(details) {{
  39. return {{authCredentials: {{username: "{username}", password: "{password}"}}}};
  40. }}
  41. chrome.webRequest.onAuthRequired.addListener(
  42. callbackFn, {{urls: ["<all_urls>"]}}, ['blocking']
  43. );
  44. """
  45. with open(os.path.join(plugin_path, "manifest.json"), "w") as f:
  46. f.write(manifest_json)
  47. with open(os.path.join(plugin_path, "background.js"), "w") as f:
  48. f.write(background_js)
  49. return os.path.abspath(plugin_path)
  50. class BrowserResponse:
  51. """模拟 requests.Response"""
  52. def __init__(self, result_dict):
  53. result_dict = result_dict or {}
  54. self.status_code = result_dict.get('status', 0)
  55. self.text = result_dict.get('body', '')
  56. self.headers = result_dict.get('headers', {})
  57. self.url = result_dict.get('url', '')
  58. self._json = None
  59. def json(self):
  60. if self._json is None:
  61. if not self.text:
  62. return {}
  63. try:
  64. self._json = json.loads(self.text)
  65. except:
  66. self._json = {}
  67. return self._json
  68. class TlsPlugin2(IVSPlg):
  69. """
  70. TLSContact 签证预约插件 (DrissionPage 版)
  71. """
  72. def __init__(self, group_id: str):
  73. self.group_id = group_id
  74. self.config: Optional[VSPlgConfig] = None
  75. self.free_config: Dict[str, Any] = {}
  76. self.is_healthy = True
  77. self.logger = None
  78. # 浏览器实例
  79. self.page: Optional[ChromiumPage] = None
  80. self.travel_group: Optional[Dict] = None
  81. self.session_create_time: float = 0
  82. self.real_ip: str = "0.0.0.0"
  83. def get_group_id(self) -> str:
  84. return self.group_id
  85. def set_log(self, logger: Callable[[str], None]) -> None:
  86. self.logger = logger
  87. def _log(self, message):
  88. if self.logger:
  89. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  90. else:
  91. print(f'[TlsPlugin] [{self.group_id}] {message}')
  92. def set_config(self, config: VSPlgConfig):
  93. self.config = config
  94. self.free_config = config.free_config or {}
  95. def health_check(self) -> bool:
  96. if not self.is_healthy:
  97. return False
  98. if self.page is None:
  99. return False
  100. try:
  101. if not self.page.run_js("return 1;"):
  102. return False
  103. except:
  104. return False
  105. if self.config.session_max_life > 0:
  106. current_time = time.time()
  107. elapsed_time = current_time - self.session_create_time
  108. if elapsed_time > self.config.session_max_life * 60:
  109. self._log(f"Session expired.")
  110. return False
  111. return True
  112. def create_session(self):
  113. """
  114. 全浏览器会话创建:过盾 -> JS注入登录 -> 原生跳转
  115. """
  116. self._log("Initializing Browser Session (Full Browser Mode)...")
  117. co = ChromiumOptions()
  118. co.auto_port()
  119. if self.config.proxy and self.config.proxy.ip:
  120. p = self.config.proxy
  121. if p.username and p.password:
  122. self._log(f"Proxy: {p.ip}:{p.port} (Auth)")
  123. co.add_extension(create_proxy_auth_extension(p.ip, p.port, p.username, p.password))
  124. else:
  125. co.set_proxy(f"{p.scheme}://{p.ip}:{p.port}")
  126. co.headless(False)
  127. co.set_argument('--no-sandbox')
  128. co.set_argument('--disable-gpu')
  129. co.set_argument('--disable-blink-features=AutomationControlled')
  130. try:
  131. self.page = ChromiumPage(co)
  132. embassy = self.free_config.get('center', {})
  133. if not embassy: raise NotFoundError("center config missing")
  134. login_url = "https://visas-fr.tlscontact.com/en-us/login"
  135. params = {
  136. "issuerId": embassy["code"], "country": embassy["country"], "vac": embassy["code"],
  137. "redirect": f"/en-us/country/{embassy['country']}/vac/{embassy['code']}"
  138. }
  139. full_login_url = f"{login_url}?{urlencode(params)}"
  140. self._log(f"Navigating: {full_login_url}")
  141. self.page.get(full_login_url)
  142. # --- Cloudflare 过盾 ---
  143. cf = CloudflareBypasser(self.page, log=self.config.debug)
  144. if not cf.bypass(max_retry=15):
  145. raise BizLogicError("Cloudflare bypass timeout")
  146. # --- 登录页面检查 ---
  147. if not self.page.ele('#email-input-field'):
  148. self._log("Reloading Login Page...")
  149. self.page.get(full_login_url)
  150. if not self.page.wait.ele_displayed('#email-input-field', timeout=15):
  151. raise BizLogicError("Login form not loaded")
  152. # --- JS 注入登录 ---
  153. g_token = ""
  154. if self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]'):
  155. self._log("Solving ReCaptcha...")
  156. rc_params = {
  157. "type": "ReCaptchaV2TaskProxyLess", "page": self.page.url,
  158. "siteKey": "6LcDpXcfAAAAAM7wOEsF_38DNsL20tTvPTKxpyn0",
  159. "apiToken": self.free_config.get("capsolver_key", "")
  160. }
  161. g_token = self._solve_recaptcha(rc_params)
  162. username = self.config.account.username
  163. password = self.config.account.password
  164. # 使用 JS 直接操作 DOM 并 click,让浏览器处理 302
  165. js_login = f"""
  166. var u = document.getElementById('email-input-field');
  167. if(u) {{ u.value = "{username}"; u.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  168. var p = document.getElementById('password-input-field');
  169. if(p) {{ p.value = "{password}"; p.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  170. var g = document.getElementById('g-recaptcha-response');
  171. if(g) {{ g.value = "{g_token}"; }}
  172. var btn = document.getElementById('btn-login');
  173. if(btn) {{ btn.click(); return true; }} else {{ return false; }}
  174. """
  175. self._log("Submitting Login via JS...")
  176. if not self.page.run_js(js_login): raise BizLogicError("Login button missing")
  177. # --- 等待跳转 ---
  178. self._log("Waiting for redirect...")
  179. self.page.wait.url_change('login-actions', exclude=True, timeout=45)
  180. # 检查是否失败
  181. if "login-actions" in self.page.url or "auth" in self.page.url:
  182. err = "Unknown Login Error"
  183. if "Invalid username" in self.page.html: err = "Invalid Credentials"
  184. raise BizLogicError(f"Login Failed: {err}")
  185. # --- 提取 Dashboard 信息 ---
  186. self._log("Waiting for dashboard...")
  187. self.page.wait.load_start()
  188. time.sleep(5)
  189. html = self.page.html
  190. self._check_page_is_session_expired_or_invalid("My travel group", html)
  191. groups = self._parse_travel_groups(html)
  192. target_city = embassy['city'].lower()
  193. for g in groups:
  194. if g['location'].lower() == target_city:
  195. self.travel_group = g
  196. break
  197. if not self.travel_group: raise NotFoundError(f"Group not found for {target_city}")
  198. self.session_create_time = time.time()
  199. self.real_ip = self._get_realnetwork_ip()
  200. self._log(f"Session Ready. Group: {self.travel_group['group_number']}")
  201. except Exception as e:
  202. self._log(f"Session Create Error: {e}")
  203. if self.page: self.page.quit(); self.page = None
  204. raise e
  205. def query(self) -> VSQueryResult:
  206. res = VSQueryResult()
  207. res.success = False
  208. embassy = self.free_config.get('center', {})
  209. group_num = self.travel_group['group_number']
  210. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  211. url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking'
  212. params = {
  213. 'location': embassy["code"],
  214. 'month': interest_month,
  215. }
  216. # DrissionPage 自动处理 Cloudflare,直接 fetch 即可
  217. try:
  218. resp = self._perform_request("GET", url, params=params, retry_count=1)
  219. except Exception as e:
  220. self._log(f"Query request failed: {e}")
  221. raise e
  222. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  223. # 解析 Slots
  224. all_slots = self._parse_appointment_slots(resp.text)
  225. target_labels = self.free_config.get("target_labels", ["", "pta"])
  226. # 根据配置过滤
  227. available = [s for s in all_slots if s.get("label") in target_labels]
  228. res.city = self.free_config.get('city', '')
  229. res.country = self.free_config.get('country', '')
  230. res.visa_type = self.free_config.get('visa_type', '')
  231. res.routing_key = self.free_config.get('routing_key', '')
  232. if available:
  233. res.success = True
  234. res.availability_status = AvailabilityStatus.Available
  235. res.earliest_date = available[0]["date"]
  236. date_map: dict[str, list[TimeSlot]] = {}
  237. for s in available:
  238. d = s["date"]
  239. date_map.setdefault(d, []).append(
  240. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  241. )
  242. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  243. else:
  244. res.success = False
  245. res.availability_status = AvailabilityStatus.NoneAvailable
  246. return res
  247. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  248. res = VSBookResult()
  249. res.success = False
  250. embassy = self.free_config.get('center', {})
  251. group_num = self.travel_group['group_number']
  252. available_dates = [da.date for da in slot_info.availability]
  253. exp_start = user_inputs.get('expected_start_date', '')
  254. exp_end = user_inputs.get('expected_end_date', '')
  255. support_pta = user_inputs.get('support_pta', True)
  256. target_labels = ['']
  257. if support_pta:
  258. target_labels.append('pta')
  259. valid_dates = self._filter_dates(available_dates, exp_start, exp_end)
  260. if not valid_dates:
  261. raise NotFoundError(message="No dates match user constraints")
  262. selected_date = None
  263. selected_time = None
  264. selected_label = None
  265. for d in valid_dates:
  266. for da in slot_info.availability:
  267. if da.date == d:
  268. for t in da.times:
  269. if t.label in target_labels:
  270. selected_date = d
  271. selected_time = t
  272. selected_label = t.label
  273. break
  274. if selected_date: break
  275. if not selected_date:
  276. raise NotFoundError(message="No suitable slot found")
  277. # 2. 解决 ReCaptcha V3 (Action: book)
  278. page_url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking?location={embassy["code"]}&month={selected_date[:7]}'
  279. api_token = self.free_config.get("capsolver_key", "")
  280. rc_params = {
  281. "type": "ReCaptchaV3Task",
  282. "page": page_url,
  283. "action": "book",
  284. "siteKey": "6LcTpXcfAAAAAM3VojNhyV-F1z92ADJIvcSZ39Y9",
  285. "apiToken": api_token,
  286. # "proxy": self._get_proxy_url() # ProxyLess
  287. }
  288. g_token = self._solve_recaptcha(rc_params)
  289. # 3. 构造 Next.js Payload
  290. # 注意:在 JS 中构造 FormData 比在 Python 中拼 Multipart 更容易且不易出错
  291. ACTION_ID = "60d0616946df1fc4e7c094ca6a7a04f134d0be3d53"
  292. url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking'
  293. # State Tree 字符串
  294. router_state = '%5B%22%22%2C%7B%22children%22%3A%5B%5B%22lang%22%2C%22en-us%22%2C%22d%22%5D%2C%7B%22children%22%3A%5B%5B%22groupId%22%2C%22'+str(group_num)+'%22%2C%22d%22%5D%2C%7B%22children%22%3A%5B%22workflow%22%2C%7B%22children%22%3A%5B%22appointment-booking%22%2C%7B%22children%22%3A%5B%22__PAGE__%22%2C%7B%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%2Ctrue%5D%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%2Ctrue%5D%7D%2Cnull%2Cnull%5D'
  295. # 构造 JS 代码执行 fetch
  296. # 使用 FormData 对象来处理 multipart
  297. js_script = f"""
  298. const url = "{url}";
  299. const formData = new FormData();
  300. formData.append('1_formGroupId', '{group_num}');
  301. formData.append('1_lang', 'en-us');
  302. formData.append('1_process', 'APPOINTMENT');
  303. formData.append('1_location', '{embassy["code"]}');
  304. formData.append('1_date', '{selected_date}');
  305. formData.append('1_time', '{selected_time.time}');
  306. formData.append('1_appointmentLabel', '{selected_label}');
  307. formData.append('1_captcha_token', '{g_token}');
  308. formData.append('0', '[{{"status":"IDLE"}},"$K1"]');
  309. const headers = {{
  310. 'Next-Action': '{ACTION_ID}',
  311. 'Next-Router-State-Tree': decodeURIComponent('{router_state}'),
  312. 'Accept': 'text/x-component'
  313. }};
  314. return fetch(url, {{
  315. method: 'POST',
  316. headers: headers,
  317. body: formData
  318. }}).then(async response => {{
  319. const text = await response.text();
  320. const headers = {{}};
  321. response.headers.forEach((value, key) => headers[key] = value);
  322. return {{
  323. status: response.status,
  324. body: text,
  325. headers: headers,
  326. url: response.url
  327. }};
  328. }}).catch(err => {{
  329. return {{ status: 0, body: err.toString(), headers: {{}}, url: url }};
  330. }});
  331. """
  332. self._log("Submitting booking request via JS Fetch...")
  333. res_dict = self.page.run_js(js_script)
  334. resp = BrowserResponse(res_dict)
  335. # 4. 结果判定
  336. # Next.js Server Action 重定向通常是 303,但 fetch 可能会自动跟随
  337. # 如果 fetch 跟随了,url 会变;如果没跟随(Redirect mode: manual),status 是 303
  338. if resp.status_code == 303 or (resp.status_code == 200 and "appointment-confirmation" in resp.url):
  339. self._log(f"Booking Success! URL: {resp.url}")
  340. res.success = True
  341. res.book_date = selected_date
  342. res.book_time = selected_time
  343. return res
  344. if resp.status_code == 200:
  345. if "APPOINTMENT_LIMIT_REACHED" in resp.text:
  346. self._log("Failed: Appointment Limit Reached")
  347. elif "Invalid captcha" in resp.text:
  348. self._log("Failed: Invalid Captcha")
  349. else:
  350. self._log(f"Booking Failed (Unknown 200): {resp.text[:200]}")
  351. else:
  352. self._log(f"Booking Failed. Status: {resp.status_code}")
  353. return res
  354. # --- 辅助方法 ---
  355. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  356. """
  357. 在浏览器上下文中注入 JS 执行 Fetch
  358. """
  359. if not self.page:
  360. raise BizLogicError("Browser not initialized")
  361. if params:
  362. from urllib.parse import urlencode
  363. if '?' in url:
  364. url += '&' + urlencode(params)
  365. else:
  366. url += '?' + urlencode(params)
  367. fetch_options = {
  368. "method": method.upper(),
  369. "headers": headers or {},
  370. "credentials": "include"
  371. }
  372. # Body 处理
  373. if json_data:
  374. fetch_options['body'] = json.dumps(json_data)
  375. fetch_options['headers']['Content-Type'] = 'application/json'
  376. elif data:
  377. if isinstance(data, dict):
  378. from urllib.parse import urlencode
  379. fetch_options['body'] = urlencode(data)
  380. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  381. else:
  382. fetch_options['body'] = data
  383. js_script = f"""
  384. const url = "{url}";
  385. const options = {json.dumps(fetch_options)};
  386. return fetch(url, options)
  387. .then(async response => {{
  388. const text = await response.text();
  389. const headers = {{}};
  390. response.headers.forEach((value, key) => headers[key] = value);
  391. return {{
  392. status: response.status,
  393. body: text,
  394. headers: headers,
  395. url: response.url
  396. }};
  397. }})
  398. .catch(error => {{
  399. return {{
  400. status: 0,
  401. body: error.toString(),
  402. headers: {{}},
  403. url: url
  404. }};
  405. }});
  406. """
  407. res_dict = self.page.run_js(js_script, timeout=30)
  408. resp = BrowserResponse(res_dict)
  409. if resp.status_code == 200:
  410. return resp
  411. elif resp.status_code == 401:
  412. self.is_healthy = False
  413. raise SessionExpiredOrInvalidError()
  414. elif resp.status_code == 403:
  415. # [关键修改] 遇到 403 Forbidden,尝试绕盾并重试
  416. # 最多重试 2 次
  417. if retry_count < 2:
  418. self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
  419. # 尝试刷新盾
  420. if self._refresh_firewall_session():
  421. self._log("Firewall session refreshed. Retrying request...")
  422. # 递归重试
  423. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  424. else:
  425. self._log("Failed to refresh firewall session.")
  426. # 如果重试失败,抛出异常
  427. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  428. elif resp.status_code == 429:
  429. self.is_healthy = False
  430. raise RateLimiteddError()
  431. else:
  432. # 如果是 0,可能是 fetch 报错
  433. if resp.status_code == 0:
  434. raise BizLogicError(f"Network Error: {resp.text}")
  435. # TLS 业务错误
  436. raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  437. def _refresh_firewall_session(self) -> bool:
  438. """
  439. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  440. """
  441. try:
  442. # 1. 刷新当前页面 (通常 Dashboard 页)
  443. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  444. self._log("Refreshing page to trigger Cloudflare...")
  445. self.page.refresh()
  446. # 2. 调用 CloudflareBypasser
  447. cf = CloudflareBypasser(self.page, log=self.config.debug)
  448. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  449. success = cf.bypass(max_retry=10)
  450. if success:
  451. # 再次确认页面是否正常加载 (非 403 页面)
  452. title = self.page.title.lower()
  453. if "access denied" in title:
  454. return False
  455. # 等待 DOM 稍微稳定
  456. time.sleep(2)
  457. return True
  458. return False
  459. except Exception as e:
  460. self._log(f"Error during firewall refresh: {e}")
  461. return False
  462. def _get_realnetwork_ip(self):
  463. """新标签页获取 IP,规避 CORS"""
  464. try:
  465. tab = self.page.new_tab("https://api.ipify.org/?format=json")
  466. if tab.ele('tag:pre'):
  467. json_text = tab.ele('tag:pre').text
  468. else:
  469. json_text = tab.ele('tag:body').text
  470. ip = json.loads(json_text)['ip']
  471. tab.close()
  472. return ip
  473. except Exception:
  474. # 尝试清理
  475. try:
  476. if self.page.tabs_count > 1: self.page.close_tabs(self.page.tabs[-1])
  477. except: pass
  478. return "0.0.0.0"
  479. def _solve_recaptcha(self, params) -> str:
  480. """调用 VSCloudApi 解决 ReCaptcha"""
  481. key = params.get("apiToken")
  482. if not key: raise NotFoundError("Api-token required")
  483. submit_url = "https://api.capsolver.com/createTask"
  484. task = {
  485. "type": params.get("type"),
  486. "websiteURL": params.get("page"),
  487. "websiteKey": params.get("siteKey"),
  488. }
  489. if params.get("action"):
  490. task["pageAction"] = params.get("action")
  491. # 注意:使用 DrissionPage 后,通常是 ProxyLess 模式
  492. # 除非你想让 Capsolver 也用同样的代理(通常不需要,除非风控极严)
  493. payload = {"clientKey": key, "task": task}
  494. import requests as req # 局部引用,避免混淆
  495. r = req.post(submit_url, json=payload, timeout=20)
  496. if r.status_code != 200:
  497. raise BizLogicError(message="Failed to submit capsolver task")
  498. task_id = r.json().get("taskId")
  499. for _ in range(20):
  500. r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
  501. if r.status_code == 200:
  502. d = r.json()
  503. if d.get("status") == "ready":
  504. return d["solution"]["gRecaptchaResponse"]
  505. time.sleep(3)
  506. raise BizLogicError(message="Capsolver task timeout")
  507. def _parse_travel_groups(self, html: str) -> List[Dict]:
  508. groups = []
  509. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  510. js_match = re.search(js_pattern, html, re.DOTALL)
  511. if js_match:
  512. json_str = js_match.group(1).replace(r'\"', '"')
  513. data = json.loads(json_str)
  514. for g in data:
  515. groups.append({
  516. 'group_name': g.get('groupName'),
  517. 'group_number': g.get('formGroupId'),
  518. 'location': g.get('vacName')
  519. })
  520. else:
  521. self._log('Parsed travel group page, but not found travelGroups')
  522. return groups
  523. def _parse_appointment_slots(self, html: str) -> List[Dict]:
  524. slots = []
  525. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  526. match = re.search(pattern, html, re.DOTALL)
  527. if match:
  528. json_str = match.group(1).replace(r'\"', '"')
  529. data = json.loads(json_str)
  530. for day in data:
  531. d_str = day.get('day')
  532. for s in day.get('slots', []):
  533. labels = s.get('labels', [])
  534. lbl = ""
  535. # 简化逻辑:TLS label 列表
  536. if 'pta' in labels: lbl = 'pta'
  537. elif 'ptaw' in labels: lbl = 'ptaw'
  538. elif '' in labels or not labels: lbl = ''
  539. slots.append({
  540. 'date': d_str,
  541. 'time': s.get('time'),
  542. 'label': lbl
  543. })
  544. return slots
  545. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  546. if not html:
  547. self.is_healthy = False
  548. raise SessionExpiredOrInvalidError()
  549. # 将 html 转小写检查
  550. html_lower = html.lower()
  551. if keyword.lower() not in html_lower:
  552. if 'redirected automatically' in html_lower:
  553. self.is_healthy = False
  554. raise SessionExpiredOrInvalidError("Redirected automatically")
  555. if 'login' in html_lower and 'password' in html_lower:
  556. self.is_healthy = False
  557. raise SessionExpiredOrInvalidError("Redirected to login")
  558. if 'session expired' in html_lower:
  559. self.is_healthy = False
  560. raise SessionExpiredOrInvalidError("Session expired")
  561. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  562. if not start_str or not end_str:
  563. return dates
  564. valid_dates = []
  565. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  566. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  567. for date_str in dates:
  568. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  569. if s_date <= curr_date <= e_date:
  570. valid_dates.append(date_str)
  571. random.shuffle(valid_dates)
  572. return valid_dates