tls_plugin.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. from datetime import datetime
  9. from typing import List, Dict, Optional, Any, Callable
  10. from urllib.parse import urljoin, urlparse, urlencode
  11. # DrissionPage 核心
  12. from DrissionPage import ChromiumPage, ChromiumOptions
  13. from vs_plg import IVSPlg
  14. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  15. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  16. from toolkit.proxy_tunnel import ProxyTunnel
  17. class BrowserResponse:
  18. """模拟 requests.Response"""
  19. def __init__(self, result_dict):
  20. result_dict = result_dict or {}
  21. self.status_code = result_dict.get('status', 0)
  22. self.text = result_dict.get('body', '')
  23. self.headers = result_dict.get('headers', {})
  24. self.url = result_dict.get('url', '')
  25. self._json = None
  26. def json(self):
  27. if self._json is None:
  28. if not self.text:
  29. return {}
  30. try:
  31. self._json = json.loads(self.text)
  32. except:
  33. self._json = {}
  34. return self._json
  35. class TlsPlugin(IVSPlg):
  36. """
  37. TLSContact 签证预约插件 (DrissionPage 版)
  38. """
  39. def __init__(self, group_id: str):
  40. self.group_id = group_id
  41. self.config: Optional[VSPlgConfig] = None
  42. self.free_config: Dict[str, Any] = {}
  43. self.is_healthy = True
  44. self.logger = None
  45. # 浏览器实例
  46. self.page: Optional[ChromiumPage] = None
  47. self.travel_group: Optional[Dict] = None
  48. # --- [核心修改] 并发隔离与资源管理 ---
  49. # 生成唯一实例 ID
  50. self.instance_id = uuid.uuid4().hex[:8]
  51. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  52. # 定义子目录:代理插件目录 & 浏览器用户数据目录
  53. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  54. # 确保根目录存在 (子目录由具体逻辑创建)
  55. if not os.path.exists(self.root_workspace):
  56. os.makedirs(self.root_workspace)
  57. # 持有隧道实例
  58. self.tunnel = None
  59. self.session_create_time: float = 0
  60. def get_group_id(self) -> str:
  61. return self.group_id
  62. def set_log(self, logger: Callable[[str], None]) -> None:
  63. self.logger = logger
  64. def _log(self, message):
  65. if self.logger:
  66. self.logger(f'[TlsPlugin] [{self.group_id}] {message}')
  67. else:
  68. print(f'[TlsPlugin] [{self.group_id}] {message}')
  69. def set_config(self, config: VSPlgConfig):
  70. self.config = config
  71. self.free_config = config.free_config or {}
  72. def keep_alive(self):
  73. pass
  74. def health_check(self) -> bool:
  75. if not self.is_healthy:
  76. return False
  77. if self.page is None:
  78. return False
  79. try:
  80. if not self.page.run_js("return 1;"):
  81. return False
  82. except:
  83. return False
  84. if self.config.session_max_life > 0:
  85. current_time = time.time()
  86. elapsed_time = current_time - self.session_create_time
  87. if elapsed_time > self.config.session_max_life * 60:
  88. self._log(f"Session expired.")
  89. return False
  90. return True
  91. def _save_screenshot(self, name_prefix):
  92. try:
  93. timestamp = int(time.time())
  94. filename = f"{self.instance_id}_{name_prefix}_{timestamp}.jpg"
  95. save_path = os.path.join("data", filename)
  96. os.makedirs("data", exist_ok=True)
  97. # [修改] 改为 full_page=False,防止页面结构异常导致截图失败
  98. # 这样能截取到浏览器当前可视区域,最适合调试“卡住”的情况
  99. self.page.get_screenshot(path=save_path, full_page=False)
  100. self._log(f"Screenshot saved to {save_path}")
  101. except Exception as e:
  102. self._log(f"Failed to save screenshot: {e}")
  103. def create_session(self):
  104. """
  105. 全浏览器会话创建:过盾 -> JS注入登录 -> 原生跳转
  106. """
  107. self._log(f"Initializing Session (ID: {self.instance_id})...")
  108. co = ChromiumOptions()
  109. # -------------------------------------------------------------
  110. # [核心修复] 解决 'not enough values to unpack'
  111. # -------------------------------------------------------------
  112. # 1. 不要用 co.auto_port(),因为它依赖解析 stdout,会被 DBus 报错干扰
  113. # 2. 我们手动随机生成一个端口
  114. import random
  115. import socket
  116. def get_free_port():
  117. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  118. s.bind(('', 0))
  119. return s.getsockname()[1]
  120. debug_port = get_free_port()
  121. self._log(f"Assigned Debug Port: {debug_port}")
  122. # 3. 强制指定端口,DrissionPage 就会直接连接,不再解析日志
  123. co.set_local_port(debug_port)
  124. # --- [关键配置] 设置独立的用户数据目录 ---
  125. # 这样每个实例的 Cache, Cookies, LocalStorage 都是完全隔离的
  126. # 同时也防止了多进程争抢同一个 Default 文件夹导致的崩溃
  127. co.set_user_data_path(self.user_data_path)
  128. # --- 1. 指定浏览器路径 (适配 Docker) ---
  129. chrome_path = os.getenv("CHROME_BIN")
  130. if chrome_path and os.path.exists(chrome_path):
  131. co.set_paths(browser_path=chrome_path)
  132. # --- [核心修改] 代理配置 ---
  133. if self.config.proxy and self.config.proxy.ip:
  134. p = self.config.proxy
  135. if p.username and p.password:
  136. self._log(f"Starting Proxy Tunnel for {p.ip}...")
  137. # 1. 启动本地隧道
  138. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  139. local_proxy = self.tunnel.start()
  140. self._log(f"Tunnel started at {local_proxy}")
  141. # 2. Chrome 连接本地免密端口
  142. # 必须使用 --proxy-server 强制指定,绝对稳健
  143. co.set_argument(f'--proxy-server={local_proxy}')
  144. else:
  145. # 无密码代理,直接用
  146. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  147. co.set_argument(f'--proxy-server={proxy_str}')
  148. else:
  149. self._log("[WARN] No proxy configured!")
  150. co.headless(False)
  151. co.set_argument('--no-sandbox')
  152. co.set_argument('--disable-gpu')
  153. # Docker 默认 /dev/shm 只有 64MB,Chromium 很容易爆内存崩溃
  154. co.set_argument('--disable-dev-shm-usage')
  155. co.set_argument('--window-size=1920,1080')
  156. co.set_argument('--disable-blink-features=AutomationControlled')
  157. try:
  158. self.page = ChromiumPage(co)
  159. apt_config = self.free_config.get('apt_config', {})
  160. if not apt_config:
  161. raise NotFoundError("apt_config config missing")
  162. login_url = "https://visas-fr.tlscontact.com/en-us/login"
  163. params = {
  164. "issuerId": apt_config["code"],
  165. "country": apt_config["country"],
  166. "vac": apt_config["code"],
  167. "redirect": f"/en-us/country/{apt_config['country']}/vac/{apt_config['code']}"
  168. }
  169. full_login_url = f"{login_url}?{urlencode(params)}"
  170. self._log(f"Navigating: {full_login_url}")
  171. self.page.get(full_login_url)
  172. # --- Cloudflare 过盾 ---
  173. cf = CloudflareBypasser(self.page, log=self.config.debug)
  174. if not cf.bypass(max_retry=15):
  175. raise BizLogicError("Cloudflare bypass timeout")
  176. wait_start = time.time()
  177. while True:
  178. # 获取页面 HTML,转小写
  179. # 注意:如果此处报错 "页面被刷新",是 DrissionPage 的机制问题,
  180. # 但你要求先不处理复杂错误,所以这里保持最简单的写法。
  181. html = self.page.html.lower()
  182. # 检查是否在排队室 (法语或英语)
  183. if "file d'attente" in html or "waiting room" in html:
  184. # 如果等太久(比如1小时),就强制停止
  185. if time.time() - wait_start > 6 * 60:
  186. self._log("Waiting room timeout (1h).")
  187. break
  188. self._log("In Waiting Room... Waiting for auto-refresh.")
  189. time.sleep(30) # 截图说页面会自动刷新,所以这里只sleep,不动浏览器
  190. else:
  191. # 页面里没有“等候室”的字了,说明出来了
  192. break
  193. # --- 登录页面检查 ---
  194. if not self.page.ele('#email-input-field'):
  195. self._log("Reloading Login Page...")
  196. self.page.get(full_login_url)
  197. if not self.page.wait.ele_displayed('#email-input-field', timeout=15):
  198. self._save_screenshot("login_load_fail")
  199. raise BizLogicError("Login form not loaded")
  200. # --- JS 注入登录 ---
  201. g_token = ""
  202. if self.page.ele('.g-recaptcha') or self.page.ele('xpath://iframe[contains(@src, "recaptcha")]'):
  203. self._log("Solving ReCaptcha...")
  204. rc_params = {
  205. "type": "ReCaptchaV2TaskProxyLess", "page": self.page.url,
  206. "siteKey": "6LcDpXcfAAAAAM7wOEsF_38DNsL20tTvPTKxpyn0",
  207. "apiToken": self.free_config.get("capsolver_key", "")
  208. }
  209. g_token = self._solve_recaptcha(rc_params)
  210. username = self.config.account.username
  211. password = self.config.account.password
  212. # 使用 JS 直接操作 DOM 并 click,让浏览器处理 302
  213. js_login = f"""
  214. var u = document.getElementById('email-input-field');
  215. if(u) {{ u.value = "{username}"; u.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  216. var p = document.getElementById('password-input-field');
  217. if(p) {{ p.value = "{password}"; p.dispatchEvent(new Event('input', {{bubbles:true}})); }}
  218. var g = document.getElementById('g-recaptcha-response');
  219. if(g) {{ g.value = "{g_token}"; }}
  220. var btn = document.getElementById('btn-login');
  221. if(btn) {{ btn.click(); return true; }} else {{ return false; }}
  222. """
  223. self._log("Submitting Login via JS...")
  224. if not self.page.run_js(js_login): raise BizLogicError("Login button missing")
  225. # --- 等待跳转 ---
  226. self._log("Waiting for redirect...")
  227. self.page.wait.url_change('login-actions', exclude=True, timeout=45)
  228. time.sleep(3)
  229. # 检查是否失败
  230. if "login-actions" in self.page.url or "auth" in self.page.url:
  231. err = "Unknown Login Error"
  232. if "Invalid username" in self.page.html: err = "Invalid Credentials"
  233. self._save_screenshot("login_submit_fail")
  234. raise BizLogicError(f"Login Failed: {err}")
  235. # --- 提取 Dashboard 信息 ---
  236. self._log("Waiting for dashboard...")
  237. self.page.wait.load_start()
  238. time.sleep(5)
  239. html = self.page.html
  240. self._check_page_is_session_expired_or_invalid("My travel group", html)
  241. groups = self._parse_travel_groups(html)
  242. target_city = apt_config['city'].lower()
  243. for g in groups:
  244. if g['location'].lower() == target_city:
  245. self.travel_group = g
  246. break
  247. if not self.travel_group:
  248. self._save_screenshot("group_not_found")
  249. raise NotFoundError(f"Group not found for {target_city}")
  250. self.session_create_time = time.time()
  251. self._log(f"Session Ready. Group: {self.travel_group['group_number']}")
  252. except Exception as e:
  253. self._log(f"Session Create Error: {e}")
  254. self.cleanup()
  255. raise e
  256. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  257. res = VSQueryResult()
  258. res.success = False
  259. apt_config = self.free_config.get('apt_config', {})
  260. group_num = self.travel_group['group_number']
  261. interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
  262. url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking'
  263. params = {
  264. 'location': apt_config["code"],
  265. 'month': interest_month,
  266. }
  267. # DrissionPage 自动处理 Cloudflare,直接 fetch 即可
  268. try:
  269. resp = self._perform_request("GET", url, params=params, retry_count=1)
  270. except Exception as e:
  271. self._log(f"Query request failed: {e}")
  272. raise e
  273. self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
  274. # 解析 Slots
  275. all_slots = self._parse_appointment_slots(resp.text)
  276. target_labels = self.free_config.get("target_labels", ["", "pta"])
  277. # 根据配置过滤
  278. available = [s for s in all_slots if s.get("label") in target_labels]
  279. if available:
  280. res.success = True
  281. earliest_date = available[0]["date"]
  282. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  283. res.availability_status = AvailabilityStatus.Available
  284. res.earliest_date = earliest_dt
  285. date_map: dict[datetime, list[TimeSlot]] = {}
  286. for s in available:
  287. date_str = s["date"]
  288. dt = datetime.strptime(date_str, "%Y-%m-%d")
  289. date_map.setdefault(dt, []).append(
  290. TimeSlot(time=s["time"], label=str(s.get("label", "")))
  291. )
  292. res.availability = [DateAvailability(date=d, times=slots) for d, slots in date_map.items()]
  293. self._log(f"Slot Found! -> {available}")
  294. else:
  295. self._log("No slots available.")
  296. res.success = False
  297. res.availability_status = AvailabilityStatus.NoneAvailable
  298. return res
  299. def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
  300. res = VSBookResult()
  301. res.success = False
  302. apt_config = self.free_config.get('apt_config', {})
  303. group_num = self.travel_group['group_number']
  304. exp_start = user_inputs.get('expected_start_date', '')
  305. exp_end = user_inputs.get('expected_end_date', '')
  306. support_pta = user_inputs.get('support_pta', True)
  307. target_labels = ['']
  308. if support_pta:
  309. target_labels.append('pta')
  310. # 获取所有可用的日期字符串用于过滤
  311. available_dates_str =[
  312. da.date.strftime("%Y-%m-%d")
  313. for da in slot_info.availability if da.date
  314. ]
  315. # ---------------------------------------------------------
  316. # 第一步:过滤出符合用户日期范围要求的日期,并随机选择一个 slot
  317. # ---------------------------------------------------------
  318. valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
  319. if not valid_dates_list:
  320. raise NotFoundError(message="No dates match user constraints")
  321. all_possible_slots =[]
  322. for da in slot_info.availability:
  323. if not da.date:
  324. continue
  325. date_str = da.date.strftime("%Y-%m-%d")
  326. if date_str in valid_dates_list:
  327. for t in da.times:
  328. if t.label in target_labels:
  329. all_possible_slots.append({
  330. "date": date_str,
  331. "time_obj": t,
  332. "label": t.label
  333. })
  334. if not all_possible_slots:
  335. raise NotFoundError(message="No suitable slot found (after label filtering)")
  336. selected_slot = random.choice(all_possible_slots)
  337. selected_date = selected_slot["date"]
  338. selected_time = selected_slot["time_obj"] # TimeSlot 对象
  339. selected_label = selected_slot["label"]
  340. self._log(f"Found {len(all_possible_slots)} valid slots. Randomly selected: {selected_date} {selected_time.time}")
  341. # 基础 URL 和路由状态 (Next.js 专用)
  342. base_url = f'https://visas-fr.tlscontact.com/en-us/{group_num}/workflow/appointment-booking'
  343. router_state = f'%5B%22%22%2C%7B%22children%22%3A%5B%5B%22lang%22%2C%22en-us%22%2C%22d%22%5D%2C%7B%22children%22%3A%5B%5B%22groupId%22%2C%22{group_num}%22%2C%22d%22%5D%2C%7B%22children%22%3A%5B%22workflow%22%2C%7B%22children%22%3A%5B%22appointment-booking%22%2C%7B%22children%22%3A%5B%22__PAGE__%22%2C%7B%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%2Ctrue%5D%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%5D%7D%2Cnull%2Cnull%2Ctrue%5D%7D%2Cnull%2Cnull%5D'
  344. # ---------------------------------------------------------
  345. # 第二步:调用 getBasketCost 获取订单金额 (预定前置条件)
  346. # ---------------------------------------------------------
  347. self._log("Fetching basket cost...")
  348. getBasketCost_ACTION_ID = "40124cc90acef520d4fd2daf60ad3c8e21fc2c11d8"
  349. payload =[{
  350. "groupId": str(group_num),
  351. "lang": "en-us",
  352. "labels": [selected_label]
  353. }]
  354. body_data_str = json.dumps(payload)
  355. getBasketCost_js_script = f"""
  356. const url = "{base_url}";
  357. const headers = {{
  358. 'Next-Action': '{getBasketCost_ACTION_ID}',
  359. 'Next-Router-State-Tree': decodeURIComponent('{router_state}'),
  360. 'Accept': 'text/x-component',
  361. 'Accept-Language': 'zh-CN,zh;q=0.9',
  362. 'Content-Type': 'text/plain;charset=UTF-8'
  363. }};
  364. const bodyData = '{body_data_str}';
  365. return fetch(url, {{ method: 'POST', headers: headers, body: bodyData }})
  366. .then(async response => {{
  367. const text = await response.text();
  368. const headers = {{}};
  369. response.headers.forEach((value, key) => headers[key] = value);
  370. return {{ status: response.status, body: text, headers: headers, url: response.url }};
  371. }}).catch(err => {{
  372. return {{ status: 0, body: err.toString(), headers: {{}}, url: url }};
  373. }});
  374. """
  375. cost_res_dict = self.page.run_js(getBasketCost_js_script)
  376. cost_resp = BrowserResponse(cost_res_dict)
  377. if cost_resp.status_code != 200:
  378. self._log(f"Failed to get basket cost! Status: {cost_resp.status_code}. Aborting booking.")
  379. return res
  380. # 尝试解析并打印金额信息,例如总价和币种
  381. cost_match = re.search(r'"total":"([^"]+)","currency":"([^"]+)"', cost_resp.text)
  382. if cost_match:
  383. total_cost, currency = cost_match.groups()
  384. self._log(f"Basket cost checked successfully: {total_cost} {currency}")
  385. else:
  386. self._log("Basket cost checked successfully (could not parse exact amount).")
  387. # ---------------------------------------------------------
  388. # 第三步:解决 ReCaptcha V3
  389. # ---------------------------------------------------------
  390. self._log("Solving ReCaptcha V3...")
  391. page_url = f'{base_url}?location={apt_config["code"]}&month={selected_date[:7]}'
  392. api_token = self.free_config.get("capsolver_key", "")
  393. rc_params = {
  394. "type": "ReCaptchaV3TaskProxyLess",
  395. "page": page_url,
  396. "action": "book",
  397. "siteKey": "6LcTpXcfAAAAAM3VojNhyV-F1z92ADJIvcSZ39Y9",
  398. "apiToken": api_token,
  399. "proxy": self._get_proxy_url()
  400. }
  401. g_token = self._solve_recaptcha(rc_params)
  402. # ---------------------------------------------------------
  403. # 第四步:提交正式的 Appointment Booking 请求
  404. # ---------------------------------------------------------
  405. self._log("Submitting booking request via JS Fetch...")
  406. bookAppointment_ACTION_ID = "6043cfd107081bc817cbb11a8c0db17d3a063401be"
  407. bookAppointment_js_script = f"""
  408. const url = "{base_url}";
  409. const formData = new FormData();
  410. formData.append('1_formGroupId', '{group_num}');
  411. formData.append('1_lang', 'en-us');
  412. formData.append('1_process', 'APPOINTMENT');
  413. formData.append('1_location', '{apt_config["code"]}');
  414. formData.append('1_date', '{selected_date}');
  415. formData.append('1_time', '{selected_time.time}');
  416. formData.append('1_appointmentLabel', '{selected_label}');
  417. formData.append('1_captchaToken', '{g_token}');
  418. formData.append('0', '[{{"status":"IDLE"}},"$K1"]');
  419. const headers = {{
  420. 'Next-Action': '{bookAppointment_ACTION_ID}',
  421. 'Next-Router-State-Tree': decodeURIComponent('{router_state}'),
  422. 'Accept': 'text/x-component'
  423. }};
  424. return fetch(url, {{ method: 'POST', headers: headers, body: formData }})
  425. .then(async response => {{
  426. const text = await response.text();
  427. const headers = {{}};
  428. response.headers.forEach((value, key) => headers[key] = value);
  429. return {{ status: response.status, body: text, headers: headers, url: response.url }};
  430. }}).catch(err => {{
  431. return {{ status: 0, body: err.toString(), headers: {{}}, url: url }};
  432. }});
  433. """
  434. book_res_dict = self.page.run_js(bookAppointment_js_script)
  435. resp = BrowserResponse(book_res_dict)
  436. # ---------------------------------------------------------
  437. # 第五步:结果判定
  438. # ---------------------------------------------------------
  439. if resp.status_code == 303 or (resp.status_code == 200 and "appointment-confirmation" in resp.url):
  440. self._log(f"Booking Success! URL: {resp.url}")
  441. res.success = True
  442. res.book_date = selected_date
  443. res.book_time = selected_time.time
  444. return res
  445. if resp.status_code == 200:
  446. if "APPOINTMENT_LIMIT_REACHED" in resp.text:
  447. self._log("Failed: Appointment Limit Reached")
  448. elif "Invalid captcha" in resp.text:
  449. self._log("Failed: Invalid Captcha")
  450. else:
  451. self._log(f"Booking Failed (Unknown 200): {resp.text[:200]}")
  452. else:
  453. self._log(f"Booking Failed. Status: {resp.status_code}")
  454. return res
  455. def _get_proxy_url(self):
  456. # 构造代理
  457. proxy_url = ""
  458. if self.config.proxy.ip:
  459. s = self.config.proxy
  460. if s.username:
  461. proxy_url = f"{s.scheme}://{s.username}:{s.password}@{s.ip}:{s.port}"
  462. else:
  463. proxy_url = f"{s.scheme}://{s.ip}:{s.port}"
  464. return proxy_url
  465. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  466. """
  467. 在浏览器上下文中注入 JS 执行 Fetch
  468. """
  469. if not self.page:
  470. raise BizLogicError("Browser not initialized")
  471. if params:
  472. from urllib.parse import urlencode
  473. if '?' in url:
  474. url += '&' + urlencode(params)
  475. else:
  476. url += '?' + urlencode(params)
  477. fetch_options = {
  478. "method": method.upper(),
  479. "headers": headers or {},
  480. "credentials": "include"
  481. }
  482. # Body 处理
  483. if json_data:
  484. fetch_options['body'] = json.dumps(json_data)
  485. fetch_options['headers']['Content-Type'] = 'application/json'
  486. elif data:
  487. if isinstance(data, dict):
  488. from urllib.parse import urlencode
  489. fetch_options['body'] = urlencode(data)
  490. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  491. else:
  492. fetch_options['body'] = data
  493. js_script = f"""
  494. const url = "{url}";
  495. const options = {json.dumps(fetch_options)};
  496. return fetch(url, options)
  497. .then(async response => {{
  498. const text = await response.text();
  499. const headers = {{}};
  500. response.headers.forEach((value, key) => headers[key] = value);
  501. return {{
  502. status: response.status,
  503. body: text,
  504. headers: headers,
  505. url: response.url
  506. }};
  507. }})
  508. .catch(error => {{
  509. return {{
  510. status: 0,
  511. body: error.toString(),
  512. headers: {{}},
  513. url: url
  514. }};
  515. }});
  516. """
  517. res_dict = self.page.run_js(js_script, timeout=30)
  518. resp = BrowserResponse(res_dict)
  519. if resp.status_code == 200:
  520. return resp
  521. elif resp.status_code == 401:
  522. self.is_healthy = False
  523. raise SessionExpiredOrInvalidError()
  524. elif resp.status_code == 403:
  525. # [关键修改] 遇到 403 Forbidden,尝试绕盾并重试
  526. # 最多重试 2 次
  527. if retry_count < 2:
  528. self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
  529. # 尝试刷新盾
  530. if self._refresh_firewall_session():
  531. self._log("Firewall session refreshed. Retrying request...")
  532. # 递归重试
  533. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  534. else:
  535. self._log("Failed to refresh firewall session.")
  536. # 如果重试失败,抛出异常
  537. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  538. elif resp.status_code == 429:
  539. self.is_healthy = False
  540. raise RateLimiteddError()
  541. else:
  542. # 如果是 0,可能是 fetch 报错
  543. if resp.status_code == 0:
  544. raise BizLogicError(f"Network Error: {resp.text}")
  545. # TLS 业务错误
  546. raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
  547. def _refresh_firewall_session(self) -> bool:
  548. """
  549. 主动刷新页面以触发 Cloudflare 挑战并尝试通过
  550. """
  551. try:
  552. # 1. 刷新当前页面 (通常 Dashboard 页)
  553. # 这会强制浏览器重新进行 HTTP 请求,从而触发 Cloudflare 拦截页
  554. self._log("Refreshing page to trigger Cloudflare...")
  555. self.page.refresh()
  556. # 2. 调用 CloudflareBypasser
  557. cf = CloudflareBypasser(self.page, log=self.config.debug)
  558. # 3. 尝试过盾 (尝试次数稍多一点,因为此时可能网络不稳定)
  559. success = cf.bypass(max_retry=10)
  560. if success:
  561. # 再次确认页面是否正常加载 (非 403 页面)
  562. title = self.page.title.lower()
  563. if "access denied" in title:
  564. return False
  565. # 等待 DOM 稍微稳定
  566. time.sleep(2)
  567. return True
  568. return False
  569. except Exception as e:
  570. self._log(f"Error during firewall refresh: {e}")
  571. return False
  572. def _solve_recaptcha(self, params) -> str:
  573. """调用 VSCloudApi 解决 ReCaptcha"""
  574. key = params.get("apiToken")
  575. if not key: raise NotFoundError("Api-token required")
  576. submit_url = "https://api.capsolver.com/createTask"
  577. task = {
  578. "type": params.get("type"),
  579. "websiteURL": params.get("page"),
  580. "websiteKey": params.get("siteKey"),
  581. }
  582. if params.get("action"):
  583. task["pageAction"] = params.get("action")
  584. # if params.get("proxy"):
  585. # p = urlparse(params.get("proxy"))
  586. # task["proxyType"] = p.scheme
  587. # task["proxyAddress"] = p.hostname
  588. # task["proxyPort"] = p.port
  589. # if p.username:
  590. # task["proxyLogin"] = p.username
  591. # task["proxyPassword"] = p.password
  592. # 注意:使用 DrissionPage 后,通常是 ProxyLess 模式
  593. # 除非你想让 Capsolver 也用同样的代理(通常不需要,除非风控极严)
  594. payload = {"clientKey": key, "task": task}
  595. import requests as req # 局部引用,避免混淆
  596. r = req.post(submit_url, json=payload, timeout=20)
  597. if r.status_code != 200:
  598. raise BizLogicError(message="Failed to submit capsolver task")
  599. task_id = r.json().get("taskId")
  600. for _ in range(20):
  601. r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
  602. if r.status_code == 200:
  603. d = r.json()
  604. if d.get("status") == "ready":
  605. return d["solution"]["gRecaptchaResponse"]
  606. time.sleep(3)
  607. raise BizLogicError(message="Capsolver task timeout")
  608. def _parse_travel_groups(self, html: str) -> List[Dict]:
  609. groups = []
  610. js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
  611. js_match = re.search(js_pattern, html, re.DOTALL)
  612. if js_match:
  613. json_str = js_match.group(1).replace(r'\"', '"')
  614. data = json.loads(json_str)
  615. for g in data:
  616. groups.append({
  617. 'group_name': g.get('groupName'),
  618. 'group_number': g.get('formGroupId'),
  619. 'location': g.get('vacName')
  620. })
  621. else:
  622. self._log('Parsed travel group page, but not found travelGroups')
  623. return groups
  624. def _parse_appointment_slots(self, html: str) -> List[Dict]:
  625. slots = []
  626. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  627. match = re.search(pattern, html, re.DOTALL)
  628. if match:
  629. json_str = match.group(1).replace(r'\"', '"')
  630. data = json.loads(json_str)
  631. for day in data:
  632. d_str = day.get('day')
  633. for s in day.get('slots', []):
  634. labels = s.get('labels', [])
  635. lbl = ""
  636. # 简化逻辑:TLS label 列表
  637. if 'pta' in labels: lbl = 'pta'
  638. elif 'ptaw' in labels: lbl = 'ptaw'
  639. elif '' in labels or not labels: lbl = ''
  640. slots.append({
  641. 'date': d_str,
  642. 'time': s.get('time'),
  643. 'label': lbl
  644. })
  645. return slots
  646. def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
  647. if not html:
  648. self.is_healthy = False
  649. raise SessionExpiredOrInvalidError()
  650. # 将 html 转小写检查
  651. html_lower = html.lower()
  652. if keyword.lower() not in html_lower:
  653. if 'redirected automatically' in html_lower:
  654. self.is_healthy = False
  655. raise SessionExpiredOrInvalidError("Redirected automatically")
  656. if 'login' in html_lower and 'password' in html_lower:
  657. self.is_healthy = False
  658. raise SessionExpiredOrInvalidError("Redirected to login")
  659. if 'session expired' in html_lower:
  660. self.is_healthy = False
  661. raise SessionExpiredOrInvalidError("Session expired")
  662. def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
  663. if not start_str or not end_str:
  664. return dates
  665. valid_dates = []
  666. s_date = datetime.strptime(start_str[:10], "%Y-%m-%d")
  667. e_date = datetime.strptime(end_str[:10], "%Y-%m-%d")
  668. for date_str in dates:
  669. curr_date = datetime.strptime(date_str, "%Y-%m-%d")
  670. if s_date <= curr_date <= e_date:
  671. valid_dates.append(date_str)
  672. random.shuffle(valid_dates)
  673. return valid_dates
  674. # --- 资源清理核心方法 ---
  675. def cleanup(self):
  676. """
  677. 销毁浏览器并彻底删除临时文件
  678. """
  679. # 1. 关闭浏览器
  680. if self.page:
  681. try:
  682. self.page.quit() # 这会关闭 Chrome 进程
  683. except Exception:
  684. pass # 忽略已关闭的错误
  685. self.page = None
  686. # 2. 删除文件
  687. # 注意:Chrome 关闭后可能需要几百毫秒释放文件锁,稍微等待
  688. if os.path.exists(self.root_workspace):
  689. for _ in range(3):
  690. try:
  691. time.sleep(0.2)
  692. shutil.rmtree(self.root_workspace, ignore_errors=True)
  693. break
  694. except Exception as e:
  695. # 如果删除失败(通常是Windows文件占用),重试
  696. self._log(f"Cleanup retry: {e}")
  697. time.sleep(0.5)
  698. # 如果依然存在,打印警告(虽然 ignore_errors=True 会掩盖报错,但可以 check exists)
  699. if os.path.exists(self.root_workspace):
  700. self._log(f"[WARN] Failed to fully remove workspace: {self.root_workspace}")
  701. # 3. [新增] 关闭代理隧道
  702. if self.tunnel:
  703. try: self.tunnel.stop()
  704. except: pass
  705. self.tunnel = None
  706. def __del__(self):
  707. """
  708. 析构函数:当对象被垃圾回收时自动调用
  709. """
  710. self.cleanup()