jerry hace 3 semanas
padre
commit
7f3fc1b215
Se han modificado 7 ficheros con 1419 adiciones y 17 borrados
  1. 72 4
      .gitignore
  2. 10 10
      config/config.json
  3. 0 3
      data/proxy_states.json
  4. 1 0
      docker-compose.yml
  5. 1090 0
      plugins/tls_plugin2.py
  6. 1 0
      requirements.txt
  7. 245 0
      utils/cloudflare_bypass_for_scraping2.py

+ 72 - 4
.gitignore

@@ -1,6 +1,41 @@
-__pycache__
-debug_pages
-logs
+# —— Python ——
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.tox/
+htmlcov/
+.coverage
+.coverage.*
+.pytest/
+*.egg
+*.egg-info/
+.eggs/
+dist/
+build/
+MANIFEST
+pip-wheel-metadata/
+
+# —— 虚拟环境 ——
+venv/
+.venv/
+env/
+.env/
+
+# —— 本地/机密(真密钥、本地覆盖,勿入仓库;需要样例可保留 .env.example 并加 !.env.example) ——
+.env
+.env.*
+!.env.example
+*.local
+secrets.json
+
+# —— Node(若子项目使用) ——
+node_modules/
+
+# —— 操作系统 ——
 .DS_Store
 *.jpg
 node_modules*
@@ -8,4 +43,37 @@ temp_browser_data
 venv
 data/squashfs-root
 *.AppImage
-data/temp_browser_data
+data/temp_browser_data
+Thumbs.db
+Desktop.ini
+ehthumbs.db
+
+# —— 编辑器/IDE ——
+.idea/
+.vscode/*
+!.vscode/extensions.json
+*.swp
+*.swo
+*~
+.cursor/
+*.code-workspace
+
+# —— 本项目:日志、调试页、代理与会话、临时用户数据(Camoufox/Drission 等) ——
+logs/
+debug_pages/
+temp_browser_data/
+data/temp_browser_data/
+data/reg_session_*/
+data/proxy_states.json
+
+# —— 插件/自动化生成的截图(见 plugins 中 data/ 下保存路径) ——
+data/**/*.jpg
+data/**/*.jpeg
+data/**/*.png
+
+# —— 其他 ——
+*.log
+!**/fixtures/**/*.log
+*.orig
+*.tmp
+*.temp

+ 10 - 10
config/config.json

@@ -9,7 +9,7 @@
         {
             "identifier": "vfs.ie.nl",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -453,7 +453,7 @@
         {
             "identifier": "vfs.ie.at",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -651,7 +651,7 @@
         {
             "identifier": "vfs.ie.hu",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -783,7 +783,7 @@
         {
             "identifier": "vfs.gb.at",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -849,7 +849,7 @@
         {
             "identifier": "bls.ie.es",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -971,7 +971,7 @@
         {
             "identifier": "tls.gb.fr",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -1061,8 +1061,8 @@
             },
             "plugin_config": {
                 "lib_path": "plugins",
-                "plugin_name": "tls_plugin",
-                "plugin_bin": "tls_plugin.py",
+                "plugin_name": "tls_plugin2",
+                "plugin_bin": "tls_plugin2.py",
                 "plugin_proto": "IVSPlg"
             },
             "appointment_types": [
@@ -1088,7 +1088,7 @@
         {
             "identifier": "e-konsulat.ie.pl",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": false,
             "need_proxy": true,
             "proxy_pool": "isp_all",
@@ -1290,7 +1290,7 @@
         {
             "identifier": "greekemba.ie.gr",
             "debug": false,
-            "enable": true,
+            "enable": false,
             "need_account": true,
             "need_proxy": true,
             "proxy_pool": "isp_all",

+ 0 - 3
data/proxy_states.json

@@ -1,3 +0,0 @@
-{
-    "isp_all::110009": 1777038464.6069372
-}

+ 1 - 0
docker-compose.yml

@@ -15,6 +15,7 @@ services:
       - ./plugins:/app/plugins
     environment:
       - TZ=Asia/Shanghai
+      # entrypoint 已启动 Xvfb :99,Camoufox 默认有头即可用虚拟屏;若改真无头可设: CAMOUFOX_HEADLESS=1
       - DISPLAY=:99
       - CHROME_BIN=/opt/ungoogled-chromium/chrome
     # 资源限制

+ 1090 - 0
plugins/tls_plugin2.py

@@ -0,0 +1,1090 @@
+import time
+import json
+import random
+import re
+import os
+import uuid
+import shutil
+import queue
+import threading
+from datetime import datetime
+from typing import List, Dict, Optional, Any, Callable
+from urllib.parse import urljoin, urlparse, urlencode
+
+from camoufox import NewBrowser
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError, Page, BrowserContext
+
+from vs_plg import IVSPlg
+from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError 
+from utils.cloudflare_bypass_for_scraping2 import CloudflareBypasser
+
+
+def _camoufox_headless_from_env():
+    """
+    Map the ``CAMOUFOX_HEADLESS`` environment variable to a ``headless`` option.
+
+    The value is stripped and lower-cased before matching:
+    - ``1`` / ``true`` / ``yes`` / ``headless``  -> ``True``
+    - ``virtual`` / ``xvfb`` / ``vdisplay``      -> the string ``"virtual"``
+    - anything else, including unset or empty    -> ``False``
+
+    The caller in this file passes the result as ``headless=`` to ``NewBrowser``.
+    """
+    mode = os.environ.get("CAMOUFOX_HEADLESS")
+    mode = mode.strip().lower() if mode else ""
+    if mode in ("virtual", "xvfb", "vdisplay"):
+        return "virtual"
+    return mode in ("1", "true", "yes", "headless")
+
+
+class BrowserResponse:
+    """Minimal stand-in for ``requests.Response`` built from a plain dict.
+
+    The dict may carry ``status``, ``body``, ``headers`` and ``url`` keys;
+    missing keys (or a ``None`` dict) fall back to ``0``, ``''``, ``{}``
+    and ``''`` respectively.
+    """
+    def __init__(self, result_dict):
+        result_dict = result_dict or {}
+        self.status_code = result_dict.get('status', 0)
+        self.text = result_dict.get('body', '')
+        self.headers = result_dict.get('headers', {})
+        self.url = result_dict.get('url', '')
+        # Parsed body, filled in lazily by json().
+        self._json = None
+
+    def json(self):
+        """Return the body parsed as JSON, or ``{}`` when it is empty or unparsable."""
+        if self._json is None:
+            if not self.text:
+                return {}
+            try:
+                self._json = json.loads(self.text)
+            except (ValueError, TypeError, RecursionError):
+                # Only parsing failures are tolerated; KeyboardInterrupt and
+                # SystemExit must keep propagating.
+                self._json = {}
+        return self._json
+
+class TlsPlugin(IVSPlg):
+    """
+    TLSContact 签证预约插件 (Camoufox 版)
+    """
+
+    def __init__(self, group_id: str):
+        """Set up per-instance state and create this instance's workspace directory.
+
+        Args:
+            group_id: identifier used in log prefixes and in the workspace path.
+        """
+        self.group_id = group_id
+        self.config: Optional[VSPlgConfig] = None
+        self.free_config: Dict[str, Any] = {}
+        self.is_healthy = True
+        self.logger = None
+
+        self.page: Optional[Page] = None
+        self.browser_ctx: Optional[BrowserContext] = None
+        self.playwright = None
+        self.travel_group: Optional[Dict] = None
+
+        self.instance_id = uuid.uuid4().hex[:8]
+        self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
+        self.user_data_path = os.path.join(self.root_workspace, "user_data")
+
+        # exist_ok avoids the check-then-create race of exists() + makedirs().
+        os.makedirs(self.root_workspace, exist_ok=True)
+
+        self.session_create_time: float = 0
+
+        # A Playwright/Camoufox Page may only be used from the thread that created it,
+        # while the host creates sessions on a pool thread and queries from a monitor thread.
+        # All browser operations are therefore serialized onto one worker thread to avoid
+        # cross-thread hangs or silently failing health checks.
+        self._pw_cmd_queue: "queue.Queue[Optional[Callable[[], None]]]" = queue.Queue()
+        self._pw_thread: Optional[threading.Thread] = None
+        self._pw_worker: Optional[threading.Thread] = None
+        self._pw_thread_ready = threading.Event()
+        self._pw_thread_lock = threading.Lock()
+
+    def get_group_id(self) -> str:
+        """Return the group id this plugin instance was constructed with."""
+        return self.group_id
+    
+    def set_log(self, logger: Callable[[str], None]):
+        """Install the callable that ``_log`` sends formatted messages to."""
+        self.logger = logger
+    
+    def _log(self, message):
+        """Emit *message* with the plugin/group prefix via the injected logger, or stdout if none is set."""
+        line = f'[TlsPlugin] [{self.group_id}] {message}'
+        if not self.logger:
+            print(line)
+            return
+        self.logger(line)
+
+    def set_config(self, config: VSPlgConfig):
+        """Store *config* and cache its ``free_config`` mapping (an empty dict when it is falsy)."""
+        self.config = config
+        self.free_config = config.free_config or {}
+
+    def _ensure_pw_thread(self):
+        """Start the browser worker thread if it is not already running.
+
+        Raises:
+            BizLogicError: if the new thread does not signal readiness within 60 seconds.
+        """
+        with self._pw_thread_lock:
+            # Nothing to do when a live worker already exists.
+            if self._pw_thread and self._pw_thread.is_alive():
+                return
+            # Reset the readiness flag so the wait below observes the NEW thread.
+            self._pw_thread_ready.clear()
+            t = threading.Thread(target=self._pw_loop, name=f"camoufox-tls-{self.instance_id}", daemon=True)
+            self._pw_thread = t
+            t.start()
+            # _pw_loop sets the event as its second statement.
+            if not self._pw_thread_ready.wait(timeout=60):
+                raise BizLogicError("Camoufox worker thread failed to start")
+
+    def _pw_loop(self):
+        """Worker-thread body: announce readiness, then run queued jobs until a ``None`` sentinel arrives."""
+        self._pw_worker = threading.current_thread()
+        self._pw_thread_ready.set()
+        # iter(callable, sentinel) keeps calling get() and stops once it returns None.
+        for job in iter(self._pw_cmd_queue.get, None):
+            job()
+
+    def _run_on_pw_thread(self, fn, *args, **kwargs):
+        """Run ``fn(*args, **kwargs)`` on the browser worker thread and return its result.
+
+        When already called from the worker thread, ``fn`` is invoked directly.
+        Otherwise the call is queued and this method blocks until it finishes.
+
+        Raises:
+            BizLogicError: if the queued job has not finished after 600 seconds.
+                The job is not removed from the queue or cancelled in that case.
+            BaseException: whatever ``fn`` raised on the worker thread is re-raised here.
+        """
+        # Re-entrant call from the worker itself: queueing would deadlock, so run inline.
+        if self._pw_worker is not None and threading.current_thread() is self._pw_worker:
+            return fn(*args, **kwargs)
+        if self._pw_worker is None or not self._pw_thread or not self._pw_thread.is_alive():
+            self._ensure_pw_thread()
+        # out[0] carries an exception, out[1] the return value.
+        out: List[Any] = [None, None]
+        done = threading.Event()
+
+        def work():
+            try:
+                out[1] = fn(*args, **kwargs)
+            except BaseException as e:
+                # Captured here so the worker loop keeps running; re-raised in the caller below.
+                out[0] = e
+            finally:
+                done.set()
+
+        self._pw_cmd_queue.put(work)
+        if not done.wait(timeout=600):
+            self._log("Browser thread operation timed out (600s).")
+            raise BizLogicError("Browser thread operation timeout")
+        if out[0] is not None:
+            raise out[0]
+        return out[1]
+
+    def _stop_pw_thread(self):
+        """Ask the worker thread to exit and wait up to 20 seconds for it to finish."""
+        with self._pw_thread_lock:
+            t = self._pw_thread
+            if not t or not t.is_alive():
+                self._pw_thread = None
+                self._pw_worker = None
+                return
+            # None is the sentinel that makes _pw_loop break out of its loop.
+            self._pw_cmd_queue.put(None)
+        # Join outside the lock so the lock is not held while waiting.
+        t.join(timeout=20)
+        with self._pw_thread_lock:
+            # References are dropped even if the join timed out.
+            self._pw_thread = None
+            self._pw_worker = None
+        
+    def keep_alive(self):
+        """Best-effort session ping: re-request the current page on the worker thread.
+
+        Marks the instance unhealthy when the response is detected as an
+        expired/invalid session. Any other failure is logged and ignored so a
+        transient error never raises into the caller.
+        """
+        if self.page is None:
+            return
+
+        def _work():
+            try:
+                resp = self._perform_request("GET", self.page.url, retry_count=1)
+                self._check_page_is_session_expired_or_invalid('Book your appointment', html=resp.text)
+            except SessionExpiredOrInvalidError as e:
+                self.is_healthy = False
+                self._log(f"keep_alive: session expired or invalid: {e}")
+            except Exception as e:
+                # Deliberately non-fatal, but no longer silent.
+                self._log(f"keep_alive: request failed (ignored): {e}")
+
+        try:
+            self._run_on_pw_thread(_work)
+        except Exception as e:
+            self._log(f"keep_alive: could not run on browser thread (ignored): {e}")
+
+    def _health_check_impl(self) -> bool:
+        """Return True when the session looks usable; runs on the worker thread.
+
+        Checks, in order: the health flag, that a page exists, that the page
+        still evaluates JavaScript, and (when ``session_max_life`` > 0) that
+        the session is younger than that many minutes.
+        """
+        if not self.is_healthy:
+            return False
+        if self.page is None:
+            return False
+        try:
+            # A trivial evaluate proves the page/browser connection is still responsive.
+            v = self.page.evaluate("1")
+            if v != 1:
+                return False
+        except Exception:
+            # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
+            return False
+        if self.config.session_max_life > 0:
+            current_time = time.time()
+            elapsed_time = current_time - self.session_create_time
+            # session_max_life is compared in minutes (multiplied by 60 here).
+            if elapsed_time > self.config.session_max_life * 60:
+                self._log("Session expired.")
+                return False
+        return True
+
+    def health_check(self) -> bool:
+        """Thread-safe health probe; any error while probing counts as unhealthy."""
+        if not self.is_healthy or self.page is None:
+            return False
+        try:
+            worker = self._pw_worker
+            if worker is None or threading.current_thread() is not worker:
+                # Called from a foreign thread: hop onto the browser worker.
+                return self._run_on_pw_thread(self._health_check_impl)
+            return self._health_check_impl()
+        except Exception:
+            return False
+    
+    def _save_screenshot(self, name_prefix):
+        """Save a viewport screenshot under ``data/``; failures are logged, never raised."""
+        try:
+            stamp = int(time.time())
+            save_path = os.path.join("data", f"{self.instance_id}_{name_prefix}_{stamp}.jpg")
+            os.makedirs("data", exist_ok=True)
+            self.page.screenshot(path=save_path, full_page=False)
+            self._log(f"Screenshot saved to {save_path}")
+        except Exception as e:
+            self._log(f"Failed to save screenshot: {e}")
+
+    def create_session(self):
+        """Create the browser session on the worker thread.
+
+        If session creation raises, the worker thread is stopped and the
+        exception is re-raised to the caller.
+        """
+        self._ensure_pw_thread()
+        try:
+            self._run_on_pw_thread(self._create_session_inner)
+        except Exception:
+            self._stop_pw_thread()
+            raise
+
+    def _create_session_inner(self):
+        """
+        Build a logged-in browser session: launch Camoufox, pass the Cloudflare
+        check, fill in and submit the login form, then open the booking page.
+
+        Must run on the dedicated Camoufox/Playwright worker thread (Playwright
+        is not thread-safe); ``create_session`` dispatches it there.
+
+        Raises:
+            BizLogicError: when ``tls_url`` is not configured, the Cloudflare
+                bypass fails, an expected element never becomes visible, or the
+                browser is still on a login/auth URL after submitting.
+            Exception: any other error is logged, the partial session is cleaned
+                up via ``_cleanup_failed_session`` and the error is re-raised.
+        """
+        self._log(f"Initializing Session (ID: {self.instance_id})...")
+        proxy_cfg = None
+        p = self.config.proxy
+        if p and p.ip:
+            proxy_cfg = {"server": f"{p.scheme}://{p.ip}:{p.port}"}
+            # Credentials are attached only when both parts are present.
+            if p.username and p.password:
+                proxy_cfg["username"] = p.username
+                proxy_cfg["password"] = p.password
+        else:
+            self._log("[WARN] No proxy configured!")
+
+        try:
+            tls_url = self.free_config.get('tls_url', '')
+            if not tls_url:
+                # Fail fast with a clear message instead of an opaque navigation error.
+                raise BizLogicError(message="free_config['tls_url'] is not configured")
+
+            self.playwright = sync_playwright().start()
+            headless_opt = _camoufox_headless_from_env()
+            self._log(f"Camoufox headless={headless_opt!r} (env CAMOUFOX_HEADLESS)")
+            self.browser_ctx = NewBrowser(
+                self.playwright,
+                persistent_context=True,
+                headless=headless_opt,
+                user_data_dir=self.user_data_path,
+                proxy=proxy_cfg,
+                window=(1920, 1080),
+            )
+            # Reuse the first existing page of the context, or open one if there is none.
+            self.page = self.browser_ctx.pages[0] if self.browser_ctx.pages else self.browser_ctx.new_page()
+            self._log(f"Navigating: {tls_url}")
+            self.page.goto(tls_url, wait_until="domcontentloaded")
+            time.sleep(5)
+            cf_bypasser = CloudflareBypasser(self.page, log=True)
+            if not cf_bypasser.bypass(max_retry=15):
+                raise BizLogicError("Cloudflare bypass timeout")
+            time.sleep(3)
+
+            # If the login form is not shown yet, follow the first link that leads to it.
+            login_btn = "button:has-text('Login')"
+            if not self._is_selector_visible(login_btn, timeout=3000):
+                self.page.locator("a[href*='login']").first.click(timeout=5000)
+                time.sleep(3)
+            if not self._is_selector_visible(login_btn, timeout=10000):
+                raise BizLogicError(message=f"Can't find selector={login_btn}")
+            time.sleep(random.uniform(0.5, 1))
+
+            # NOTE: the ReCaptcha solving / token-injection steps were already disabled
+            # (commented out) in the original code and are omitted here.
+
+            username = self.config.account.username
+            password = self.config.account.password
+
+            self._type_into_first_visible(
+                selectors=[
+                    "input[name='email']",
+                    "input[type='email']",
+                    "input#email",
+                    "input[autocomplete='username']",
+                    "label:has-text('Email') + input",
+                ],
+                text=username,
+                field_name="Email",
+            )
+
+            # Randomized pauses between the form interactions.
+            time.sleep(random.uniform(0.5, 1.2))
+
+            self._type_into_first_visible(
+                selectors=[
+                    "input[name='password']",
+                    "input[type='password']",
+                    "input#password",
+                    "input[autocomplete='current-password']",
+                    "label:has-text('Password') + input",
+                ],
+                text=password,
+                field_name="Password",
+            )
+
+            self._log("Submitting Login...")
+            time.sleep(random.uniform(0.3, 0.8))
+            self.page.locator(login_btn).first.click(timeout=10000)
+
+            self._log("Waiting for redirect...")
+            self.page.wait_for_function(
+                "() => !window.location.href.includes('login-actions')",
+                timeout=45000,
+            )
+
+            time.sleep(3)
+            if "login-actions" in self.page.url or "auth" in self.page.url:
+                raise BizLogicError(message="Login Failed! Invalid credentials or Captcha rejected.")
+
+            self.page.wait_for_load_state("domcontentloaded", timeout=15000)
+            time.sleep(5)
+
+            # NOTE: the travel-group selection and "no applicant" checks were already
+            # disabled (commented out) in the original code and are omitted here.
+
+            btn_selector = '#book-appointment-btn'
+            self._log(f"Waiting for selector={btn_selector} to render...")
+            if not self._is_selector_visible(btn_selector, timeout=15000):
+                raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
+            self.page.locator(btn_selector).first.click(timeout=10000)
+
+            time.sleep(3)
+
+            # The session counts as ready once the final booking button is visible.
+            btn_selector = "button:has-text('Book your appointment')"
+            if not self._is_selector_visible(btn_selector, timeout=10000):
+                raise BizLogicError(message=f"Waiting for selector={btn_selector} timeout")
+
+            self.session_create_time = time.time()
+            self._log("✅ Login & Navigation Success!")
+
+        except Exception as e:
+            self._log(f"Session Create Error: {e}")
+            if self.config.debug:
+                self._save_screenshot("create_session_except")
+            self._cleanup_failed_session()
+            # Bare raise re-raises the active exception unchanged.
+            raise
+
+    def query(self, apt_type: AppointmentType) -> VSQueryResult:
+        """Run ``_query_impl`` on the browser worker thread and return its result."""
+        return self._run_on_pw_thread(self._query_impl, apt_type)
+
+    def _day_block_locator_candidates(self):
+        """Yield candidate locators for calendar "day blocks", strictest first.
+
+        A day block is a ``div`` containing a ``p`` and at least one button whose
+        ``data-testid`` contains ``slot``. The variants differ only in how
+        deeply the ``p`` and the button may be nested.
+        """
+        for xp in (
+            # Mirrors the DrissionPage version: direct <p> child, button under a child div.
+            "xpath=//div[./p and ./div//button[contains(@data-testid, 'slot')]]",
+            # Slightly looser: the slot button may be any descendant.
+            "xpath=//div[./p and .//button[contains(@data-testid, 'slot')]]",
+            # Loosest XPath: both <p> and the slot button may be any descendant.
+            "xpath=//div[.//p and .//button[contains(@data-testid, 'slot')]]",
+        ):
+            yield self.page.locator(xp)
+        # Last resort: the same condition expressed with Playwright's has= filters.
+        page = self.page
+        with_p = page.locator("div").filter(has=page.locator("p"))
+        yield with_p.filter(has=page.locator("button[data-testid*='slot']"))
+
+    def _extract_slots_from_calendar_dom(
+        self, target_year: int, target_month_num: int
+    ) -> List[Dict[str, Any]]:
+        """Collect available slots by walking calendar day blocks.
+
+        Tries each locator from ``_day_block_locator_candidates`` and uses the
+        first one that matches anything. Falls back to
+        ``_extract_slots_from_available_buttons_only`` when no day block
+        matches or the matched blocks yield no slot.
+
+        Returns:
+            A list of ``{"date": "YYYY-MM-DD", "time": "HH:MM", "label": str}`` dicts.
+        """
+        all_slots: List[Dict[str, Any]] = []
+        day_blocks = None
+        for loc in self._day_block_locator_candidates():
+            try:
+                n = loc.count()
+            except Exception:
+                continue
+            if n > 0:
+                day_blocks = loc
+                self._log(f"使用日历块选择器,匹配到 {n} 个 day_blocks")
+                break
+        if day_blocks is None:
+            # No day-block wrapper matched: scan the available buttons directly instead.
+            return self._extract_slots_from_available_buttons_only(
+                target_year, target_month_num
+            )
+
+        for i in range(day_blocks.count()):
+            block = day_blocks.nth(i)
+            p_ele = block.locator("p").first
+            if not p_ele.count():
+                continue
+            full_date = self._slot_date_from_day_text(
+                target_year, target_month_num, p_ele.inner_text()
+            )
+            if full_date is None:
+                continue
+            available_btns = block.locator("button[data-testid^='btn-available-slot']")
+            for j in range(available_btns.count()):
+                slot = self._slot_from_button(available_btns.nth(j), full_date)
+                if slot is not None:
+                    all_slots.append(slot)
+        if all_slots:
+            return all_slots
+        return self._extract_slots_from_available_buttons_only(
+            target_year, target_month_num
+        )
+
+    def _extract_slots_from_available_buttons_only(
+        self, target_year: int, target_month_num: int
+    ) -> List[Dict[str, Any]]:
+        """Collect slots starting from the available-slot buttons.
+
+        Used when the day-block locators fail: for every available button, the
+        nearest ancestor ``div`` containing a ``p`` supplies the day number.
+        """
+        all_slots: List[Dict[str, Any]] = []
+        btns = self.page.locator("button[data-testid^='btn-available-slot']")
+        n = btns.count()
+        if n == 0:
+            return []
+        self._log(f"按可用按钮回查日期,共 {n} 个 btn-available-slot")
+        for j in range(n):
+            btn = btns.nth(j)
+            row = btn.locator("xpath=./ancestor::div[.//p][1]")
+            p_ele = row.locator("p").first
+            if not p_ele.count():
+                continue
+            full_date = self._slot_date_from_day_text(
+                target_year, target_month_num, p_ele.inner_text()
+            )
+            if full_date is None:
+                continue
+            slot = self._slot_from_button(btn, full_date)
+            if slot is not None:
+                all_slots.append(slot)
+        return all_slots
+
+    @staticmethod
+    def _slot_date_from_day_text(target_year, target_month_num, day_text):
+        """Return ``YYYY-MM-DD`` for the first number in *day_text*, or None if there is none or it is not a real day of that month."""
+        day_match = re.search(r"\d+", day_text or "")
+        if not day_match:
+            return None
+        day = int(day_match.group())
+        try:
+            # Constructing the date rejects values such as 0 or 99, which would
+            # otherwise produce a string that later fails datetime.strptime.
+            datetime(target_year, target_month_num, day)
+        except (ValueError, OverflowError):
+            return None
+        return f"{target_year}-{target_month_num:02d}-{day:02d}"
+
+    @staticmethod
+    def _slot_from_button(btn, full_date):
+        """Build one slot dict from an available-slot button, or None when its markup shows no HH:MM time."""
+        time_match = re.search(r"\d{2}:\d{2}", btn.inner_html())
+        if not time_match:
+            return None
+        test_id = btn.get_attribute("data-testid") or ""
+        # The label is derived from markers inside the button's data-testid.
+        if "prime" in test_id and "weekend" in test_id:
+            lbl = "ptaw"
+        elif "prime" in test_id:
+            lbl = "pta"
+        else:
+            lbl = ""
+        return {"date": full_date, "time": time_match.group(), "label": lbl}
+
+    def _query_impl(self, apt_type: AppointmentType) -> VSQueryResult:
+        """Look up available appointment slots for the configured month.
+
+        The month comes from ``free_config["interest_month"]`` (``MM-YYYY``,
+        defaulting to the current month). If the calendar is not on that month
+        the page is navigated there and slots are read from the DOM, with an
+        HTML re-fetch as fallback. Otherwise the current page is re-fetched and
+        parsed. Slots are then filtered by ``free_config["target_labels"]``.
+
+        Args:
+            apt_type: accepted for interface compatibility; not used in this method.
+
+        Returns:
+            A ``VSQueryResult`` with ``success``, ``availability_status`` and, when
+            slots exist, ``earliest_date`` and ``availability`` filled in.
+        """
+        res = VSQueryResult()
+        res.success = False
+        interest_month = self.free_config.get("interest_month", time.strftime("%m-%Y"))
+
+        target_date_obj = datetime.strptime(interest_month, "%m-%Y")
+        target_month_text = target_date_obj.strftime("%B %Y")
+        target_year = target_date_obj.year
+        target_month_num = target_date_obj.month
+
+        all_slots = []
+
+        current_selected_ele = self.page.locator('[data-testid="btn-current-month-available"]').first
+        current_month_text = current_selected_ele.inner_text().strip() if current_selected_ele.count() else ""
+
+        is_on_target_month = (current_month_text.lower() == target_month_text.lower())
+
+        if not is_on_target_month:
+            self._log(f"Current is '{current_month_text}', navigating to '{target_month_text}'...")
+            # NOTE(review): if neither the target link nor a next button is found, the
+            # code below still parses whatever month is displayed — confirm that is intended.
+            for _ in range(12):
+                target_btn = self.page.locator(f"a[href*='month={interest_month}']").first
+
+                if target_btn.count():
+                    target_btn.click(timeout=5000)
+                    time.sleep(3)
+                    break
+
+                next_btn = self.page.locator('[data-testid="btn-next-month-available"]').first
+                if next_btn.count():
+                    next_btn.click(timeout=5000)
+                    time.sleep(2)
+                else:
+                    self._log("Warning: Cannot find target month or 'Next Month' button.")
+                    break
+
+            # Prefer a quiet network; settle for DOM-ready if the page never goes idle.
+            try:
+                self.page.wait_for_load_state("networkidle", timeout=20000)
+            except PlaywrightTimeoutError:
+                try:
+                    self.page.wait_for_load_state("domcontentloaded", timeout=10000)
+                except PlaywrightTimeoutError:
+                    pass
+            time.sleep(0.8)
+
+            self._log("Extracting slots from DOM using robust data-testid features...")
+            all_slots = self._extract_slots_from_calendar_dom(
+                target_year, target_month_num
+            )
+            if not all_slots:
+                n_slot_btns = self.page.locator("[data-testid*='slot']").count()
+                n_avail = self.page.locator("button[data-testid^='btn-available-slot']").count()
+                self._log(
+                    f"DOM 日历未解析到槽位: [data-testid*=\"slot\"]={n_slot_btns}, "
+                    f"btn-available-slot={n_avail},回退为页面 HTML 内嵌 JSON 解析"
+                )
+                try:
+                    resp = self._perform_request("GET", self.page.url, retry_count=1)
+                    self._check_page_is_session_expired_or_invalid("Book your appointment", resp.text)
+                    all_slots = self._parse_appointment_slots(resp.text)
+                except Exception as ex:
+                    self._log(f"HTML 回退解析失败: {ex}")
+
+        else:
+            self._log(f"Already on '{target_month_text}'. Executing silent JS fetch...")
+            resp = self._perform_request("GET", self.page.url, retry_count=1)
+            self._check_page_is_session_expired_or_invalid('Book your appointment', resp.text)
+            all_slots = self._parse_appointment_slots(resp.text)
+
+        target_labels = self.free_config.get("target_labels", ["", "pta"])
+        slots = [s for s in all_slots if s.get("label") in target_labels]
+
+        if slots:
+            res.success = True
+            res.availability_status = AvailabilityStatus.Available
+            date_map: dict[datetime, list[TimeSlot]] = {}
+            for s in slots:
+                dt = datetime.strptime(s["date"], "%Y-%m-%d")
+                date_map.setdefault(dt, []).append(
+                    TimeSlot(time=s["time"], label=str(s.get("label", "")))
+                )
+            # Take the real minimum rather than assuming the first slot is the earliest.
+            res.earliest_date = min(date_map)
+            res.availability = [
+                DateAvailability(date=d, times=day_slots)
+                for d, day_slots in date_map.items()
+            ]
+            self._log(f"Slot Found! -> {slots}")
+        else:
+            self._log("No slots available.")
+            res.success = False
+            res.availability_status = AvailabilityStatus.NoneAvailable
+        return res
+
+    def book(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
+        """Run ``_book_impl`` on the browser worker thread and return its result."""
+        return self._run_on_pw_thread(self._book_impl, slot_info, user_inputs)
+
+    def _book_impl(self, slot_info: VSQueryResult, user_inputs: Dict = None) -> VSBookResult:
+        """Pick a random matching slot, submit the booking form and report the outcome.
+
+        Args:
+            slot_info: Query result whose ``availability`` entries provide
+                ``date`` and ``times`` (each time carries ``time`` and ``label``).
+            user_inputs: Optional constraints. Keys read here:
+                ``expected_start_date`` / ``expected_end_date`` (handed to
+                ``_filter_dates``) and ``support_pta`` (default ``True``;
+                when truthy, slots labelled ``'pta'`` are accepted too).
+
+        Returns:
+            A ``VSBookResult``. ``success`` is ``True`` only when one of the
+            confirmation indicators was seen during the polling window.
+
+        Raises:
+            NotFoundError: No date, or no slot, satisfies the constraints.
+            BizLogicError: The form could not be filled in or submitted.
+        """
+        if user_inputs is None:
+            user_inputs = {}
+        res = VSBookResult()
+        res.success = False
+
+        exp_start = user_inputs.get('expected_start_date', '')
+        exp_end = user_inputs.get('expected_end_date', '')
+        support_pta = user_inputs.get('support_pta', True)
+
+        # The empty label is always acceptable; 'pta' only when the user allows it.
+        target_labels = ['']
+        if support_pta:
+            target_labels.append('pta')
+
+        available_dates_str = [
+            da.date.strftime("%Y-%m-%d")
+            for da in slot_info.availability if da.date
+        ]
+
+        valid_dates_list = self._filter_dates(available_dates_str, exp_start, exp_end)
+        if not valid_dates_list:
+            raise NotFoundError(message="No dates match user constraints")
+        # Set membership keeps the per-day check O(1).
+        valid_dates = set(valid_dates_list)
+
+        all_possible_slots = []
+        for da in slot_info.availability:
+            if not da.date:
+                continue
+
+            date_str = da.date.strftime("%Y-%m-%d")
+            if date_str not in valid_dates:
+                continue
+            for t in da.times:
+                if t.label in target_labels:
+                    all_possible_slots.append({
+                        "date": date_str,
+                        "time_obj": t,
+                        "label": t.label
+                    })
+
+        if not all_possible_slots:
+            raise NotFoundError(message="No suitable slot found (after label filtering)")
+
+        selected_slot = random.choice(all_possible_slots)
+        selected_date = selected_slot["date"]
+        selected_time = selected_slot["time_obj"]
+        selected_label = selected_slot["label"]
+
+        self._log(f"Found {len(all_possible_slots)} valid slots. selected slot: {selected_date} {selected_time.time} {selected_label}")
+
+        # Encode the values as JSON string literals so that quotes or
+        # backslashes in scraped slot data cannot break out of the script.
+        js_date = json.dumps(str(selected_date))
+        js_time = json.dumps(str(selected_time.time))
+        js_label = json.dumps(str(selected_label))
+
+        js_inject_and_click = f"""
+        try {{
+            const form = document.querySelector('form');
+            if (!form) return 'Form not found';
+
+            function setReactValue(input, value) {{
+                if (!input) return;
+                input.value = value;
+                input.dispatchEvent(new Event('input', {{ bubbles: true }}));
+                input.dispatchEvent(new Event('change', {{ bubbles: true }}));
+            }}
+            setReactValue(form.querySelector('input[name="date"]'), {js_date});
+            setReactValue(form.querySelector('input[name="time"]'), {js_time});
+            setReactValue(form.querySelector('input[name="appointmentLabel"]'), {js_label});
+            const submitBtn = form.querySelector('button[type="submit"]');
+            if (submitBtn) {{
+                submitBtn.removeAttribute('disabled');
+                submitBtn.classList.remove('opacity-50', 'cursor-not-allowed'); 
+                submitBtn.click();
+                return 'clicked';
+            }} else {{
+                return 'Submit button not found';
+            }}
+        }} catch (e) {{
+            return e.toString();
+        }}
+        """
+
+        inject_res = self.page.evaluate(f"() => {{ {js_inject_and_click} }}")
+        self._log(f"Form submission triggered: {inject_res}")
+
+        if inject_res != 'clicked':
+            raise BizLogicError(message="Failed to inject form or click the submit button")
+
+        self._log("Waiting for Next.js to process the form submission...")
+        for _ in range(10):
+            try:
+                current_page_url = self.page.url
+                current_page_html = self.page.content()
+                appointment_confirmation_indicators = [
+                    "order-summary" in current_page_url,
+                    "partner-services" in current_page_url,
+                    "appointment-confirmation" in current_page_url,
+                    "Change my appointment" in current_page_html,
+                    "Book a new appointment" in current_page_html,
+                ]
+
+                if any(appointment_confirmation_indicators):
+                    self._log(f"✅ BOOKING SUCCESS! Redirected to: {current_page_url}")
+                    res.success = True
+                    res.label = selected_label
+                    res.book_date = selected_date
+                    res.book_time = selected_time.time
+                    self._save_screenshot("book_slot_success")
+                    break
+
+                toast_selector = '[role=\"alert\"]'
+                toast_ele = self.page.locator(toast_selector).first
+                if toast_ele.count():
+                    error_msg = toast_ele.inner_text()
+                    self._log(f"❌ BOOKING FAILED! Detected popup: {error_msg}")
+                    break
+            except Exception as e:
+                # Reading the page can fail while it is navigating; record
+                # the error and poll again instead of hiding it.
+                self._log(f"Booking result check failed, will retry: {e}")
+            # Sleep after every inconclusive attempt, including failed ones,
+            # so the polling window is not used up in a tight loop.
+            time.sleep(0.5)
+        return res
+    
+    def _get_proxy_url(self):
+        """Build the proxy URL described by ``self.config.proxy``.
+
+        Returns an empty string when no proxy IP is configured, otherwise
+        ``scheme://ip:port`` with ``username:password@`` inserted before the
+        host when a username is set.
+        """
+        proxy_cfg = self.config.proxy
+        if not proxy_cfg.ip:
+            return ""
+        credentials = f"{proxy_cfg.username}:{proxy_cfg.password}@" if proxy_cfg.username else ""
+        return f"{proxy_cfg.scheme}://{credentials}{proxy_cfg.ip}:{proxy_cfg.port}"
+
+    def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
+        """Run a ``fetch`` inside the browser page and map the outcome to a result or error.
+
+        Args:
+            method: HTTP method; upper-cased before use.
+            url: Target URL. ``params`` (if any) are URL-encoded and appended.
+            headers: Optional request headers. The mapping is copied, so the
+                caller's dict is never modified.
+            data: Optional body. A dict is form-encoded; anything else is
+                sent as given. Ignored when ``json_data`` is truthy.
+            json_data: Optional body sent as JSON.
+            params: Optional query parameters.
+            retry_count: Number of firewall-refresh retries already done.
+
+        Returns:
+            A ``BrowserResponse`` when the status is 200.
+
+        Raises:
+            BizLogicError: Browser not initialised, network failure
+                (status 0) or any other unexpected status.
+            SessionExpiredOrInvalidError: HTTP 401.
+            PermissionDeniedError: HTTP 403 that a firewall refresh did not fix.
+            RateLimiteddError: HTTP 429.
+        """
+        from urllib.parse import urlencode
+
+        if not self.page:
+            raise BizLogicError("Browser not initialized")
+
+        if params:
+            separator = '&' if '?' in url else '?'
+            url += separator + urlencode(params)
+
+        fetch_options = {
+            "method": method.upper(),
+            # Copy: Content-Type may be added below and must not leak into
+            # the dict owned by the caller.
+            "headers": dict(headers or {}),
+            "credentials": "include"
+        }
+
+        # Body handling
+        if json_data:
+            fetch_options['body'] = json.dumps(json_data)
+            fetch_options['headers']['Content-Type'] = 'application/json'
+        elif data:
+            if isinstance(data, dict):
+                fetch_options['body'] = urlencode(data)
+                fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
+            else:
+                fetch_options['body'] = data
+
+        # json.dumps yields a valid, fully escaped JS string literal, so a
+        # quote or backslash in the URL cannot break the injected script.
+        js_script = f"""
+        const url = {json.dumps(url)};
+        const options = {json.dumps(fetch_options)};
+        
+        return fetch(url, options)
+            .then(async response => {{
+                const text = await response.text();
+                const headers = {{}};
+                response.headers.forEach((value, key) => headers[key] = value);
+                
+                return {{
+                    status: response.status,
+                    body: text,
+                    headers: headers,
+                    url: response.url
+                }};
+            }})
+            .catch(error => {{
+                return {{
+                    status: 0,
+                    body: error.toString(),
+                    headers: {{}},
+                    url: url
+                }};
+            }});
+        """
+
+        res_dict = self.page.evaluate(f"() => {{ {js_script} }}")
+        resp = BrowserResponse(res_dict)
+
+        if resp.status_code == 200:
+            return resp
+        elif resp.status_code == 401:
+            self.is_healthy = False
+            raise SessionExpiredOrInvalidError()
+        elif resp.status_code == 403:
+            if retry_count < 2:
+                self._log(f"HTTP 403 Detected. Cloudflare session expired? Attempting refresh (Try {retry_count+1}/2)...")
+                if self._refresh_firewall_session():
+                    self._log("Firewall session refreshed. Retrying request...")
+                    # ``url`` already contains the encoded query string, so
+                    # pass no params; otherwise they would be appended twice.
+                    return self._perform_request(method, url, headers, data, json_data, None, retry_count+1)
+                else:
+                    self._log("Failed to refresh firewall session.")    
+            raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
+        elif resp.status_code == 429:
+            self.is_healthy = False
+            raise RateLimiteddError()
+        else:
+            if resp.status_code == 0:
+                raise BizLogicError(f"Network Error: {resp.text}")
+            raise BizLogicError(message=f"HTTP Error {resp.status_code}: {resp.text[:100]}")
+    
+    def _refresh_firewall_session(self) -> bool:
+        """Reload the page to trigger the Cloudflare challenge and try to pass it.
+
+        Returns ``True`` only when ``CloudflareBypasser.bypass`` succeeds and
+        the page title does not contain "access denied". Any exception is
+        logged and reported as ``False``.
+        """
+        try:
+            # Reloading forces a fresh HTTP request, which is what surfaces
+            # the Cloudflare interception page.
+            self._log("Refreshing page to trigger Cloudflare...")
+            self.page.reload(wait_until="domcontentloaded")
+
+            # More attempts than the bypasser's default.
+            bypasser = CloudflareBypasser(self.page, log=self.config.debug)
+            if not bypasser.bypass(max_retry=10):
+                return False
+
+            # Make sure we did not land on a 403 page.
+            if "access denied" in self.page.title().lower():
+                return False
+
+            # Give the DOM a moment to settle.
+            time.sleep(2)
+            return True
+        except Exception as e:
+            self._log(f"Error during firewall refresh: {e}")
+            return False
+
+    def _solve_recaptcha(self, params) -> str:
+        """Solve a reCAPTCHA through the CapSolver HTTP API and return the token.
+
+        Args:
+            params: Mapping with ``apiToken`` (required), ``type``, ``page``,
+                ``siteKey`` and optionally ``action``.
+
+        Returns:
+            The ``gRecaptchaResponse`` value of the finished task.
+
+        Raises:
+            NotFoundError: ``apiToken`` is missing.
+            BizLogicError: The task could not be created, was reported as
+                failed, or was not ready after 20 polls.
+        """
+        key = params.get("apiToken")
+        if not key:
+            raise NotFoundError("Api-token required")
+
+        submit_url = "https://api.capsolver.com/createTask"
+        task = {
+            "type": params.get("type"),
+            "websiteURL": params.get("page"),
+            "websiteKey": params.get("siteKey"),
+        }
+        if params.get("action"):
+            task["pageAction"] = params.get("action")
+
+        # NOTE: params["proxy"] is deliberately not forwarded. With Camoufox
+        # the task normally runs in ProxyLess mode; pass the proxy on
+        # (proxyType / proxyAddress / proxyPort / proxyLogin / proxyPassword)
+        # only if risk control turns out to be very strict.
+
+        payload = {"clientKey": key, "task": task}
+        import requests as req  # local import to avoid name confusion
+        r = req.post(submit_url, json=payload, timeout=20)
+        if r.status_code != 200:
+            raise BizLogicError(message="Failed to submit capsolver task")
+
+        created = r.json()
+        task_id = created.get("taskId")
+        if created.get("errorId") or not task_id:
+            # Without a task id every poll below would be pointless.
+            reason = created.get("errorDescription") or created.get("errorCode") or "no taskId returned"
+            raise BizLogicError(message=f"Failed to submit capsolver task: {reason}")
+
+        for _ in range(20):
+            r = req.post("https://api.capsolver.com/getTaskResult", json={"clientKey": key, "taskId": task_id}, timeout=20)
+            if r.status_code == 200:
+                d = r.json()
+                if d.get("status") == "ready":
+                    return d["solution"]["gRecaptchaResponse"]
+                if d.get("errorId") or d.get("status") == "failed":
+                    # Stop immediately rather than polling a dead task.
+                    reason = d.get("errorDescription") or d.get("errorCode") or "unknown error"
+                    raise BizLogicError(message=f"Capsolver task failed: {reason}")
+            time.sleep(3)
+        raise BizLogicError(message="Capsolver task timeout")
+
+    def _parse_travel_groups(self, html_content) -> List[Dict]:
+        """Extract the travel groups embedded as escaped JSON in the page HTML.
+
+        Returns one dict per group with the keys ``group_name``,
+        ``group_number`` and ``location``. An empty list is returned, and a
+        message logged, when the ``travelGroups`` payload is not found.
+        """
+        js_pattern = r'\\"travelGroups\\":\s*(\[.*?\]),\\"availableCountriesToCreateGroups'
+        found = re.search(js_pattern, html_content, re.DOTALL)
+        if not found:
+            self._log('Parsed travel group page, but not found travelGroups')
+            return []
+
+        # The payload sits inside a JS string, so its quotes are escaped.
+        unescaped = found.group(1).replace(r'\"', '"')
+        return [
+            {
+                'group_name': entry.get('groupName'),
+                'group_number': entry.get('formGroupId'),
+                'location': entry.get('vacName')
+            }
+            for entry in json.loads(unescaped)
+        ]
+
+    def _parse_appointment_slots(self, html_content) -> List[Dict]:
+        """Extract appointment slots embedded as escaped JSON in the page HTML.
+
+        Returns a flat list of ``{'date', 'time', 'label'}`` dicts, one per
+        slot. ``label`` is ``'pta'`` or ``'ptaw'`` when the slot's labels
+        contain it (``'pta'`` wins), otherwise ``''``. An empty list is
+        returned when the ``availableAppointments`` payload is not found.
+        """
+        slots = []
+        pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
+        match = re.search(pattern, html_content, re.DOTALL)
+        if not match:
+            return slots
+
+        # The payload sits inside a JS string, so its quotes are escaped.
+        json_str = match.group(1).replace(r'\"', '"')
+        for day in json.loads(json_str):
+            d_str = day.get('day')
+            # ``or []`` also covers an explicit JSON null, which .get()'s
+            # default does not.
+            for s in day.get('slots') or []:
+                labels = s.get('labels') or []
+                lbl = next((known for known in ('pta', 'ptaw') if known in labels), '')
+                slots.append({
+                    'date': d_str,
+                    'time': s.get('time'),
+                    'label': lbl
+                })
+        return slots
+  
+    def _check_page_is_session_expired_or_invalid(self, keyword, html: str) -> bool:
+        """Raise when the page looks like an expired or invalid session.
+
+        An empty page always counts as expired. Otherwise, if ``keyword``
+        (case-insensitive) is missing and the page shows a redirect notice,
+        a login/password form or a "session expired" message, the instance
+        is marked unhealthy and ``SessionExpiredOrInvalidError`` is raised.
+
+        NOTE(review): despite the ``bool`` annotation, every non-raising
+        path returns ``None``.
+        """
+        if not html:
+            self.is_healthy = False
+            raise SessionExpiredOrInvalidError()
+
+        page_text = html.lower()
+        if keyword.lower() in page_text:
+            return
+
+        looks_logged_out = (
+            'redirected automatically' in page_text
+            or ('login' in page_text and 'password' in page_text)
+            or 'session expired' in page_text
+        )
+        if looks_logged_out:
+            self.is_healthy = False
+            raise SessionExpiredOrInvalidError()
+            
+    def _filter_dates(self, dates: List[str], start_str: str, end_str: str) -> List[str]:
+        """Keep the dates inside ``[start_str, end_str]`` and return them shuffled.
+
+        Only the first ten characters of each bound are parsed, as
+        ``YYYY-MM-DD``. When either bound is empty, ``dates`` is returned
+        unchanged (same object, original order).
+        """
+        if not (start_str and end_str):
+            return dates
+
+        day_format = "%Y-%m-%d"
+        lower = datetime.strptime(start_str[:10], day_format)
+        upper = datetime.strptime(end_str[:10], day_format)
+        in_range = [
+            candidate
+            for candidate in dates
+            if lower <= datetime.strptime(candidate, day_format) <= upper
+        ]
+        random.shuffle(in_range)
+        return in_range
+
+    def _is_selector_visible(self, selector: str, timeout: int = 10000) -> bool:
+        """Return whether ``selector`` becomes visible before ``timeout`` elapses.
+
+        ``timeout`` is passed straight to ``page.wait_for_selector``; a
+        ``PlaywrightTimeoutError`` is reported as ``False``.
+        """
+        try:
+            self.page.wait_for_selector(selector, state="visible", timeout=timeout)
+        except PlaywrightTimeoutError:
+            return False
+        else:
+            return True
+
+    def _human_type(self, text: str):
+        """Type ``text`` one character at a time with a random pause after each.
+
+        Each character goes through ``page.keyboard.type`` and is followed
+        by a sleep of 0.03 to 0.12 seconds.
+        """
+        for char in text:
+            self.page.keyboard.type(char)
+            pause = random.uniform(0.03, 0.12)
+            time.sleep(pause)
+
+    def _type_into_first_visible(self, selectors: List[str], text: str, field_name: str):
+        """Type ``text`` into the first selector that can be made ready for input.
+
+        Selectors are tried in order. For each, the first match is awaited
+        (visible, 3 s), clicked, cleared and then typed into with
+        ``_human_type``. Any failure moves on to the next selector.
+
+        Raises:
+            BizLogicError: No selector worked; the message carries
+                ``field_name`` and the last error seen.
+        """
+        last_err = None
+        for candidate in selectors:
+            try:
+                field = self.page.locator(candidate).first
+                field.wait_for(state="visible", timeout=3000)
+                field.click(timeout=3000)
+                time.sleep(random.uniform(0.2, 0.6))
+                field.fill("")
+                self._human_type(text)
+            except Exception as exc:
+                last_err = exc
+            else:
+                return
+        raise BizLogicError(message=f"Can't find visible {field_name} input. Last error: {last_err}")
+    
+    def _close_playwright(self):
+        """Close the page, the browser context and Playwright, in that order.
+
+        Each handle that is set is shut down on a best-effort basis (errors
+        are ignored) and its attribute is then reset to ``None``.
+        """
+        shutdown_plan = (
+            ("page", "close"),
+            ("browser_ctx", "close"),
+            ("playwright", "stop"),
+        )
+        for attr_name, method_name in shutdown_plan:
+            handle = getattr(self, attr_name)
+            if not handle:
+                continue
+            try:
+                getattr(handle, method_name)()
+            except Exception:
+                pass
+            setattr(self, attr_name, None)
+
+    def _rmtree_workspace(self):
+        """Delete ``self.root_workspace``, retrying up to three times.
+
+        ``shutil.rmtree`` is called with ``ignore_errors=True`` and therefore
+        never raises, so success is judged by whether the directory still
+        exists afterwards. A warning is logged if it survives every attempt.
+        """
+        workspace = self.root_workspace
+        if not os.path.exists(workspace):
+            return
+
+        max_attempts = 3
+        for attempt in range(1, max_attempts + 1):
+            # Short pause so that files still held by the browser can be released.
+            time.sleep(0.2)
+            shutil.rmtree(workspace, ignore_errors=True)
+            if not os.path.exists(workspace):
+                return
+            self._log(f"Cleanup retry: workspace still present after attempt {attempt}/{max_attempts}")
+            if attempt < max_attempts:
+                time.sleep(0.5)
+
+        self._log(f"[WARN] Failed to fully remove workspace: {workspace}")
+
+    def _cleanup_failed_session(self):
+        """Release browser resources and the workspace after a failed session setup.
+
+        Per the original author's note, this is called when ``create_session``
+        fails inside the worker thread, and the outer caller then runs
+        ``_stop_pw_thread``.
+        """
+        for release_step in (self._close_playwright, self._rmtree_workspace):
+            release_step()
+
+    # --- 资源清理核心方法 ---
+    def cleanup(self):
+        """Shut down the browser and delete the temporary workspace.
+
+        Three cases, depending on the calling thread:
+
+        * called on the worker thread (``_pw_worker``): close and delete
+          directly, without stopping the thread;
+        * a worker exists and ``_pw_thread`` is alive: close Playwright via
+          ``_run_on_pw_thread`` (falling back to a direct close on error),
+          delete the workspace, then stop the thread;
+        * otherwise: close and delete directly.
+        """
+        worker = getattr(self, "_pw_worker", None)
+
+        if worker is not None and threading.current_thread() is worker:
+            self._close_playwright()
+            self._rmtree_workspace()
+            return
+
+        worker_alive = worker is not None and self._pw_thread and self._pw_thread.is_alive()
+        if not worker_alive:
+            self._close_playwright()
+            self._rmtree_workspace()
+            return
+
+        try:
+            self._run_on_pw_thread(self._close_playwright)
+        except Exception:
+            self._close_playwright()
+        self._rmtree_workspace()
+        self._stop_pw_thread()
+    def __del__(self):
+        """Finalizer: run ``cleanup()`` on a best-effort basis at garbage collection.
+
+        A finalizer may run on an instance whose ``__init__`` never finished
+        or while the interpreter is shutting down, when the attributes and
+        modules used by ``cleanup()`` may already be gone. Errors cannot
+        propagate out of ``__del__`` anyway, so they are suppressed here.
+        Call ``cleanup()`` explicitly for deterministic teardown.
+        """
+        try:
+            self.cleanup()
+        except Exception:
+            pass
+
+
+class TlsPlugin2(TlsPlugin):
+    """Alias of ``TlsPlugin`` for factories that load the class as `TlsPlugin2` by module name."""

+ 1 - 0
requirements.txt

@@ -1,4 +1,5 @@
 DrissionPage>=4.0.0
+camoufox
 bs4
 redis
 cryptography

+ 245 - 0
utils/cloudflare_bypass_for_scraping2.py

@@ -0,0 +1,245 @@
+import random
+import time
+from typing import Any
+
+
+class CloudflareBypasser:
+    """Try to get a Playwright-style page past a Cloudflare challenge.
+
+    ``driver`` is either the page itself or an adapter exposing the real
+    page as ``_page``. The page is expected to provide ``title()``,
+    ``content()``, ``url``, ``frames``, ``mouse`` and ``reload()``.
+    Progress messages are printed when ``log`` is truthy.
+    """
+
+    # Selectors for the challenge checkbox. Shared by the locator lookup and
+    # by the in-frame shadow-DOM search, so the two cannot drift apart.
+    _CHECKBOX_SELECTORS = (
+        "input[type='checkbox']",
+        "[role='checkbox']",
+        "label.ctp-checkbox-label",
+        "div.ctp-checkbox-label",
+        "label[for*='cf']",
+    )
+
+    # Substrings of a frame name/URL that mark it as a challenge frame.
+    _FRAME_MARKERS = (
+        "turnstile",
+        "challenges.cloudflare.com",
+        "challenge",
+        "cf-chl",
+    )
+
+    # Substrings of the title/HTML that mean the challenge is still shown.
+    _BLOCKED_MARKERS = (
+        "just a moment",
+        "请稍候",
+        "checking your browser",
+        "cf-challenge",
+        "please complete the captcha",
+    )
+
+    # Walks the document and every open shadow root (closed shadow roots are
+    # not reachable) and clicks the first element matching any selector.
+    _SHADOW_CLICK_JS = """
+    (selectors) => {
+        const seen = new WeakSet();
+        const stack = [document];
+
+        while (stack.length) {
+            const root = stack.pop();
+            if (!root || seen.has(root)) continue;
+            seen.add(root);
+
+            for (const sel of selectors) {
+                const hit = root.querySelector(sel);
+                if (hit) {
+                    hit.click();
+                    return true;
+                }
+            }
+
+            const nodes = root.querySelectorAll ? root.querySelectorAll("*") : [];
+            for (const node of nodes) {
+                if (node.shadowRoot) stack.push(node.shadowRoot);
+            }
+        }
+        return false;
+    }
+    """
+
+    def __init__(self, driver: Any, log=True):
+        """Store the page (or page adapter) and the logging switch."""
+        self.driver = driver
+        self.log = log
+
+    def log_message(self, message):
+        """Print ``message`` when logging is enabled."""
+        if self.log:
+            print(message)
+
+    def _normalize_page(self):
+        """Return the underlying page: ``driver._page`` if present, else ``driver``.
+
+        This keeps the class usable with the CamoufoxPageAdapter of TlsPlugin.
+        """
+        return getattr(self.driver, "_page", self.driver)
+
+    def _is_challenge_frame(self, frame) -> bool:
+        """Return whether the frame's name or URL contains a challenge marker."""
+        frame_name = (frame.name or "").lower()
+        frame_url = (frame.url or "").lower()
+        return any(m in frame_name or m in frame_url for m in self._FRAME_MARKERS)
+
+    def _count_challenge_frames(self, page) -> int:
+        """Count the frames of ``page`` that look like challenge frames."""
+        return sum(1 for frame in page.frames if self._is_challenge_frame(frame))
+
+    def _determine_challenge_type(self) -> str:
+        """Classify the page as ``turnstile``, ``interstitial``, ``none`` or ``unknown``.
+
+        ``unknown`` is returned when the page cannot be inspected.
+        """
+        try:
+            page = self._normalize_page()
+            title = (page.title() or "").lower()
+            html = (page.content() or "").lower()
+            if "please complete the captcha" in html or "turnstile" in html:
+                return "turnstile"
+            if "just a moment" in title or "checking your browser" in html:
+                return "interstitial"
+            return "none"
+        except Exception as e:
+            self.log_message(f"Error determining challenge type: {e}")
+            return "unknown"
+
+    def _click_checkbox_in_frame(self, frame) -> bool:
+        """Click the challenge checkbox inside ``frame``; return whether a click happened.
+
+        Locator-based clicks are tried first, then a script that also
+        searches open shadow roots.
+        """
+        for selector in self._CHECKBOX_SELECTORS:
+            try:
+                loc = frame.locator(selector)
+                if loc.count() <= 0:
+                    continue
+                loc.first.click(timeout=2000)
+                return True
+            except Exception:
+                continue
+
+        try:
+            clicked = frame.evaluate(self._SHADOW_CLICK_JS, list(self._CHECKBOX_SELECTORS))
+            return bool(clicked)
+        except Exception:
+            return False
+
+    def _click_challenge_iframe_center(self) -> bool:
+        """Mouse-click near the centre of the first usable challenge iframe."""
+        page = self._normalize_page()
+        for frame in page.frames:
+            if not self._is_challenge_frame(frame):
+                continue
+
+            try:
+                frame_el = frame.frame_element()
+                box = frame_el.bounding_box()
+                if not box:
+                    continue
+
+                # Look a bit more human: random offset around the centre
+                # instead of a fixed coordinate.
+                cx = box["x"] + box["width"] * (0.5 + random.uniform(-0.08, 0.08))
+                cy = box["y"] + box["height"] * (0.5 + random.uniform(-0.08, 0.08))
+                page.mouse.move(cx, cy, steps=10)
+                time.sleep(random.uniform(0.15, 0.45))
+                page.mouse.click(cx, cy, delay=random.randint(50, 180))
+                return True
+            except Exception:
+                continue
+        return False
+
+    def click_verification_button(self, _is_dfs=False):
+        """Try each click strategy once; errors are logged, never raised.
+
+        ``_is_dfs`` is accepted but not used.
+        """
+        try:
+            page = self._normalize_page()
+            for frame in page.frames:
+                if not self._is_challenge_frame(frame):
+                    continue
+                if self._click_checkbox_in_frame(frame):
+                    self.log_message("Challenge interaction succeeded by frame selector/evaluate.")
+                    time.sleep(1)
+                    return
+
+            if self._click_challenge_iframe_center():
+                self.log_message("Challenge interaction succeeded by iframe center click.")
+                time.sleep(1)
+                return
+
+            self.log_message("Challenge click strategies exhausted.")
+
+        except Exception as e:
+            self.log_message(f"Error clicking verification button: {e}")
+
+    def is_bypassed(self):
+        """Return ``True`` when no blocked-page marker is in the title or HTML.
+
+        Returns ``False`` when the page cannot be inspected.
+        """
+        try:
+            page = self._normalize_page()
+            title = (page.title() or "").lower()
+            html = (page.content() or "").lower()
+            return not any(m in title or m in html for m in self._BLOCKED_MARKERS)
+        except Exception as e:
+            self.log_message(f"Error checking page title: {e}")
+        return False
+
+    def _collect_page_state(self) -> str:
+        """Describe the current page state in one line, for locating a stuck round."""
+        try:
+            page = self._normalize_page()
+            title = page.title()
+            url = page.url
+            challenge_type = self._determine_challenge_type()
+            challenge_frames = self._count_challenge_frames(page)
+            return (
+                f"title={title!r}, url={url!r}, challenge_type={challenge_type}, "
+                f"challenge_frames={challenge_frames}"
+            )
+        except Exception as e:
+            return f"state_collect_error={e}"
+
+    def _collect_state_signature(self):
+        """Return a comparable ``(type, frame_count, title, url)`` snapshot.
+
+        Title and URL are lower-cased and truncated to 80 and 120 characters.
+        ``("unknown", -1, "", "")`` is returned when the page cannot be read.
+        """
+        try:
+            page = self._normalize_page()
+            title = (page.title() or "").lower()
+            url = (page.url or "").lower()
+            challenge_type = self._determine_challenge_type()
+            challenge_frames = self._count_challenge_frames(page)
+            return (challenge_type, challenge_frames, title[:80], url[:120])
+        except Exception:
+            return ("unknown", -1, "", "")
+
+    def bypass(self, max_retry=5):
+        """Try up to ``max_retry`` times to pass the challenge.
+
+        Each round clicks the verification control, re-checks the page,
+        reloads it when nothing changed, and then backs off before the next
+        round.
+
+        Returns:
+            ``True`` as soon as the page looks bypassed, otherwise the
+            result of a final check after the last round.
+        """
+        for i in range(max_retry):
+            if self.is_bypassed():
+                return True
+
+            sig_before = self._collect_state_signature()
+            state_before = self._collect_page_state()
+            self.log_message(
+                f"Verification page detected. try={i + 1}/{max_retry}, before_click: {state_before}"
+            )
+            self.click_verification_button(False)
+
+            # Short wait after the click, then check again.
+            time.sleep(1.2)
+            if self.is_bypassed():
+                self.log_message("Bypass success after click.")
+                return True
+
+            sig_after = self._collect_state_signature()
+            if sig_before == sig_after:
+                self.log_message("No challenge state transition detected after click.")
+                # Nothing changed at all: reload once so the challenge is
+                # rendered again.
+                try:
+                    page = self._normalize_page()
+                    page.reload(wait_until="domcontentloaded")
+                    self.log_message("Page reloaded to retrigger challenge rendering.")
+                    time.sleep(1.5)
+                    if self.is_bypassed():
+                        self.log_message("Bypass success after reload.")
+                        return True
+                except Exception as reload_err:
+                    self.log_message(f"Reload failed: {reload_err}")
+
+            # Increasing back-off (capped at 6 s) to avoid rapid repeated
+            # clicks that could trip risk control.
+            wait_seconds = min(2 + i, 6)
+            state_after = self._collect_page_state()
+            self.log_message(
+                f"Bypass not yet complete, sleeping {wait_seconds}s, after_click: {state_after}"
+            )
+            time.sleep(wait_seconds)
+
+        final_ok = self.is_bypassed()
+        if not final_ok:
+            self.log_message(f"Bypass failed after retries. final_state: {self._collect_page_state()}")
+        return final_ok
+