import json
import time
from urllib.parse import urlparse, urlencode

from DrissionPage import ChromiumPage, ChromiumOptions


class BrowserResponse:
    """Small stand-in for ``requests.Response`` built from a fetch() result.

    The constructor takes the dict produced by the injected JS snippet in
    ``DrissionHttpClient.request`` (keys ``status``, ``body``, ``headers``,
    ``url``). Missing or null fields fall back to neutral defaults so that
    callers can always read ``status_code``, ``text``, ``headers`` and ``url``.
    """

    def __init__(self, result_dict):
        # Guard against None (or any non-dict value) coming back from the browser.
        if not isinstance(result_dict, dict):
            result_dict = {}
        self.status_code = result_dict.get('status', 0)
        # ``or`` also covers an explicit null, so ``content`` can always encode.
        self.text = result_dict.get('body') or ''
        self.headers = result_dict.get('headers') or {}
        self.url = result_dict.get('url', '')
        self._json = None

    def json(self):
        """Return the body parsed as JSON; an empty body yields ``{}``.

        The parsed value is cached after the first successful parse.

        Raises:
            json.JSONDecodeError: if the body is non-empty but not valid JSON.
        """
        if self._json is None:
            if not self.text:
                return {}
            self._json = json.loads(self.text)
        return self._json

    @property
    def content(self):
        """The response body encoded as UTF-8 bytes."""
        return self.text.encode('utf-8')


class DrissionHttpClient:
    """requests-like HTTP client that issues calls via ``fetch`` inside a browser page.

    Requests are executed by injecting JavaScript into a DrissionPage
    ``ChromiumPage``; ``credentials: "include"`` is set so the page's cookies
    are sent with each call.
    """

    # Seconds to wait after navigating to a new domain before issuing requests.
    # The original code hard-coded this pause to let the Cloudflare check finish.
    context_settle_seconds = 5

    def __init__(self, proxy=None, headless=False, user_data_dir=None):
        """Start the browser.

        Args:
            proxy: optional proxy passed to ``ChromiumOptions.set_proxy``.
            headless: when true, ``ChromiumOptions.headless(True)`` is applied.
            user_data_dir: optional path passed to
                ``ChromiumOptions.set_user_data_path``.
        """
        co = ChromiumOptions()
        # Let DrissionPage pick a free port automatically.
        co.auto_port()

        if proxy:
            co.set_proxy(proxy)
        if headless:
            co.headless(True)
        if user_data_dir:
            co.set_user_data_path(user_data_dir)

        # Start-up flags.
        co.set_argument('--no-sandbox')
        co.set_argument('--disable-gpu')
        # Fixed window size, to avoid being detected by a headless-looking size.
        co.set_argument('--window-size=1920,1080')

        self.page = ChromiumPage(co)
        # netloc the page context was last established for; None until the
        # first successful navigation.
        self.current_domain = None

    @staticmethod
    def _domain_covers(current, target):
        """Return True when ``target`` equals ``current`` or is a parent domain of it."""
        current = current.lower()
        target = target.lower()
        return current == target or current.endswith('.' + target)

    @staticmethod
    def _set_content_type(headers, value):
        """Set Content-Type on ``headers``, removing any differently-cased duplicate."""
        for key in [k for k in headers if str(k).lower() == 'content-type']:
            del headers[key]
        headers['Content-Type'] = value

    def _ensure_domain_context(self, url):
        """Make sure the browser page sits on the target domain before fetching.

        Navigates when no context has been established yet, or when the
        target host is neither the current host nor a parent domain of it.
        A navigation failure is reported and ignored (best effort); in that
        case ``current_domain`` is left untouched so the next call retries.
        """
        parsed = urlparse(url)
        target_domain = parsed.netloc

        # A URL without a host gives us nothing to navigate to.
        if not target_domain:
            return

        if self.current_domain and self._domain_covers(self.current_domain, target_domain):
            return

        base_url = f"{parsed.scheme or 'https'}://{target_domain}"

        # VFS special case: go straight to the login page to establish the session.
        if "vfsglobal" in target_domain:
            base_url = "https://visa.vfsglobal.com/gbr/en/nld/login"

        print(f"[Browser] Switching Context -> {base_url}")
        try:
            self.page.get(base_url)
            # Wait for the Cloudflare verification to finish; a fixed pause
            # was found more reliable than relying on automatic waiting.
            time.sleep(self.context_settle_seconds)
            self.current_domain = target_domain
        except Exception as e:
            print(f"[Browser] Warning: Navigation failed: {e}")

    def request(self, method, url, params=None, json_data=None, data=None, headers=None, timeout=30):
        """Run an HTTP request through ``fetch`` in the browser page.

        Args:
            method: HTTP verb; upper-cased before use.
            url: target URL.
            params: optional query parameters appended to ``url``.
            json_data: optional JSON-serialisable body; takes precedence over
                ``data`` and forces ``Content-Type: application/json``.
            data: optional body. A dict is form-encoded (and the form
                Content-Type is set); bytes are decoded as UTF-8; anything
                else is passed through as the body.
            headers: optional request headers. The mapping is copied, never
                modified.
            timeout: passed to ``page.run_js`` as its ``timeout`` argument.

        Returns:
            BrowserResponse: ``status_code`` is 0 when the fetch or the
            script execution failed, with the error text in ``text``.
        """
        self._ensure_domain_context(url)

        # 1. Query parameters (doseq expands list values into repeated keys).
        if params:
            separator = '&' if '?' in url else '?'
            url += separator + urlencode(params, doseq=True)

        # 2. fetch() options. Copy the headers so the caller's dict is not
        #    mutated when Content-Type is set below.
        fetch_options = {
            "method": method.upper(),
            "headers": dict(headers or {}),
            "credentials": "include",  # key point: send cookies along
        }

        # 3. Body. ``is not None`` so that empty JSON values ({} / []) are sent.
        if json_data is not None:
            fetch_options['body'] = json.dumps(json_data)
            # This Content-Type overrides whatever the caller supplied.
            self._set_content_type(fetch_options['headers'], 'application/json')
        elif data:
            if isinstance(data, dict):
                fetch_options['body'] = urlencode(data, doseq=True)
                self._set_content_type(
                    fetch_options['headers'], 'application/x-www-form-urlencoded'
                )
            elif isinstance(data, (bytes, bytearray)):
                # The options are serialised as JSON, which cannot carry bytes.
                fetch_options['body'] = bytes(data).decode('utf-8')
            else:
                fetch_options['body'] = data

        # 4. Inject the JS. The URL is embedded with json.dumps so quotes and
        #    backslashes cannot break out of the string literal. The value
        #    returned by the script is what run_js hands back to Python.
        js_script = f"""
        const url = {json.dumps(url)};
        const options = {json.dumps(fetch_options)};

        return fetch(url, options)
            .then(async response => {{
                const text = await response.text();
                const headers = {{}};
                response.headers.forEach((value, key) => headers[key] = value);
                return {{
                    status: response.status,
                    body: text,
                    headers: headers,
                    url: response.url
                }};
            }})
            .catch(error => {{
                return {{
                    status: 0,
                    body: error.toString(),
                    headers: {{}},
                    url: url
                }};
            }});
        """

        print(f"[Browser] {method} {url}")

        # Per the original author's note, run_js in DrissionPage 4.0+ returns
        # the script's return value directly.
        try:
            result = self.page.run_js(js_script, timeout=timeout)
        except Exception as e:
            # Timeout or script error.
            print(f"[Browser] JS Execution Error: {e}")
            result = {"status": 0, "body": str(e), "headers": {}, "url": url}

        return BrowserResponse(result)

    def get(self, url, **kwargs):
        """Shortcut for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        """Shortcut for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)

    def close(self):
        """Quit the browser."""
        self.page.quit()


# --- Manual test entry point ---
if __name__ == "__main__":
    # headless=False so the browser behaviour can be observed.
    client = DrissionHttpClient(headless=False)

    try:
        # 1. Login endpoint
        url = "https://lift-api.vfsglobal.com/user/login"

        # 2. Payload
        # Replace with a real test account; dummy values that trigger a 401
        # are also enough to verify connectivity.
        payload = {
            "username": "test_user",
            "password": "test_password",
            "missioncode": "nld",
            "countrycode": "gbr",
        }

        # 3. Headers (the browser supplies most of them; only business
        #    headers are added here)
        headers = {
            "route": "gbr/en/nld",
            # "clientsource": "...",  # if required
        }

        print("--- Start Request ---")
        resp = client.post(url, json_data=payload, headers=headers)

        print(f"\nResponse Status: {resp.status_code}")
        print(f"Response Body Preview: {resp.text[:200]}")

        if resp.status_code == 429:
            print("依然限流,请更换 IP")
        elif resp.status_code == 0:
            print("浏览器 Fetch 失败,检查网络或浏览器控制台")

    except Exception as e:
        print(f"Main Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Pause briefly before closing so the result can be seen.
        time.sleep(5)
        client.close()