| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- import json
- import time
- from urllib.parse import urlparse, urlencode
- from DrissionPage import ChromiumPage, ChromiumOptions
class BrowserResponse:
    """Small stand-in for ``requests.Response`` built from a browser fetch result.

    The constructor expects the dict produced by the injected fetch script,
    with the keys ``status``, ``body``, ``headers`` and ``url``. Missing keys,
    keys holding ``None`` and non-dict results all fall back to neutral
    defaults, so the object is always safe to inspect.

    Attributes:
        status_code: HTTP status, or ``0`` when no status is available.
        text: Response body as a string (``''`` when absent).
        headers: Response headers as a plain dict (``{}`` when absent).
        url: URL reported for the response (``''`` when absent).
    """

    def __init__(self, result_dict):
        # Anything that is not a dict (None, a bare string, ...) counts as an
        # empty result instead of raising AttributeError on ``.get``.
        if not isinstance(result_dict, dict):
            result_dict = {}
        # ``or`` instead of a ``.get`` default: a key that is present but holds
        # None must fall back to the neutral value as well.
        self.status_code = result_dict.get('status') or 0
        self.text = result_dict.get('body') or ''
        self.headers = result_dict.get('headers') or {}
        self.url = result_dict.get('url') or ''
        self._json = None
        # Separate flag so that a body of ``null`` (parsed to None) is cached too.
        self._json_loaded = False

    def __repr__(self):
        return f"<{type(self).__name__} [{self.status_code}]>"

    @property
    def ok(self):
        """True for a real status below 400; the ``0`` failure marker is not ok."""
        return 0 < self.status_code < 400

    def json(self):
        """Return the body parsed as JSON, caching the parsed value.

        Returns:
            The decoded JSON value, or ``{}`` when the body is empty.

        Raises:
            json.JSONDecodeError: If the body is non-empty but not valid JSON.
        """
        if not self._json_loaded:
            if not self.text:
                return {}
            self._json = json.loads(self.text)
            self._json_loaded = True
        return self._json

    @property
    def content(self):
        """Body encoded as UTF-8 bytes."""
        return self.text.encode('utf-8')
class DrissionHttpClient:
    """HTTP-style client that issues requests with ``fetch`` from inside a Chromium page.

    Before a request is sent, the page is navigated to the target domain so
    that ``fetch`` runs with that site's cookies (``credentials: "include"``).
    Responses come back wrapped in ``BrowserResponse``.

    The instance can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    # Seconds to pause after a navigation so that any challenge page can settle.
    CONTEXT_SETTLE_SECONDS = 5
    # VFS domains establish their session on the login page, not the bare host.
    VFS_LOGIN_URL = "https://visa.vfsglobal.com/gbr/en/nld/login"

    def __init__(self, proxy=None, headless=False, user_data_dir=None):
        """Start the browser.

        Args:
            proxy: Optional proxy passed to ``ChromiumOptions.set_proxy``.
            headless: When true, run the browser headless.
            user_data_dir: Optional browser profile directory.
        """
        co = ChromiumOptions()
        # Let DrissionPage pick a free port
        co.auto_port()

        if proxy:
            co.set_proxy(proxy)
        if headless:
            co.headless(True)
        if user_data_dir:
            co.set_user_data_path(user_data_dir)

        # Start-up flags
        co.set_argument('--no-sandbox')
        co.set_argument('--disable-gpu')
        # Fixed window size so the viewport does not look like a headless default
        co.set_argument('--window-size=1920,1080')
        self.page = ChromiumPage(co)
        # netloc whose context the page currently holds; None until first navigation
        self.current_domain = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def _covers_domain(current_domain, target_domain):
        """Return True when the context held for ``current_domain`` also serves ``target_domain``.

        That is the case when both are equal or ``target_domain`` is a parent
        domain of ``current_domain``. The comparison is done on whole labels,
        so ``api.example.com`` does not match ``lift-api.example.com``.
        """
        if not current_domain:
            return False
        return (
            current_domain == target_domain
            or current_domain.endswith('.' + target_domain)
        )

    def _ensure_domain_context(self, url):
        """Navigate the page to the domain of ``url`` unless it is already there.

        Navigation failures are reported and otherwise ignored; in that case
        ``current_domain`` is left unchanged so the next request tries again.
        """
        parsed = urlparse(url)
        target_domain = parsed.netloc

        # A relative URL names no domain, so there is nothing to navigate to.
        if not target_domain:
            return
        if self._covers_domain(self.current_domain, target_domain):
            return

        base_url = f"{parsed.scheme}://{target_domain}"
        # VFS special case: go straight to the login page to establish the session
        if "vfsglobal" in target_domain:
            base_url = self.VFS_LOGIN_URL

        print(f"[Browser] Switching Context -> {base_url}")
        try:
            self.page.get(base_url)
        except Exception as e:
            # Best effort: the request is still attempted from the current page.
            print(f"[Browser] Warning: Navigation failed: {e}")
            return
        # Fixed pause to let a challenge page (e.g. Cloudflare) finish
        time.sleep(self.CONTEXT_SETTLE_SECONDS)
        self.current_domain = target_domain

    @staticmethod
    def _append_params(url, params):
        """Return ``url`` with ``params`` appended as a query string.

        List and tuple values are expanded into repeated keys.
        """
        if not params:
            return url
        separator = '&' if '?' in url else '?'
        return url + separator + urlencode(params, doseq=True)

    @staticmethod
    def _build_fetch_options(method, json_data=None, data=None, headers=None):
        """Build the options object handed to the JS ``fetch`` call.

        Args:
            method: HTTP method; upper-cased in the result.
            json_data: Payload serialised as JSON. A truthy value always wins;
                an empty one (``{}``, ``[]``) is sent only when ``data`` is empty.
            data: A dict is form-encoded, bytes are decoded as UTF-8, anything
                else is used as the body unchanged.
            headers: Extra request headers. The dict is copied, never mutated.

        Returns:
            A JSON-serialisable dict with ``method``, ``headers``,
            ``credentials`` and, when there is a payload, ``body``.
        """
        # Copy so that injecting Content-Type never leaks into the caller's dict
        request_headers = dict(headers or {})
        options = {
            "method": method.upper(),
            "headers": request_headers,
            "credentials": "include",  # essential: send the page's cookies
        }

        content_type = None
        use_json = bool(json_data) or (json_data is not None and not data)
        if use_json:
            options['body'] = json.dumps(json_data)
            content_type = 'application/json'
        elif data:
            if isinstance(data, dict):
                options['body'] = urlencode(data)
                content_type = 'application/x-www-form-urlencoded'
            elif isinstance(data, (bytes, bytearray)):
                # The options are embedded as JSON, which cannot carry bytes
                options['body'] = data.decode('utf-8')
            else:
                options['body'] = data

        if content_type is not None:
            # Override any caller-supplied Content-Type regardless of its casing;
            # two spellings of the same header would otherwise both be sent.
            for name in [k for k in request_headers if k.lower() == 'content-type']:
                del request_headers[name]
            request_headers['Content-Type'] = content_type
        return options

    @staticmethod
    def _build_fetch_script(url, fetch_options):
        """Return the JS snippet that performs the fetch and returns a result object.

        Both ``url`` and ``fetch_options`` are embedded via ``json.dumps`` so
        that quotes or backslashes in them cannot break out of the JS literal.
        The script resolves to ``{status, body, headers, url}``; a fetch error
        resolves to the same shape with ``status`` 0 and the error text as body.
        """
        return f"""
        const url = {json.dumps(url)};
        const options = {json.dumps(fetch_options)};

        return fetch(url, options)
            .then(async response => {{
                const text = await response.text();
                const headers = {{}};
                response.headers.forEach((value, key) => headers[key] = value);

                return {{
                    status: response.status,
                    body: text,
                    headers: headers,
                    url: response.url
                }};
            }})
            .catch(error => {{
                return {{
                    status: 0,
                    body: error.toString(),
                    headers: {{}},
                    url: url
                }};
            }});
        """

    def request(self, method, url, params=None, json_data=None, data=None, headers=None, timeout=30):
        """Send a request from inside the page and return a ``BrowserResponse``.

        Args:
            method: HTTP method name.
            url: Target URL.
            params: Optional query parameters appended to ``url``.
            json_data: Optional JSON payload.
            data: Optional form dict or raw body.
            headers: Optional extra request headers.
            timeout: Timeout passed to ``page.run_js``.

        Returns:
            A ``BrowserResponse``. If running the script raises, the response
            has status 0 and the exception text as body.
        """
        self._ensure_domain_context(url)
        url = self._append_params(url, params)
        fetch_options = self._build_fetch_options(method, json_data, data, headers)
        js_script = self._build_fetch_script(url, fetch_options)
        print(f"[Browser] {method} {url}")

        # Per the original author's note, DrissionPage 4.0+ hands the script's
        # return value back to Python.
        try:
            result = self.page.run_js(js_script, timeout=timeout)
        except Exception as e:
            # Timeout or script error
            print(f"[Browser] JS Execution Error: {e}")
            result = {"status": 0, "body": str(e), "headers": {}, "url": url}
        return BrowserResponse(result)

    def get(self, url, **kwargs):
        """Shorthand for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        """Shorthand for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)

    def close(self):
        """Quit the browser."""
        self.page.quit()
# --- Manual smoke test ---
if __name__ == "__main__":
    # Run with a visible window (headless=False) so the browser can be watched
    client = DrissionHttpClient(headless=False)

    try:
        # Login endpoint
        login_url = "https://lift-api.vfsglobal.com/user/login"

        # Request payload: replace with a real test account, or leave as is;
        # a 401 is enough to prove connectivity.
        credentials = {
            "username": "test_user",
            "password": "test_password",
            "missioncode": "nld",
            "countrycode": "gbr",
        }
        # Business headers only; the browser supplies most of the rest
        extra_headers = {
            "route": "gbr/en/nld",
            # "clientsource": "...",  # add if required
        }
        print("--- Start Request ---")
        response = client.post(login_url, json_data=credentials, headers=extra_headers)

        print(f"\nResponse Status: {response.status_code}")
        print(f"Response Body Preview: {response.text[:200]}")

        # Hints for the two statuses worth calling out
        status_hints = {
            429: "依然限流,请更换 IP",
            0: "浏览器 Fetch 失败,检查网络或浏览器控制台",
        }
        hint = status_hints.get(response.status_code)
        if hint is not None:
            print(hint)

    except Exception as exc:
        print(f"Main Error: {exc}")
        import traceback
        traceback.print_exc()

    finally:
        # Keep the window open briefly so the result can be seen, then shut down
        time.sleep(5)
        client.close()
|