drission_request_lib.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. import json
  2. import time
  3. from urllib.parse import urlparse, urlencode
  4. from DrissionPage import ChromiumPage, ChromiumOptions
  5. class BrowserResponse:
  6. """
  7. 模拟 requests.Response 对象
  8. """
  9. def __init__(self, result_dict):
  10. # 防止 result_dict 为 None 的保护机制
  11. result_dict = result_dict or {}
  12. self.status_code = result_dict.get('status', 0)
  13. self.text = result_dict.get('body', '')
  14. self.headers = result_dict.get('headers', {})
  15. self.url = result_dict.get('url', '')
  16. self._json = None
  17. def json(self):
  18. if self._json is None:
  19. if not self.text:
  20. return {}
  21. self._json = json.loads(self.text)
  22. return self._json
  23. @property
  24. def content(self):
  25. return self.text.encode('utf-8')
  26. class DrissionHttpClient:
  27. def __init__(self, proxy=None, headless=False, user_data_dir=None):
  28. """
  29. 初始化浏览器
  30. """
  31. co = ChromiumOptions()
  32. # 自动分配端口
  33. co.auto_port()
  34. # 模拟配置
  35. if proxy:
  36. co.set_proxy(proxy)
  37. if headless:
  38. co.headless(True)
  39. if user_data_dir:
  40. co.set_user_data_path(user_data_dir)
  41. # 优化启动参数
  42. co.set_argument('--no-sandbox')
  43. co.set_argument('--disable-gpu')
  44. # 保持浏览器窗口大小,避免被检测为 headless 尺寸
  45. co.set_argument('--window-size=1920,1080')
  46. self.page = ChromiumPage(co)
  47. self.current_domain = None # 初始化为 None
  48. def _ensure_domain_context(self, url):
  49. """
  50. 确保浏览器处于目标域名的上下文中
  51. """
  52. parsed = urlparse(url)
  53. target_domain = parsed.netloc
  54. # --- [修复点] ---
  55. # 1. 检查 self.current_domain 是否为 None
  56. # 2. 检查 target_domain 是否在当前 domain 中 (处理子域名)
  57. if not self.current_domain or (target_domain and target_domain not in self.current_domain):
  58. base_url = f"{parsed.scheme}://{target_domain}"
  59. # VFS 特殊优化:直接去 Login 页建立 Session
  60. if "vfsglobal" in target_domain:
  61. base_url = "https://visa.vfsglobal.com/gbr/en/nld/login"
  62. print(f"[Browser] Switching Context -> {base_url}")
  63. try:
  64. self.page.get(base_url)
  65. # 等待 Cloudflare 验证完成 (DrissionPage 会自动处理大部分等待,但强制 sleep 更稳)
  66. time.sleep(5)
  67. self.current_domain = target_domain
  68. except Exception as e:
  69. print(f"[Browser] Warning: Navigation failed: {e}")
  70. def request(self, method, url, params=None, json_data=None, data=None, headers=None, timeout=30):
  71. """
  72. 执行请求
  73. """
  74. self._ensure_domain_context(url)
  75. # 1. 处理 URL 参数
  76. if params:
  77. if '?' in url:
  78. url += '&' + urlencode(params)
  79. else:
  80. url += '?' + urlencode(params)
  81. # 2. 构造 JS fetch 选项
  82. fetch_options = {
  83. "method": method.upper(),
  84. "headers": headers or {},
  85. "credentials": "include" # 关键:带上 Cookie
  86. }
  87. # 3. 处理 Body
  88. if json_data:
  89. fetch_options['body'] = json.dumps(json_data)
  90. # 这里的 Content-Type 会覆盖 headers 里的
  91. fetch_options['headers']['Content-Type'] = 'application/json'
  92. elif data:
  93. if isinstance(data, dict):
  94. fetch_options['body'] = urlencode(data)
  95. fetch_options['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
  96. else:
  97. fetch_options['body'] = data
  98. # 4. 注入 JS (使用 await 确保同步返回)
  99. # 这里的 return 会被 DrissionPage 捕获并传回 Python
  100. js_script = f"""
  101. const url = "{url}";
  102. const options = {json.dumps(fetch_options)};
  103. return fetch(url, options)
  104. .then(async response => {{
  105. const text = await response.text();
  106. const headers = {{}};
  107. response.headers.forEach((value, key) => headers[key] = value);
  108. return {{
  109. status: response.status,
  110. body: text,
  111. headers: headers,
  112. url: response.url
  113. }};
  114. }})
  115. .catch(error => {{
  116. return {{
  117. status: 0,
  118. body: error.toString(),
  119. headers: {{}},
  120. url: url
  121. }};
  122. }});
  123. """
  124. print(f"[Browser] {method} {url}")
  125. # run_js 在 4.0+ 版本可以直接拿到 return 的值
  126. try:
  127. result = self.page.run_js(js_script, timeout=timeout)
  128. except Exception as e:
  129. # 发生超时或脚本错误
  130. print(f"[Browser] JS Execution Error: {e}")
  131. result = {"status": 0, "body": str(e)}
  132. return BrowserResponse(result)
  133. def get(self, url, **kwargs):
  134. return self.request("GET", url, **kwargs)
  135. def post(self, url, **kwargs):
  136. return self.request("POST", url, **kwargs)
  137. def close(self):
  138. self.page.quit()
  139. # --- 测试入口 ---
  140. if __name__ == "__main__":
  141. # 使用 headless=False 观察浏览器行为
  142. client = DrissionHttpClient(headless=False)
  143. try:
  144. # 1. 登录接口
  145. url = "https://lift-api.vfsglobal.com/user/login"
  146. # 2. 你的 Payload
  147. # 请替换为真实的测试账号,或者随便填触发 401 也可以验证连通性
  148. payload = {
  149. "username": "test_user",
  150. "password": "test_password",
  151. "missioncode": "nld",
  152. "countrycode": "gbr",
  153. }
  154. # 3. Headers (浏览器会自动处理大部分,这里只加业务头)
  155. headers = {
  156. "route": "gbr/en/nld",
  157. # "clientsource": "...", # 如果需要
  158. }
  159. print("--- Start Request ---")
  160. resp = client.post(url, json_data=payload, headers=headers)
  161. print(f"\nResponse Status: {resp.status_code}")
  162. print(f"Response Body Preview: {resp.text[:200]}")
  163. if resp.status_code == 429:
  164. print("依然限流,请更换 IP")
  165. elif resp.status_code == 0:
  166. print("浏览器 Fetch 失败,检查网络或浏览器控制台")
  167. except Exception as e:
  168. print(f"Main Error: {e}")
  169. import traceback
  170. traceback.print_exc()
  171. finally:
  172. # 为了看到结果,暂停一下再关闭
  173. time.sleep(5)
  174. client.close()