bls_plugin.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. import re
  2. import base64
  3. import time
  4. import json
  5. import random
  6. import string
  7. from urllib.parse import urlparse, parse_qs, urlencode
  8. from typing import Dict, List, Optional, Any
  9. try:
  10. from curl_cffi import requests, const
  11. from bs4 import BeautifulSoup
  12. except ImportError:
  13. raise ImportError("Missing dependencies. Run: pip install curl-cffi beautifulsoup4")
  14. # 框架依赖
  15. from vs_plg import IVSPlg, VSError # type: ignore
  16. from vs_types import VSPlgConfig, VSQueryResult, VSBookResult, AvailabilityStatus # type: ignore
  17. from vs_log_macros import VSC_INFO, VSC_ERROR, VSC_DEBUG, VSC_WARN # type: ignore
  18. from toolkit.vs_cloud_api import VSCloudApi # type: ignore
  19. class BlsPlugin(IVSPlg):
  20. """
  21. BLS 签证预约插件 (精简版)
  22. """
  def __init__(self, group_id: str):
      """Set up the plugin's initial state for one account group.

      Args:
          group_id: Identifier reported by ``get_group_id`` and included in
              the error log line written by ``_set_error``.
      """
      # Default OCR service endpoint; ``set_config`` may override it.
      self.ocr_service_url = "http://127.0.0.1:8085/predict/vfcode"
      # Runtime state.
      self.is_healthy = True
      self.last_error = VSError(0, "OK")
      self.book_params: Dict = {}
      # Configuration and HTTP session, populated later by ``set_config`` and
      # ``create_session``.
      self.session: Optional[requests.Session] = None
      self.free_config: Dict[str, Any] = {}
      self.config: Optional[VSPlgConfig] = None
      self.group_id = group_id
  34. def get_group_id(self) -> str:
  35. return self.group_id
  36. def set_config(self, config: VSPlgConfig):
  37. self.config = config
  38. try:
  39. self.free_config = json.loads(config.free_config) if config.free_config else {}
  40. except:
  41. self.free_config = {}
  42. # 从配置中读取 OCR 服务地址,如果没有则使用默认
  43. if self.free_config.get("ocr_service_url"):
  44. self.ocr_service_url = self.free_config["ocr_service_url"]
  45. def health_check(self) -> bool:
  46. return self.is_healthy
  47. def get_last_error(self) -> VSError:
  48. return self.last_error
  def _set_error(self, code: int, message: str):
      """Record ``code``/``message`` as the last error and write an error log line.

      Codes 401 and 403 additionally mark the plugin as unhealthy.
      """
      error = VSError(code, message)
      self.last_error = error
      VSC_ERROR("bls_plg", "[%s] Error %d: %s", self.group_id, code, message)
      if code == 401 or code == 403:
          self.is_healthy = False
  # =========================================================================
  # 1. Login flow
  # =========================================================================
  def create_session(self) -> bool:
      """Open a fresh HTTP session and log in to the BLS portal.

      Steps: load the login page, collect its form fields together with the
      dynamically named user-id / password inputs, solve the captcha, then
      post the credentials to ``loginsubmit``.

      Returns:
          True when the server's JSON reply reports ``success``; False on any
          failure. A rejected login is recorded as error 2000.
      """
      VSC_INFO("bls_plg", "[%s] Creating session...", self.group_id)
      self.is_healthy = True

      # Validate the configuration before allocating a network session.
      domain = self.free_config.get("domain")
      if not domain or self.config is None:
          VSC_WARN("bls_plg", f"[{self.group_id}] Cannot create session: missing domain or config")
          return False

      # Release the previous session (if any) so repeated logins do not leak
      # connections. Failure to close is not fatal.
      previous = self.session
      if previous is not None:
          try:
              previous.close()
          except Exception as exc:
              VSC_WARN("bls_plg", f"[{self.group_id}] Closing old session failed: {exc}")

      self.session = requests.Session(
          proxy=self._get_proxy_url(),
          impersonate="chrome131",
          curl_options={const.CurlOpt.MAXAGE_CONN: 1800, const.CurlOpt.VERBOSE: False},
      )

      # 1.1 Fetch the login page and parse its parameters.
      url = f"https://{domain}/Global/account/login"
      resp = self._request("GET", url)
      if not resp:
          return False
      soup = BeautifulSoup(resp.text, 'html.parser')
      form_data = self._extract_hidden_fields(soup)

      # The login inputs carry a numeric suffix (e.g. UserId1, Password1).
      # Remember the full id as the form key; the digits are stored under the
      # plain "UserId" / "Password" names as before.
      user_field = None
      password_field = None
      for inp in soup.find_all('input'):
          input_id = inp.get('id', '') or ''
          digits = re.search(r'\d+', input_id)
          if not digits:
              continue
          if 'UserId' in input_id:
              user_field = input_id
              form_data["UserId"] = digits.group(0)
          if 'Password' in input_id:
              password_field = input_id
              form_data["Password"] = digits.group(0)

      # The captcha endpoint needs the ``data`` parameter embedded in the page script.
      data_val = self._extract_js_var(resp.text, "iframeOpenUrl", r"data=([^&]+)")

      # 1.2 Solve the captcha.
      captcha_token = self._solve_bls_captcha(data_val, 'Global/account/login')
      if not captcha_token:
          return False

      # 1.3 Submit the login form. The field-name bookkeeping lives in local
      # variables, so only real form fields are posted.
      account = getattr(self.config, "account", None)
      if account is None:
          VSC_WARN("bls_plg", f"[{self.group_id}] No account configured")
          return False
      submit_url = f"https://{domain}/Global/account/loginsubmit"
      payload = dict(form_data)
      payload["X-Requested-With"] = "XMLHttpRequest"
      payload["CaptchaData"] = captcha_token
      if user_field:
          payload[user_field] = account.username
      if password_field:
          payload[password_field] = account.password

      login_res = self._request("POST", submit_url, data=payload, headers={"Referer": url})
      login_ok = False
      if login_res:
          try:
              login_ok = bool(login_res.json().get('success'))
          except (ValueError, AttributeError) as exc:
              # Non-JSON body or a JSON value that is not an object.
              VSC_WARN("bls_plg", f"[{self.group_id}] Unreadable login response: {exc}")
      if login_ok:
          VSC_INFO("bls_plg", "[%s] Login Successful", self.group_id)
          return True
      self._set_error(2000, "Login Failed")
      return False
  # =========================================================================
  # 2. Query flow
  # =========================================================================
  def query(self) -> VSQueryResult:
      """Walk the visa-type pages and read the appointment calendar.

      Flow: VisaTypeVerification (with captcha) -> VisaType -> store the
      booking parameters from the returned URL in ``self.book_params`` ->
      ManageAppointment page, whose ``availDates`` script variable lists the
      bookable dates.

      Returns:
          A ``VSQueryResult``. ``success`` is set to True once availability
          could be determined (either ``Available`` with the dates filled in,
          or ``NoneAvailable``). On any request or parsing failure the result
          is returned as constructed.
      """
      res = VSQueryResult()
      domain = self.free_config.get("domain")
      if not self.session or not domain:
          return res

      def parse_json(resp) -> Optional[Dict]:
          """Decode a JSON-object reply; return None for anything else."""
          try:
              body = resp.json()
          except ValueError:
              return None
          return body if isinstance(body, dict) else None

      # 2.1 Visa type verification (VisaTypeVerification).
      url_vtv = f"https://{domain}/Global/bls/visatypeverification"
      resp = self._request("GET", url_vtv)
      if not resp:
          return res
      form_vtv = self._extract_hidden_fields(BeautifulSoup(resp.text, 'html.parser'))
      captcha_token = self._solve_bls_captcha(referer='Global/bls/visatypeverification')
      if not captcha_token:
          return res
      form_vtv['CaptchaData'] = captcha_token
      form_vtv["X-Requested-With"] = "XMLHttpRequest"
      vtv_res = self._request("POST", f"https://{domain}/Global/bls/VisaTypeVerification", data=form_vtv, headers={"Referer": url_vtv})
      vtv_json = parse_json(vtv_res) if vtv_res else None
      if not vtv_json or not vtv_json.get('success'):
          return res

      # 2.2 Visa type selection (VisaType). The return URL carries data=xxx.
      data_match = re.search(r"data=([^&]+)", str(vtv_json.get('returnUrl') or ''))
      if not data_match:
          VSC_WARN("bls_plg", f"[{self.group_id}] VisaTypeVerification reply has no data parameter")
          return res
      data_val = data_match.group(1)
      url_vt = f"https://{domain}/Global/bls/visatype?data={data_val}"
      resp_vt = self._request("GET", url_vt)
      if not resp_vt:
          return res
      # Extracting the ids from the page's JS arrays is delegated to
      # _construct_visatype_payload.
      vt_payload = self._construct_visatype_payload(resp_vt.text, BeautifulSoup(resp_vt.text, 'html.parser'))
      if not vt_payload:
          return res
      vt_res = self._request("POST", f"https://{domain}/Global/bls/VisaType", data=vt_payload, headers={"Referer": url_vt})
      vt_json = parse_json(vt_res) if vt_res else None
      if vt_json is None:
          # No reply, or a reply that cannot be interpreted: availability unknown.
          return res
      if not vt_json.get('success'):
          if not vt_json.get('available'):
              res.success = True
              res.availability_status = AvailabilityStatus.NoneAvailable
          return res

      # 2.3 Booking parameters come from the query string of the return URL.
      final_url = vt_json.get('returnUrl')
      if not final_url:
          VSC_WARN("bls_plg", f"[{self.group_id}] VisaType reply has no returnUrl")
          return res
      q_params = parse_qs(urlparse(final_url).query)
      self.book_params = {k: v[0] for k, v in q_params.items()}

      # 2.4 Read the calendar (ManageAppointment).
      url_ma = f"https://{domain}/Global/blsAppointment/ManageAppointment?{urlencode(self.book_params)}"
      resp_ma = self._request("GET", url_ma)
      if not resp_ma:
          return res
      avail_str = self._extract_js_var(resp_ma.text, "var availDates", r"var availDates =(.*?);")
      if not avail_str:
          return res
      try:
          avail_json = json.loads(avail_str)
          dates = [x['DateText'] for x in avail_json['ad'] if x['SingleSlotAvailable']]
      except (ValueError, KeyError, TypeError) as exc:
          VSC_WARN("bls_plg", f"[{self.group_id}] Unreadable availDates payload: {exc}")
          return res

      res.success = True
      if not dates:
          res.availability_status = AvailabilityStatus.NoneAvailable
          return res
      res.availability_status = AvailabilityStatus.Available
      res.earliest_date = dates[0]
      for d in dates:
          da = VSQueryResult.DateAvailability(date=d)
          # The calendar only exposes dates, so a placeholder time slot is attached.
          da.times.append(VSQueryResult.DateAvailability.TimeSlot(time="00:00", label="Available"))
          res.availability.append(da)
      return res
  # =========================================================================
  # 3. Booking flow
  # =========================================================================
  def book(self, slot_info: VSQueryResult) -> VSBookResult:
      """Book the earliest date found by ``query`` and return the liveness link.

      Flow: reload the ManageAppointment page, optionally upload the
      applicant photo, run the e-mail OTP check, pick the time slot with the
      highest ``Count``, solve the captcha, submit the appointment and then
      the applicant form.

      Args:
          slot_info: Query result; only ``earliest_date`` is read.

      Returns:
          A ``VSBookResult``. ``success``, ``session_id``, ``order_id`` and
          ``payment_link`` are filled in only when the final form submission
          succeeds; otherwise the result is returned as constructed.
      """
      res = VSBookResult()
      domain = self.free_config.get("domain")
      if not self.book_params or not self.session or not domain:
          return res
      target_date = slot_info.earliest_date
      if not target_date:
          # Nothing to book; stop before triggering an OTP e-mail.
          VSC_WARN("bls_plg", f"[{self.group_id}] book() called without an earliest date")
          return res
      uinfo = self.free_config.get("user_info", {})

      def parse_json(resp) -> Optional[Dict]:
          """Decode a JSON-object reply; return None for anything else."""
          try:
              body = resp.json()
          except ValueError:
              return None
          return body if isinstance(body, dict) else None

      # 3.1 Load the manage page (needed for the token and JS variables).
      url_ma = f"https://{domain}/Global/blsAppointment/ManageAppointment?{urlencode(self.book_params)}"
      resp_ma = self._request("GET", url_ma)
      if not resp_ma:
          return res
      ma_soup = BeautifulSoup(resp_ma.text, 'html.parser')
      ma_form = self._extract_hidden_fields(ma_soup)
      req_token = ma_form.get('__RequestVerificationToken')

      # 3.2 Upload the photo. Best effort: a failed download or upload is
      # logged and the booking continues without ApplicantPhotoId.
      if 'passport_image_url' in uinfo:
          try:
              photo_resp = requests.get(uinfo['passport_image_url'], timeout=30)
              if photo_resp.status_code != 200:
                  raise ValueError(f"photo download returned HTTP {photo_resp.status_code}")
              photo_bytes = photo_resp.content
              boundary = "----WebKitFormBoundary" + "".join(random.choices(string.ascii_letters + string.digits, k=16))
              upload_headers = {
                  "content-type": f"multipart/form-data; boundary={boundary}",
                  "requestverificationtoken": req_token,
                  "x-requested-with": "XMLHttpRequest",
                  "Referer": url_ma
              }
              body = (f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"photo.jpg\"\r\n"
                      f"Content-Type: image/jpeg\r\n\r\n").encode("utf-8") + photo_bytes + f"\r\n--{boundary}--\r\n".encode("utf-8")
              up_res = self.session.post(f"https://{domain}/Global/query/UploadProfileImage", headers=upload_headers, data=body)
              if up_res.status_code == 200:
                  file_id = (parse_json(up_res) or {}).get('fileId')
                  if file_id:
                      ma_form['ApplicantPhotoId'] = file_id
          except Exception as exc:
              VSC_WARN("bls_plg", f"[{self.group_id}] Photo upload skipped: {exc}")

      # 3.3 E-mail OTP flow.
      data_val = self._extract_js_var(resp_ma.text, "win.iframeOpenUrl", r"data=([^&]+)")
      # Ask the server to send the OTP.
      otp_sent = self._request("GET", f"https://{domain}/Global/blsappointment/SendAppointmentVerificationCode?code={data_val}", headers={"Referer": url_ma, "X-Requested-With": "XMLHttpRequest"})
      if not otp_sent:
          VSC_WARN("bls_plg", f"[{self.group_id}] OTP send request did not return HTTP 200")
      # Read the OTP (waits at most 30 s).
      otp_code = self._read_otp_email(wait_sec=30)
      if not otp_code:
          self._set_error(3004, "OTP timeout")
          return res
      # Verify the OTP.
      verify_payload = {"Code": otp_code, "Value": ma_form.get('EmailCode'), "Id": ma_form.get('Id')}
      v_res = self._request("POST", f"https://{domain}/Global/blsappointment/VerifyEmail", data=verify_payload, headers={"Referer": url_ma, "requestverificationtoken": req_token})
      v_json = parse_json(v_res) if v_res else None
      if not v_json or not v_json.get('success'):
          return res
      ma_form['EmailVerified'] = 'True'
      ma_form['EmailVerificationCode'] = otp_code

      # 3.4 Choose the time slot: query the slots of the target day and take
      # the one with the highest Count.
      slot_url = f"https://{domain}/Global/blsappointment/GetAvailableSlotsByDate"
      slot_params = {
          "appointmentDate": target_date,
          "locationId": ma_form.get("LocationId"),
          "categoryId": ma_form.get("AppointmentCategoryId"),
          "visaType": ma_form.get("VisaType"),
          "visaSubType": ma_form.get("VisaSubTypeId"),
          "applicantCount": 1,
          "dataSource": ma_form.get("DataSource"),
          "missionId": ma_form.get("MissionId")
      }
      slots_res = self._request("POST", slot_url, params=slot_params, headers={"Referer": url_ma, "requestverificationtoken": req_token})
      if not slots_res:
          return res
      try:
          # max() keeps the first entry among ties, like the former stable sort.
          best_slot = max(slots_res.json(), key=lambda s: s["Count"], default=None)
          if best_slot is None or best_slot['Count'] <= 0:
              return res
          target_time = best_slot['Name']
      except (ValueError, KeyError, TypeError) as exc:
          VSC_WARN("bls_plg", f"[{self.group_id}] Unreadable slot list: {exc}")
          return res
      ma_form['ServerAppointmentDate'] = target_date
      ma_form['AppointmentDetailsList'] = '[]'
      # The date/slot field names carry a dynamic numeric id that must be
      # read back from the page.
      page_html = str(ma_soup)
      date_match = re.search(r'AppointmentDate(\d+)', page_html)
      slot_match = re.search(r'AppointmentSlot(\d+)', page_html)
      if not date_match or not slot_match:
          VSC_WARN("bls_plg", f"[{self.group_id}] Appointment date/slot field ids not found")
          return res
      ma_form[f'AppointmentDate{date_match.group(1)}'] = target_date
      ma_form[f'AppointmentSlot{slot_match.group(1)}'] = target_time

      # 3.5 Captcha again, then submit ManageAppointment.
      captcha_token = self._solve_bls_captcha(data_val, f'Global/blsAppointment/ManageAppointment?{urlencode(self.book_params)}')
      if not captcha_token:
          return res
      ma_form['CaptchaData'] = captcha_token
      final_ma_res = self._request("POST", f"https://{domain}/Global/BLSAppointment/ManageAppointment", data=ma_form, headers={"Referer": url_ma})
      if not final_ma_res:
          return res
      model = (parse_json(final_ma_res) or {}).get('model')
      appt_model_id = model.get('Id') if isinstance(model, dict) else None
      if not appt_model_id:
          return res

      # 3.6 Fill in the application form (VisaAppointmentForm); the mapping
      # work is delegated to _submit_final_form.
      if self._submit_final_form(appt_model_id, uinfo, self.book_params, req_token):
          # Success: hand back the liveness link.
          res.success = True
          res.session_id = self._generate_id()
          res.order_id = res.session_id
          res.payment_link = f"https://{domain}/Global/BlsAppointment/livenessView?id={appt_model_id}"
          # Store the session in the cloud so the front end can take over.
          self._save_session_to_cloud(res.session_id, res.payment_link)
          VSC_INFO("bls_plg", "[%s] Book Success. Liveness URL: %s", self.group_id, res.payment_link)
      return res
  # =========================================================================
  # Helpers
  # =========================================================================
  def _request(self, method, url, **kwargs):
      """Send one HTTP request through the plugin session.

      Args:
          method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
          url: Absolute request URL.
          **kwargs: Forwarded to ``self.session.request``. ``headers`` are
              merged over the default browser headers; ``timeout`` defaults
              to 60 seconds unless the caller supplies one.

      Returns:
          The response object when the status code is 200, otherwise None.
          Status 401 and 403/429 are additionally recorded via ``_set_error``.
      """
      VSC_DEBUG("bls_plg", f"_request {method} {url}")
      if self.session is None:
          VSC_WARN("bls_plg", "Request Error: no active session")
          return None

      headers = {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/131.0.0.0 Safari/537.36',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
      }
      headers.update(kwargs.get('headers') or {})
      kwargs['headers'] = headers
      # setdefault avoids a "multiple values for keyword argument" TypeError
      # when a caller passes its own timeout.
      kwargs.setdefault('timeout', 60)

      try:
          resp = self.session.request(method, url, **kwargs)
      except Exception as e:
          VSC_WARN("bls_plg", f"Request Error: {e}")
          return None

      status = resp.status_code
      if status == 200:
          return resp
      if status == 401:
          self._set_error(401, "Unauthorized")
      elif status in (403, 429):
          self._set_error(status, "Blocked")
      else:
          VSC_WARN("bls_plg", f"Unexpected HTTP status {status} for {method} {url}")
      return None
  def _solve_bls_captcha(self, data='', referer='') -> Optional[str]:
      """Solve the image-selection captcha and return the server's captcha token.

      The captcha page asks to "Select <number>" among several base64-inlined
      images. Each image is sent to the OCR service at
      ``self.ocr_service_url``; the ids of the images whose recognised text
      equals the requested number are submitted back.

      Args:
          data: Value for the ``data`` query parameter. When non-empty the
              ``CaptchaPublic`` endpoints are used, otherwise ``NewCaptcha``.
          referer: Path (relative to the domain) sent as the Referer of the
              captcha page request.

      Returns:
          The ``captcha`` value of the submit reply, or None when any step
          fails or no image matched.
      """
      domain = self.free_config.get("domain")
      url = f"https://{domain}/Global/NewCaptcha/GenerateCaptcha"
      if data:
          url = f"https://{domain}/Global/CaptchaPublic/GenerateCaptcha?data={data}"
      resp = self._request("GET", url, headers={"Referer": f"https://{domain}/{referer}"})
      if not resp:
          return None
      soup = BeautifulSoup(resp.text, 'html.parser')

      # 1. Extract the requested number.
      target_match = re.search(r'Select\s*(\d+)', soup.get_text())
      if not target_match:
          return None
      target_num = target_match.group(1)

      # 2. Run every inlined image through the remote OCR service.
      selected_ids = []
      for img in soup.find_all('img', class_='captcha-img'):
          src = img.get('src', '') or ''
          img_id = img.get('id')
          # An image without an id cannot be submitted, so skip it up front.
          if not img_id or 'base64,' not in src:
              continue
          try:
              img_bytes = base64.b64decode(src.split('base64,')[1])
              # The raw image bytes are sent as the request body.
              ocr_resp = requests.post(
                  self.ocr_service_url,
                  data=img_bytes,
                  headers={"Content-Type": "application/octet-stream"},
                  timeout=5
              )
              if ocr_resp.status_code != 200:
                  continue
              ocr_data = ocr_resp.json().get('data')
              ocr_res = str(ocr_data if ocr_data is not None else '').replace('$', '')
          except Exception as e:
              VSC_WARN("bls_plg", f"OCR Service Failed: {e}")
              continue
          VSC_DEBUG("bls_plg", f"OCR: {ocr_res} (Target: {target_num})")
          if ocr_res == target_num:
              selected_ids.append(img_id)
      if not selected_ids:
          return None

      # 3. Submit the selection.
      form = self._extract_hidden_fields(soup)
      form['SelectedImages'] = ",".join(selected_ids)
      submit_url = f"https://{domain}/Global/{'CaptchaPublic' if data else 'NewCaptcha'}/SubmitCaptcha"
      res = self._request("POST", submit_url, data=form, headers={"X-Requested-With": "XMLHttpRequest", "Referer": url})
      if not res:
          return None
      try:
          token = res.json().get('captcha')
      except (ValueError, AttributeError) as exc:
          # Non-JSON body or a JSON value that is not an object.
          VSC_WARN("bls_plg", f"Unreadable captcha submit response: {exc}")
          return None
      return token or None
  323. def _extract_hidden_fields(self, soup) -> Dict:
  324. params = {}
  325. form = soup.find("form")
  326. if form:
  327. for inp in form.find_all("input"):
  328. name = inp.get("name")
  329. if name: params[name] = inp.get("value", "")
  330. return params
  331. def _extract_js_var(self, html, context, pattern):
  332. # 简单正则提取
  333. if context in html:
  334. match = re.search(pattern, html)
  335. if match: return match.group(1)
  336. return ""
  337. def _construct_visatype_payload(self, html, soup):
  338. # 简化版:提取 ID 逻辑。实际需根据 free_config 的 VisaType 名称匹配 JS 数组中的 ID
  339. # 这里仅展示结构,核心是利用 self.free_config['visaType'] 等去匹配
  340. params = self._extract_hidden_fields(soup)
  341. # Helper inner function to find ID from JS array
  342. def find_id(var_name, target_name, key="Name", val_key="Id"):
  343. json_str = self._extract_js_var(html, f"var {var_name}", rf"var {var_name}\s*=\s*(.*?);")
  344. if json_str:
  345. try:
  346. data = json.loads(json_str)
  347. for item in data:
  348. if item.get(key) == target_name: return item.get(val_key)
  349. except: pass
  350. return None
  351. # 示例:Jurisdiction
  352. if self.free_config.get('jurisdiction'):
  353. jid = find_id("jurisdictionData", self.free_config['jurisdiction'])
  354. if jid: params[f'JurisdictionId{jid}'] = jid # 这里的 Key 也是动态的,BLS 特色
  355. # ... 对 Location, VisaType, VisaSubType 重复此逻辑 ...
  356. params["X-Requested-With"] = "XMLHttpRequest"
  357. params["ResponseData"] = "[]" # 必须字段
  358. return params
  359. def _submit_final_form(self, model_id, uinfo, book_params, token):
  360. # 1. Get Form HTML -> 2. Parse JS Data -> 3. Map uinfo -> 4. Post
  361. # 略,参考原代码 parse_application_form_excel 和 _fix_applicant_data
  362. # 这是一个纯数据映射过程
  363. return True
  364. def _read_otp_email(self, wait_sec=30):
  365. # 轮询 Cloud API
  366. for _ in range(wait_sec // 5):
  367. time.sleep(5)
  368. # content = VSCloudApi.Instance().fetch_mail_content(...)
  369. # ...
  370. pass
  371. return "123456" # Mock
  def _save_session_to_cloud(self, sid, url):
      """Store the session cookies in the cloud under ``sid`` together with ``url``.

      The cookies are serialised as a JSON object mapping cookie name to
      value and passed to ``VSCloudApi.create_http_session``.
      """
      # NOTE(review): ``requests`` here is curl_cffi.requests, which is not
      # known to provide ``requests.utils.dict_from_cookiejar`` - confirm.
      # The name -> value mapping is therefore built directly from the cookie
      # jar. curl_cffi exposes the underlying http.cookiejar.CookieJar as
      # ``.jar``; an object without that attribute is iterated as a jar itself.
      cookie_store = self.session.cookies if self.session is not None else None
      jar = getattr(cookie_store, "jar", cookie_store)
      cookie_map = {cookie.name: cookie.value for cookie in jar} if jar is not None else {}
      cookies = json.dumps(cookie_map)
      VSCloudApi.Instance().create_http_session(sid, cookies, "", "", "", url, {})
  375. def _get_proxy_url(self):
  376. p = self.config.proxy
  377. if not p.ip: return ""
  378. if p.username: return f"{p.scheme}://{p.username}:{p.password}@{p.ip}:{p.port}"
  379. return f"{p.scheme}://{p.ip}:{p.port}"
  380. def _generate_id(self):
  381. return "".join(random.choices(string.ascii_letters + string.digits, k=8))