bls_plugin.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. import re
  2. import base64
  3. import time
  4. import json
  5. import random
  6. import string
  7. from pathlib import Path
  8. from urllib.parse import urlparse, parse_qs, urlencode
  9. from typing import Dict, List, Optional, Any
  10. try:
  11. from curl_cffi import requests, const
  12. from bs4 import BeautifulSoup
  13. except ImportError:
  14. raise ImportError("Missing dependencies. Run: pip install curl-cffi beautifulsoup4")
  15. # 框架依赖
  16. from vs_plg import IVSPlg, VSError # type: ignore
  17. from vs_types import VSPlgConfig, VSQueryResult, VSBookResult, AvailabilityStatus # type: ignore
  18. from vs_log_macros import VSC_INFO, VSC_ERROR, VSC_DEBUG, VSC_WARN # type: ignore
  19. from toolkit.vs_cloud_api import VSCloudApi # type: ignore
  20. from utils.browser_util import get_browser
  21. class BlsPlugin(IVSPlg):
  22. """
  23. BLS 签证预约插件 (精简版)
  24. """
  def __init__(self, group_id: str):
      """Initialise per-group plugin state.

      Args:
          group_id: Identifier of the group this instance serves; it is
              returned by ``get_group_id`` and included in log lines.
      """
      # Default OCR service endpoint; ``set_config`` may override it.
      self.ocr_service_url = "http://127.0.0.1:8085/predict/vfcode?model=pytorch"

      # Runtime state.
      self.is_healthy = True
      self.last_error = VSError(0, "OK")
      self.book_params: Dict = {}

      # Configuration and HTTP session are populated later.
      self.session: Optional[requests.Session] = None
      self.free_config: Dict[str, Any] = {}
      self.config: Optional[VSPlgConfig] = None
      self.group_id = group_id

      self.browser = get_browser()
  37. def get_group_id(self) -> str:
  38. return self.group_id
  39. def set_config(self, config: VSPlgConfig):
  40. self.config = config
  41. try:
  42. self.free_config = json.loads(config.free_config) if config.free_config else {}
  43. except:
  44. self.free_config = {}
  45. # 从配置中读取 OCR 服务地址,如果没有则使用默认
  46. if self.free_config.get("ocr_service_url"):
  47. self.ocr_service_url = self.free_config["ocr_service_url"]
  48. def health_check(self) -> bool:
  49. return self.is_healthy
  50. def get_last_error(self) -> VSError:
  51. return self.last_error
  def _set_error(self, code: int, message: str):
      """Record ``code``/``message`` as the last error and log it.

      Codes 401 and 403 additionally flip ``is_healthy`` to False.
      """
      error = VSError(code, message)
      self.last_error = error
      VSC_ERROR("bls_plg", "[%s] Error %d: %s", self.group_id, code, message)
      if code == 401 or code == 403:
          self.is_healthy = False
  # =========================================================================
  # 1. Login flow
  # =========================================================================
  def create_session(self) -> bool:
      """Open a fresh HTTP session and log in to the BLS site.

      The login page is fetched, its form inputs and the numbered
      ``UserId<N>`` / ``Password<N>`` input ids are collected, the captcha
      is solved, and the credentials are posted to ``loginsubmit``.

      Returns:
          True when the login response is a JSON object with a truthy
          ``success`` field; False on any earlier failure. A rejected or
          unparsable login response is recorded as error 2000.
      """
      VSC_INFO("bls_plg", "[%s] Creating session...", self.group_id)
      self.is_healthy = True

      if self.config is None:
          VSC_WARN("bls_plg", f"[{self.group_id}] create_session called before set_config")
          return False

      # Initialise the session.
      self.session = requests.Session(
          proxy=self._get_proxy_url(),
          impersonate="chrome131",
          curl_options={const.CurlOpt.MAXAGE_CONN: 1800, const.CurlOpt.VERBOSE: False},
      )

      domain = self.free_config.get("domain")
      if not domain:
          VSC_WARN("bls_plg", f"[{self.group_id}] No 'domain' in free_config")
          return False

      # 1.1 Fetch the login page and parse its parameters.
      resp = self._request("GET", f"https://{domain}/Global/account/login")
      if not resp:
          return False
      soup = BeautifulSoup(resp.text, 'html.parser')
      # Work on a copy so bookkeeping never ends up in the POST body.
      payload = dict(self._extract_hidden_fields(soup))

      # Resolve the dynamic input ids (UserId1, Password1, ...). The ids are
      # kept in locals; only the numeric suffix is stored in the payload.
      user_key = None
      password_key = None
      for inp in soup.find_all('input'):
          iid = inp.get('id', '')
          suffix = re.search(r'\d+', iid)
          if not suffix:
              continue
          if 'UserId' in iid:
              user_key = iid
              payload["UserId"] = suffix.group(0)
          if 'Password' in iid:
              password_key = iid
              payload["Password"] = suffix.group(0)

      # The page's ``data`` value is needed by the captcha endpoint.
      data_val = self._extract_js_var(resp.text, "iframeOpenUrl", r"data=([^']+)")

      # 1.2 Solve the captcha.
      captcha_token = self._solve_bls_captcha(data_val)
      if not captcha_token:
          return False

      # 1.3 Submit the login form.
      submit_url = f"https://{domain}/Global/account/loginsubmit"
      payload["X-Requested-With"] = "XMLHttpRequest"
      payload["CaptchaData"] = captcha_token
      if user_key:
          payload[user_key] = self.config.account.username
      if password_key:
          payload[password_key] = self.config.account.password

      login_res = self._request("POST", submit_url, data=payload)
      try:
          logged_in = bool(login_res and login_res.json().get('success'))
      except (ValueError, AttributeError):
          # Body was not JSON, or not a JSON object.
          logged_in = False

      if logged_in:
          VSC_INFO("bls_plg", "[%s] Login Successful", self.group_id)
          return True
      self._set_error(2000, "Login Failed")
      return False
  # =========================================================================
  # 2. Query flow
  # =========================================================================
  def query(self) -> VSQueryResult:
      """Check appointment availability for the configured visa type.

      Flow: pass the VisaTypeVerification captcha, submit the VisaType
      form, remember the resulting query parameters in ``self.book_params``
      and read the ``availDates`` JS variable of the ManageAppointment page.

      Returns:
          A ``VSQueryResult``. ``success`` is set only when availability
          could be determined (dates found, or the site reported none);
          on any fetch or parse failure the fresh result is returned as is.
      """
      res = VSQueryResult()
      domain = self.free_config.get("domain")
      if not self.session or not domain:
          return res

      def as_json(response):
          """Decode ``response`` as a JSON object; None if absent or malformed."""
          if not response:
              return None
          try:
              body = response.json()
          except ValueError:
              return None
          return body if isinstance(body, dict) else None

      # 2.1 Visa type verification (VisaTypeVerification).
      resp = self._request("GET", f"https://{domain}/Global/bls/visatypeverification")
      if not resp:
          return res
      form_vtv = self._extract_hidden_fields(BeautifulSoup(resp.text, 'html.parser'))
      captcha_token = self._solve_bls_captcha()
      if not captcha_token:
          return res
      form_vtv['CaptchaData'] = captcha_token
      form_vtv["X-Requested-With"] = "XMLHttpRequest"
      vtv_json = as_json(
          self._request("POST", f"https://{domain}/Global/bls/VisaTypeVerification", data=form_vtv)
      )
      if not vtv_json or not vtv_json.get('success'):
          return res

      # 2.2 Visa type selection (VisaType). returnUrl carries data=xxx.
      data_match = re.search(r"data=([^&]+)", str(vtv_json.get('returnUrl') or ''))
      if not data_match:
          return res
      url_vt = f"https://{domain}/Global/bls/visatype?data={data_match.group(1)}"
      resp_vt = self._request("GET", url_vt)
      if not resp_vt:
          return res
      # The JS-array-to-id matching lives in _construct_visatype_payload.
      vt_payload = self._construct_visatype_payload(resp_vt.text, BeautifulSoup(resp_vt.text, 'html.parser'))
      if not vt_payload:
          return res
      vt_json = as_json(self._request("POST", f"https://{domain}/Global/bls/VisaType", data=vt_payload))
      if vt_json is None:
          return res
      if not vt_json.get('success'):
          # A well-formed refusal without 'available' means no slots.
          if not vt_json.get('available'):
              res.success = True
              res.availability_status = AvailabilityStatus.NoneAvailable
          return res

      # 2.3 Keep the booking parameters from the return URL.
      final_url = vt_json.get('returnUrl')
      if not final_url:
          return res
      q_params = parse_qs(urlparse(final_url).query)
      self.book_params = {k: v[0] for k, v in q_params.items()}

      # 2.4 Read the calendar (ManageAppointment).
      url_ma = f"https://{domain}/Global/blsAppointment/ManageAppointment?{urlencode(self.book_params)}"
      resp_ma = self._request("GET", url_ma)
      if not resp_ma:
          return res
      avail_str = self._extract_js_var(resp_ma.text, "var availDates", r"var availDates =(.*?);")
      if not avail_str:
          return res
      try:
          avail_json = json.loads(avail_str)
          dates = [x['DateText'] for x in avail_json['ad'] if x['SingleSlotAvailable']]
      except (ValueError, KeyError, TypeError) as e:
          VSC_WARN("bls_plg", f"availDates parse failed: {e}")
          return res

      # NOTE(review): the "no dates" branch is read as the else of
      # ``if dates`` — confirm against the original indentation.
      res.success = True
      if not dates:
          res.availability_status = AvailabilityStatus.NoneAvailable
          return res

      res.availability_status = AvailabilityStatus.Available
      res.earliest_date = dates[0]
      for d in dates:
          da = VSQueryResult.DateAvailability(date=d)
          da.times.append(VSQueryResult.DateAvailability.TimeSlot(time="00:00", label="Available"))
          res.availability.append(da)
      return res
  # =========================================================================
  # 3. Booking flow
  # =========================================================================
  def book(self, slot_info: VSQueryResult) -> VSBookResult:
      """Book the earliest date reported by ``query``.

      Flow: reload the ManageAppointment page, optionally upload the
      applicant photo, pass the e-mail OTP check, pick the time slot with
      the highest remaining count on ``slot_info.earliest_date``, solve the
      captcha, submit the appointment and then the applicant form.

      Args:
          slot_info: Query result; only ``earliest_date`` is read.

      Returns:
          A ``VSBookResult``. ``success``, ``session_id``, ``order_id`` and
          ``payment_link`` (the liveness URL) are set only when every step
          succeeded; otherwise the fresh result is returned unchanged.
      """
      res = VSBookResult()
      domain = self.free_config.get("domain")
      if not self.book_params or not self.session:
          return res
      uinfo = self.free_config.get("user_info", {})

      def as_json(response):
          """Decode ``response`` as a JSON object; None if absent or malformed."""
          if not response:
              return None
          try:
              body = response.json()
          except ValueError:
              return None
          return body if isinstance(body, dict) else None

      # 3.1 Fetch the manage page (for the token and JS variables).
      url_ma = f"https://{domain}/Global/blsAppointment/ManageAppointment?{urlencode(self.book_params)}"
      resp_ma = self._request("GET", url_ma)
      if not resp_ma:
          return res
      ma_soup = BeautifulSoup(resp_ma.text, 'html.parser')
      ma_form = self._extract_hidden_fields(ma_soup)
      req_token = ma_form.get('__RequestVerificationToken')

      # 3.2 Upload the photo.
      if 'passport_image_url' in uinfo:
          photo_resp = requests.get(uinfo['passport_image_url'])
          if photo_resp.status_code == 200:
              photo_bytes = photo_resp.content
              boundary = "----WebKitFormBoundary" + "".join(
                  random.choices(string.ascii_letters + string.digits, k=16)
              )
              upload_headers = {
                  "content-type": f"multipart/form-data; boundary={boundary}",
                  "requestverificationtoken": req_token,
                  "x-requested-with": "XMLHttpRequest",
              }
              body = (f"--{boundary}\r\nContent-Disposition: form-data; name=\"file\"; filename=\"photo.jpg\"\r\n"
                      f"Content-Type: image/jpeg\r\n\r\n").encode("utf-8") + photo_bytes + f"\r\n--{boundary}--\r\n".encode("utf-8")
              up_res = self.session.post(
                  f"https://{domain}/Global/query/UploadProfileImage", headers=upload_headers, data=body
              )
              if up_res.status_code == 200:
                  up_json = as_json(up_res)
                  if up_json and 'fileId' in up_json:
                      ma_form['ApplicantPhotoId'] = up_json['fileId']
          else:
              # Do not upload an error page as the applicant photo.
              VSC_WARN("bls_plg", f"Photo download failed: HTTP {photo_resp.status_code}")

      # 3.3 E-mail OTP flow.
      data_val = self._extract_js_var(resp_ma.text, "win.iframeOpenUrl", r"data=([^&]+)")
      # Send the OTP.
      self._request(
          "GET",
          f"https://{domain}/Global/blsappointment/SendAppointmentVerificationCode?code={data_val}",
          headers={"X-Requested-With": "XMLHttpRequest"},
      )
      # Read the OTP (wait 30s at most).
      otp_code = self._read_otp_email(wait_sec=30)
      if not otp_code:
          self._set_error(3004, "OTP timeout")
          return res
      # Verify the OTP.
      verify_payload = {"Code": otp_code, "Value": ma_form.get('EmailCode'), "Id": ma_form.get('Id')}
      v_json = as_json(self._request(
          "POST",
          f"https://{domain}/Global/blsappointment/VerifyEmail",
          data=verify_payload,
          headers={"requestverificationtoken": req_token},
      ))
      if not v_json or not v_json.get('success'):
          return res
      ma_form['EmailVerified'] = 'True'
      ma_form['EmailVerificationCode'] = otp_code

      # 3.4 Pick a time slot on the target date.
      target_date = slot_info.earliest_date
      slot_url = f"https://{domain}/Global/blsappointment/GetAvailableSlotsByDate"
      # Some non-essential query parameters are omitted here.
      slot_params = {
          "appointmentDate": target_date,
          "locationId": ma_form.get("LocationId"),
          "categoryId": ma_form.get("AppointmentCategoryId"),
          "visaType": ma_form.get("VisaType"),
          "visaSubType": ma_form.get("VisaSubTypeId"),
          "applicantCount": 1,
          "dataSource": ma_form.get("DataSource"),
          "missionId": ma_form.get("MissionId"),
      }
      slots_res = self._request(
          "POST", slot_url, params=slot_params, headers={"requestverificationtoken": req_token}
      )
      if not slots_res:
          return res
      try:
          raw_slots = slots_res.json()
      except ValueError:
          return res
      if not isinstance(raw_slots, list):
          return res
      usable_slots = [
          s for s in raw_slots
          if isinstance(s, dict) and isinstance(s.get("Count"), (int, float)) and "Name" in s
      ]
      if not usable_slots:
          return res
      # Choose the slot with the most remaining capacity (first one on ties).
      best_slot = max(usable_slots, key=lambda s: s["Count"])
      if best_slot['Count'] <= 0:
          return res
      target_time = best_slot['Name']

      ma_form['ServerAppointmentDate'] = target_date
      ma_form['AppointmentDetailsList'] = '[]'
      # The date/slot field names carry a dynamic numeric id; re-parse it.
      page_html = str(ma_soup)
      date_match = re.search(r'AppointmentDate(\d+)', page_html)
      slot_match = re.search(r'AppointmentSlot(\d+)', page_html)
      if not date_match or not slot_match:
          VSC_WARN("bls_plg", "AppointmentDate/AppointmentSlot field id not found on manage page")
          return res
      ma_form[f'AppointmentDate{date_match.group(1)}'] = target_date
      ma_form[f'AppointmentSlot{slot_match.group(1)}'] = target_time

      # 3.5 Captcha again, then submit ManageAppointment.
      captcha_token = self._solve_bls_captcha(data_val)
      if not captcha_token:
          return res
      ma_form['CaptchaData'] = captcha_token
      final_json = as_json(
          self._request("POST", f"https://{domain}/Global/BLSAppointment/ManageAppointment", data=ma_form)
      )
      model = final_json.get('model') if final_json else None
      appt_model_id = model.get('Id') if isinstance(model, dict) else None
      if not appt_model_id:
          return res

      # 3.6 Fill in the application form (VisaAppointmentForm).
      # Fetch page -> parse JS variables -> map user info -> submit; that
      # mapping is delegated to _submit_final_form.
      if self._submit_final_form(appt_model_id, uinfo, self.book_params, req_token):
          # Success: hand back the liveness link.
          res.success = True
          res.session_id = self._generate_id()
          res.order_id = res.session_id
          res.payment_link = f"https://{domain}/Global/BlsAppointment/livenessView?id={appt_model_id}"
          # Store the session in the cloud so the front end can take over.
          self._save_session_to_cloud(res.session_id, res.payment_link)
          VSC_INFO("bls_plg", "[%s] Book Success. Liveness URL: %s", self.group_id, res.payment_link)
      return res
  # =========================================================================
  # Helpers
  # =========================================================================
  def _request(self, method, url, **kwargs):
      """Send one HTTP request through the plugin session.

      Args:
          method: HTTP verb such as "GET" or "POST".
          url: Absolute URL to request.
          **kwargs: Forwarded to ``self.session.request``. ``headers`` are
              merged over the default User-Agent / Accept-Language pair and
              ``timeout`` defaults to 60 seconds when not supplied.

      Returns:
          The response when its status code is exactly 200, otherwise
          None. 401, 403 and 429 are also recorded through ``_set_error``;
          exceptions raised while sending are logged and turned into None.
      """
      VSC_DEBUG("bls_plg", "[%s] _request %s %s", self.group_id, method, url)

      headers = {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/131.0.0.0 Safari/537.36',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'
      }
      # Caller headers win over the defaults; tolerate headers=None.
      headers.update(kwargs.pop('headers', None) or {})
      kwargs['headers'] = headers
      # setdefault lets a caller pass its own timeout without a
      # duplicate-keyword TypeError.
      kwargs.setdefault('timeout', 60)

      try:
          resp = self.session.request(method, url, **kwargs)
      except Exception as e:
          VSC_WARN("bls_plg", f"Request Error: {e}")
          return None

      status = resp.status_code
      if status == 200:
          return resp
      if status == 401:
          self._set_error(401, "Unauthorized")
      elif status in (403, 429):
          self._set_error(status, "Blocked")
      else:
          VSC_DEBUG("bls_plg", "[%s] %s %s -> HTTP %s", self.group_id, method, url, status)
      return None
  def _solve_bls_captcha(self, data='') -> Optional[str]:
      """Solve the BLS image-grid captcha and return the server's token.

      The captcha page is downloaded, saved to ``tmp.html`` and opened in
      the browser so that only the caption and the tiles that have a
      rectangle and are not covered are considered. Each such tile is sent
      to the OCR service; tiles whose recognised text equals the number in
      the caption are submitted.

      Args:
          data: Optional ``data`` query value. When non-empty the
              ``CaptchaPublic`` endpoints are used, otherwise ``NewCaptcha``.

      Returns:
          The ``captcha`` value of the submit response, or None when any
          step fails (page not fetched, caption or tiles not found, no
          tile matched, or the submit response carries no token).
      """
      domain = self.free_config.get("domain")
      if data:
          url = f"https://{domain}/Global/CaptchaPublic/GenerateCaptcha?data={data}"
      else:
          url = f"https://{domain}/Global/NewCaptcha/GenerateCaptcha"
      resp = self._request("GET", url)
      if not resp:
          return None

      # NOTE(review): the fixed file name in the working directory is shared
      # by every caller — confirm this method is never run concurrently.
      html_file_path = Path("tmp.html").resolve()
      # Write UTF-8 explicitly; the platform default encoding may be unable
      # to represent the page.
      html_file_path.write_text(resp.text, encoding="utf-8")
      self.browser.get(html_file_path.as_uri())

      captions_ele = self.browser.ele('xpath://*[@id="captcha-main-div"]/div/div[1]', timeout=5)
      if not captions_ele:
          return None
      # Several captions are present; the last uncovered one is used.
      caption_text = ''
      for caption in captions_ele.children():
          if not caption.states.is_covered:
              caption_text = caption.text
      digit_runs = re.findall(r'\d+', caption_text)
      if not digit_runs:
          VSC_WARN("bls_plg", f"Captcha caption has no number: {caption_text!r}")
          return None
      numbers = digit_runs[0]

      captcha_images_ele = self.browser.ele('xpath://*[@id="captcha-main-div"]/div/div[2]')
      if not captcha_images_ele:
          return None

      selected_ids = []
      for captcha_image in captcha_images_ele.children():
          img = captcha_image.ele('.captcha-img')
          # Skip tiles that are missing, have no rectangle, or are covered.
          if not img or not img.states.has_rect or img.states.is_covered:
              continue
          img_src = img.attr('src')
          if not (img_src and img_src.startswith('data:image')):
              continue
          # === Remote OCR call ===
          try:
              base64_data = re.sub('^data:image/.+;base64,', '', img_src)
              img_bytes = base64.b64decode(base64_data)
              # The raw image bytes are sent as the request body.
              ocr_resp = requests.post(
                  self.ocr_service_url,
                  data=img_bytes,
                  headers={"Content-Type": "application/octet-stream"},
                  timeout=5
              )
              if ocr_resp.status_code == 200:
                  res_json = ocr_resp.json()
                  ocr_res = res_json.get('data', '').replace('$', '')[:3]
                  VSC_INFO("bls_plg", f'ocr captcha id={captcha_image.attr("id")} result={ocr_res}, target={numbers}')
                  if ocr_res == numbers:
                      eid = captcha_image.attr('id')
                      if eid:
                          selected_ids.append(eid)
          except Exception as e:
              # A failing tile must not abort the remaining tiles.
              VSC_WARN("bls_plg", f"OCR Service Failed: {e}")

      if not selected_ids:
          return None
      VSC_INFO("bls_plg", f'select_ids={selected_ids}')

      # Submit the selected tiles together with the page's form fields.
      form = self._extract_hidden_fields(BeautifulSoup(resp.text, 'html.parser'))
      form['SelectedImages'] = ",".join(selected_ids)
      submit_url = f"https://{domain}/Global/{'CaptchaPublic' if data else 'NewCaptcha'}/SubmitCaptcha"
      res = self._request("POST", submit_url, data=form, headers={"X-Requested-With": "XMLHttpRequest"})
      if not res:
          return None
      try:
          token = res.json().get('captcha')
      except (ValueError, AttributeError):
          # Body was not JSON, or not a JSON object.
          return None
      return token or None
  347. def _extract_hidden_fields(self, soup) -> Dict:
  348. params = {}
  349. form = soup.find("form")
  350. if form:
  351. for inp in form.find_all("input"):
  352. name = inp.get("name")
  353. if name: params[name] = inp.get("value", "")
  354. return params
  355. def _extract_js_var(self, html, context, pattern):
  356. # 简单正则提取
  357. if context in html:
  358. match = re.search(pattern, html)
  359. if match: return match.group(1)
  360. return ""
  361. def _construct_visatype_payload(self, html, soup):
  362. # 简化版:提取 ID 逻辑。实际需根据 free_config 的 VisaType 名称匹配 JS 数组中的 ID
  363. # 这里仅展示结构,核心是利用 self.free_config['visaType'] 等去匹配
  364. params = self._extract_hidden_fields(soup)
  365. # Helper inner function to find ID from JS array
  366. def find_id(var_name, target_name, key="Name", val_key="Id"):
  367. json_str = self._extract_js_var(html, f"var {var_name}", rf"var {var_name}\s*=\s*(.*?);")
  368. if json_str:
  369. try:
  370. data = json.loads(json_str)
  371. for item in data:
  372. if item.get(key) == target_name: return item.get(val_key)
  373. except: pass
  374. return None
  375. # 示例:Jurisdiction
  376. if self.free_config.get('jurisdiction'):
  377. jid = find_id("jurisdictionData", self.free_config['jurisdiction'])
  378. if jid: params[f'JurisdictionId{jid}'] = jid # 这里的 Key 也是动态的,BLS 特色
  379. # ... 对 Location, VisaType, VisaSubType 重复此逻辑 ...
  380. params["X-Requested-With"] = "XMLHttpRequest"
  381. params["ResponseData"] = "[]" # 必须字段
  382. return params
  383. def _submit_final_form(self, model_id, uinfo, book_params, token):
  384. # 1. Get Form HTML -> 2. Parse JS Data -> 3. Map uinfo -> 4. Post
  385. # 略,参考原代码 parse_application_form_excel 和 _fix_applicant_data
  386. # 这是一个纯数据映射过程
  387. return True
  388. def _read_otp_email(self, wait_sec=30):
  389. # 轮询 Cloud API
  390. for _ in range(wait_sec // 5):
  391. time.sleep(5)
  392. # content = VSCloudApi.Instance().fetch_mail_content(...)
  393. # ...
  394. pass
  395. return "123456" # Mock
  def _save_session_to_cloud(self, sid, url):
      """Persist the session cookies and liveness URL through the cloud API.

      Args:
          sid: Session id under which the cookies are stored.
          url: URL handed over together with the cookies.
      """
      jar = self.session.cookies
      # Prefer the jar's own ``get_dict``; fall back to the requests-style
      # helper when the jar does not offer it.
      # NOTE(review): ``requests`` here is ``curl_cffi.requests`` — confirm
      # that ``utils.dict_from_cookiejar`` exists in the deployed version.
      get_dict = getattr(jar, "get_dict", None)
      if callable(get_dict):
          cookie_map = get_dict()
      else:
          cookie_map = requests.utils.dict_from_cookiejar(jar)
      cookies = json.dumps(cookie_map)
      VSCloudApi.Instance().create_http_session(sid, cookies, "", "", "", url, {})
  399. def _get_proxy_url(self):
  400. p = self.config.proxy
  401. if not p.ip: return ""
  402. if p.username: return f"{p.scheme}://{p.username}:{p.password}@{p.ip}:{p.port}"
  403. return f"{p.scheme}://{p.ip}:{p.port}"
  404. def _generate_id(self):
  405. return "".join(random.choices(string.ascii_letters + string.digits, k=8))