de_plugin.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import base64
  9. import socket
  10. from datetime import datetime
  11. from typing import List, Dict, Optional, Any, Callable
  12. from urllib.parse import urljoin, urlparse, urlencode
  13. # DrissionPage 核心
  14. from DrissionPage import ChromiumPage, ChromiumOptions
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from toolkit.vs_cloud_api import VSCloudApi
  18. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  19. from toolkit.proxy_tunnel import ProxyTunnel
  20. from toolkit.ocr_engine import DddOcrEngine
  21. class BrowserResponse:
  22. def __init__(self, result_dict):
  23. result_dict = result_dict or {}
  24. self.status_code = result_dict.get('status', 0)
  25. self.text = result_dict.get('body', '')
  26. self.headers = result_dict.get('headers', {})
  27. self.url = result_dict.get('url', '')
  28. self._json = None
  29. def json(self):
  30. if self._json is None:
  31. if not self.text: return {}
  32. try: self._json = json.loads(self.text)
  33. except: self._json = {}
  34. return self._json
  35. def to_yyyymmdd(data_str: str, date_str_format: str, target_format: str="%Y-%m-%d"):
  36. dt = datetime.strptime(data_str, date_str_format)
  37. return dt.strftime("%Y-%m-%d")
  38. def get_alias_email(email: str, new_domain: str = "gmail-app.com") -> str:
  39. if "@" not in email: raise ValueError(f"Invalid email: {email}")
  40. local_part, _ = email.rsplit("@", 1)
  41. return f"{local_part}@{new_domain}"
  42. class DePlugin(IVSPlg):
  43. """
  44. Germany (Visametric) 签证预约插件 (Browser + Tunnel Mode)
  45. """
  46. def __init__(self, group_id: str):
  47. self.group_id = group_id
  48. self.config: Optional[VSPlgConfig] = None
  49. self.free_config: Dict[str, Any] = {}
  50. self.logger = None
  51. # 浏览器实例
  52. self.page: Optional[ChromiumPage] = None
  53. # 资源隔离
  54. self.instance_id = uuid.uuid4().hex[:8]
  55. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  56. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  57. if not os.path.exists(self.root_workspace):
  58. os.makedirs(self.root_workspace)
  59. self.tunnel = None # 代理隧道
  60. self.is_healthy = True
  61. self.session_create_time: float = 0
  62. # 字符识别引擎
  63. self.ocr_engine: Optional[DddOcrEngine] = None
  64. # 业务状态
  65. self.base_url = "https://ie-appointment.visametric.com"
  66. self.csrf_token = ""
  67. self.personal_info_val = ""
  68. self.email_val_control = ""
  69. def get_group_id(self) -> str:
  70. return self.group_id
  71. def set_log(self, logger: Callable[[str], None]) -> None:
  72. self.logger = logger
  73. def _log(self, message):
  74. if self.logger:
  75. self.logger(f'[DePlugin] [{self.group_id}] {message}')
  76. else:
  77. print(f'[DePlugin] [{self.group_id}] {message}')
  78. def set_config(self, config: VSPlgConfig):
  79. self.config = config
  80. self.free_config = config.free_config or {}
  81. if self.free_config.get("base_url"):
  82. self.base_url = self.free_config["base_url"].rstrip('/')
  83. def keep_alive(self):
  84. pass
  85. def health_check(self) -> bool:
  86. if not self.is_healthy:
  87. return False
  88. if not self.page:
  89. return False
  90. try:
  91. if not self.page.run_js("return 1;"):
  92. return False
  93. except:
  94. return False
  95. if self.config.session_max_life > 0:
  96. if time.time() - self.session_create_time > self.config.session_max_life * 60:
  97. self._log("Session expired.")
  98. return False
  99. return True
  100. def create_session(self):
  101. """
  102. 创建会话:启动浏览器 -> 代理隧道 -> 过盾 -> 提取 Captcha -> 本地识别 -> 提交 -> 获取 Context
  103. """
  104. self._log(f"Initializing Session (ID: {self.instance_id})...")
  105. self.ocr_engine = DddOcrEngine()
  106. co = ChromiumOptions()
  107. # 端口分配 (Docker 适配)
  108. def get_free_port():
  109. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  110. s.bind(('', 0)); return s.getsockname()[1]
  111. co.set_local_port(get_free_port())
  112. # 路径与隔离
  113. co.set_user_data_path(self.user_data_path)
  114. chrome_path = os.getenv("CHROME_BIN")
  115. if chrome_path and os.path.exists(chrome_path):
  116. co.set_paths(browser_path=chrome_path)
  117. # 代理隧道
  118. if self.config.proxy and self.config.proxy.ip:
  119. p = self.config.proxy
  120. if p.username and p.password:
  121. self._log(f"Starting Tunnel for {p.ip}...")
  122. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  123. local_proxy = self.tunnel.start()
  124. self._log(f"Tunnel started at {local_proxy}")
  125. co.set_argument(f'--proxy-server={local_proxy}')
  126. else:
  127. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  128. co.set_argument(f'--proxy-server={proxy_str}')
  129. else:
  130. self._log("[WARN] No proxy configured!")
  131. # Docker 核心参数
  132. co.headless(False)
  133. co.set_argument('--no-sandbox')
  134. co.set_argument('--disable-gpu')
  135. co.set_argument('--disable-dev-shm-usage')
  136. co.set_argument('--window-size=1920,1080')
  137. co.set_argument('--disable-blink-features=AutomationControlled')
  138. co.set_argument('--ignore-certificate-errors')
  139. try:
  140. self.page = ChromiumPage(co)
  141. # 1. 访问首页
  142. url_home = f"{self.base_url}/en"
  143. self._log(f"Navigating to {url_home}")
  144. self.page.get(url_home)
  145. # 2. Cloudflare 过盾
  146. cf = CloudflareBypasser(self.page, log=self.config.debug)
  147. if not cf.bypass(max_retry=15):
  148. if "access denied" in self.page.title.lower():
  149. raise BizLogicError("Cloudflare Access Denied")
  150. raise BizLogicError("Cloudflare bypass timeout")
  151. # 3. 提取 CSRF 和 验证码
  152. # 等待页面加载
  153. meta_ele = self.page.ele('xpath://meta[@name="csrf-token"]', timeout=30)
  154. if not meta_ele:
  155. # 截图调试,看看是不是还在 Cloudflare 或者加载失败
  156. self.page.get_screenshot(path='csrf_not_found.jpg')
  157. raise NotFoundError("CSRF Token meta tag not found (Page load failed?)")
  158. self.csrf_token = meta_ele.attr('content')
  159. # 提取验证码图片 (Visametric Base64)
  160. html = self.page.html
  161. match = re.search(r'"data:image/png;base64,"\s*\+\s*"(.*?)"', html)
  162. if not match:
  163. # 尝试直接找 img
  164. try:
  165. img_ele = self.page.ele('xpath://img[contains(@src, "data:image")]')
  166. if img_ele:
  167. b64_src = img_ele.attr('src')
  168. captcha_b64 = b64_src.split(',')[1]
  169. else:
  170. raise NotFoundError("Captcha image not found")
  171. except:
  172. raise NotFoundError("Captcha image not found (Regex failed)")
  173. else:
  174. captcha_b64 = match.group(1)
  175. image_bytes = base64.b64decode(captcha_b64)
  176. # 4. 识别验证码 (本地 OCR 服务)
  177. captcha_code = self.ocr_engine.inference_captcha(image_bytes)
  178. # 5. 提交验证码 (获取 PersonalInfo)
  179. self._submit_captcha(captcha_code)
  180. self.session_create_time = time.time()
  181. self._log("Session created successfully.")
  182. except Exception as e:
  183. self._log(f"Session Create Failed: {e}")
  184. self.cleanup()
  185. raise e
  186. def _submit_captcha(self, code):
  187. """
  188. 提交验证码,获取 personalinfo 和 emailValControl
  189. """
  190. url = f"{self.base_url}/en/appointment-form"
  191. payload = {
  192. '_token': self.csrf_token,
  193. 'cpJvnsControl': '',
  194. 'mailConfirmCode': code
  195. }
  196. # 使用 Fetch 提交 (Form-UrlEncoded)
  197. resp = self._perform_request('POST', url, data=payload, headers={
  198. 'X-Requested-With': 'XMLHttpRequest'
  199. })
  200. # 解析返回的 HTML 片段
  201. html = resp.text
  202. # 提取 personalinfo
  203. match_pi = re.search(r"personalinfo:\s*'([^']*)'", html)
  204. if match_pi: self.personal_info_val = match_pi.group(1)
  205. # 提取 emailValControl
  206. match_ev = re.search(r"emailValControl:\s*'([^']*)'", html)
  207. if match_ev: self.email_val_control = match_ev.group(1)
  208. if not self.personal_info_val:
  209. raise NotFoundError(message="Personalinfo not found in captcha response")
  210. # 更新 CSRF (如果返回了新的)
  211. m = re.search(r'name="csrf-token" content="([^"]+)"', html)
  212. if m: self.csrf_token = m.group(1)
  213. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  214. res = VSQueryResult()
  215. res.success = False
  216. consular_id = self.free_config.get("consularid", "1")
  217. url = f"{self.base_url}/en/getdate"
  218. payload = {
  219. "consularid": consular_id,
  220. "exitid": "1",
  221. "servicetypeid": "1",
  222. "calendarType": "2",
  223. "totalperson": "1"
  224. }
  225. headers = {
  226. 'X-CSRF-TOKEN': self.csrf_token,
  227. 'X-Requested-With': 'XMLHttpRequest'
  228. }
  229. try:
  230. resp = self._perform_request('POST', url, data=payload, headers=headers, retry_count=1)
  231. except Exception as e:
  232. self._log(f"Query Error: {e}")
  233. raise e
  234. j = resp.json()
  235. dates = j.get("getDateEnable", [])
  236. if dates:
  237. res.success = True
  238. res.availability_status = AvailabilityStatus.Available
  239. earliest_date = dates[0]
  240. earliest_dt = datetime.strptime(earliest_date, "%d-%m-%Y")
  241. res.earliest_date = earliest_dt
  242. res.availability = [
  243. DateAvailability(date=datetime.strptime(d, "%d-%m-%Y"), times=[])
  244. for d in dates
  245. ]
  246. else:
  247. res.availability_status = AvailabilityStatus.NoneAvailable
  248. return res
  249. def book(self, slot_info: VSQueryResult, user_inputs: Dict) -> VSBookResult:
  250. res = VSBookResult()
  251. available_dates = [da.date for da in slot_info.availability]
  252. exp_start = user_inputs.get('expected_start_date', '')
  253. exp_end = user_inputs.get('expected_end_date', '')
  254. available_dates_str = [
  255. da.date.strftime("%Y-%m-%d")
  256. for da in slot_info.availability
  257. ]
  258. valid_dates = self._filter_dates(available_dates, exp_start, exp_end)
  259. if not valid_dates:
  260. raise NotFoundError("No dates match constraints")
  261. target_date = random.choice(valid_dates)
  262. self._log(f"Selected date: {target_date}")
  263. # 1. 获取时间 Slot
  264. time_slot = self._get_slot_time(target_date)
  265. # 2. 发送邮件流程
  266. alias_email = get_alias_email(user_inputs.get("email"), new_domain='gmail-app.com')
  267. self._send_email_step1(alias_email)
  268. self._send_email_step2("0")
  269. # 3. 读取 OTP
  270. otp_code = self._read_otp_email(alias_email)
  271. # 4. 提交确认
  272. book_res_html = self._confirm_appointment(target_date, time_slot, user_inputs, otp_code, alias_email)
  273. if "complete all required fields" in book_res_html.lower():
  274. raise BizLogicError("Incomplete fields response")
  275. match = re.search(r'https:\/\/checkout\.stripe\.com\/c\/pay\/[^\s"]+', book_res_html)
  276. res.success = True
  277. res.fee_amount = 3000
  278. res.fee_currency = 'EUR'
  279. res.book_date = target_date
  280. res.book_time = time_slot['time']
  281. if match:
  282. res.payment_link = match.group(0)
  283. self._log(f"Payment Link: {res.payment_link}")
  284. return res
  285. # ---------------------------------------------------------
  286. # 辅助方法
  287. # ---------------------------------------------------------
  288. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  289. if not self.page:
  290. raise BizLogicError("Browser not init")
  291. req_url = url
  292. if params:
  293. sep = '&' if '?' in req_url else '?'
  294. req_url += sep + urlencode(params)
  295. fetch_opts = { "method": method.upper(), "headers": headers or {}, "credentials": "include" }
  296. if json_data:
  297. fetch_opts['body'] = json.dumps(json_data)
  298. fetch_opts['headers']['Content-Type'] = 'application/json'
  299. elif data:
  300. if isinstance(data, dict):
  301. fetch_opts['body'] = urlencode(data)
  302. fetch_opts['headers']['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
  303. else:
  304. fetch_opts['body'] = data
  305. js = f"""
  306. return fetch("{req_url}", {json.dumps(fetch_opts)})
  307. .then(async r => {{
  308. const h = {{}}; r.headers.forEach((v, k) => h[k] = v);
  309. return {{ status: r.status, body: await r.text(), headers: h, url: r.url }};
  310. }}).catch(e => {{ return {{ status: 0, body: e.toString() }}; }});
  311. """
  312. resp = BrowserResponse(self.page.run_js(js, timeout=60))
  313. if resp.status_code == 200:
  314. return resp
  315. elif resp.status_code == 403:
  316. if "Just a moment" in resp.text and retry_count < 2:
  317. self._log("Cloudflare 403. Refreshing...")
  318. if self._refresh_firewall_session():
  319. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  320. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  321. elif resp.status_code == 429:
  322. self.is_healthy = False
  323. raise RateLimiteddError()
  324. elif resp.status_code in [401, 419]:
  325. self.is_healthy = False
  326. raise SessionExpiredOrInvalidError()
  327. else:
  328. raise BizLogicError(f"HTTP {resp.status_code}: {resp.text[:100]}")
  329. def _refresh_firewall_session(self):
  330. try:
  331. self.page.refresh()
  332. cf = CloudflareBypasser(self.page, log=self.config.debug)
  333. return cf.bypass(max_retry=10)
  334. except: return False
  335. def _get_slot_time(self, date) -> Dict:
  336. url = f"{self.base_url}/en/senddate"
  337. dt_m = datetime.strptime(date, "%Y-%m-%d")
  338. converted_date = dt_m.strftime("%d-%m-%Y")
  339. payload = {
  340. "fulldate": converted_date,
  341. "totalperson": "1",
  342. "set_new_consular_id": self.free_config.get("consularid", "1"),
  343. "set_new_exit_office_id": "1",
  344. "calendarType": "2",
  345. "set_new_service_type_id": "1",
  346. "personalinfo": self.personal_info_val
  347. }
  348. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  349. resp = self._perform_request('POST', url, data=payload, headers=headers)
  350. # 使用 Regex 提取 Slot
  351. times = []
  352. # pattern: data-id="123" ... <i>09:00</i>
  353. for m in re.finditer(r'data-id="([^"]+)"[^>]*data-all="([^"]+)"[^>]*>.*?<i>(.*?)</i>', resp.text, re.DOTALL):
  354. times.append({'data_id': m.group(1), 'data_all': m.group(2), 'time': m.group(3).strip()})
  355. if not times: raise NotFoundError("No time slots")
  356. return random.choice(times)
  357. def _send_email_step1(self, email):
  358. url = f"{self.base_url}/en/jky45fgd"
  359. payload = { "emailCheck": email, "personalinfo": self.personal_info_val }
  360. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  361. self._perform_request('POST', url, data=payload, headers=headers)
  362. def _send_email_step2(self, code_val):
  363. url = f"{self.base_url}/en/confirmCodeSendMail"
  364. payload = { "confirmCode": code_val, "emailValControl": self.email_val_control }
  365. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  366. self._perform_request('POST', url, data=payload, headers=headers)
  367. def _read_otp_email(self, recipient) -> str:
  368. master_email = "visafly666@gmail.com"
  369. sender = 'Visametric - verify at visametric.com'
  370. now_utc = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
  371. for i in range(12):
  372. c = VSCloudApi.Instance().fetch_mail_content(master_email, sender, recipient, 'Verification Code', 'Verification code', now_utc, 300)
  373. if c:
  374. m = re.search(r'\b\d{6}\b', c)
  375. if m: return m.group(0)
  376. time.sleep(5)
  377. raise NotFoundError("OTP timeout")
  378. def _confirm_appointment(self, date, slot_data, user_inputs, otp, alias_email):
  379. url = f"{self.base_url}/en/personal/appointment/create"
  380. def _get_dob(d):
  381. try: return datetime.strptime(d[:10], "%Y-%m-%d")
  382. except: return datetime.now()
  383. dob = _get_dob(user_inputs.get('birthday', ''))
  384. payload = {
  385. "_token": self.csrf_token,
  386. "country": str(self.free_config.get("consularid", "1")),
  387. "visitingcountry": str(self.free_config.get("consularid", "1")),
  388. "city": "6",
  389. "office": "1",
  390. "officetype": "1",
  391. "totalPerson": "1",
  392. "name1": user_inputs.get('first_name', '').upper(),
  393. "surname1": user_inputs.get('last_name', '').upper(),
  394. "nationality1": "2",
  395. "birthday1": str(dob.day),
  396. "birthmonth1": str(dob.month),
  397. "birthyear1": str(dob.year),
  398. "passport1": user_inputs.get('passport_no'),
  399. "passportExpirationDate1": datetime.strptime(user_inputs.get('passport_expiry_date', '')[:10], "%Y-%m-%d").strftime("%d-%m-%Y"),
  400. "email1": alias_email,
  401. "phone1": user_inputs.get('phone_no'),
  402. "alternativephone1": "",
  403. "mailConfirmCode": otp,
  404. "ctval": slot_data['data_id'],
  405. "qtallvert": slot_data['data_all'],
  406. "oldofficetype": "1",
  407. "oldtotalperson": "1",
  408. "rePaymentControl": "0",
  409. "view_set_app_country": "Schengen - Tourism/Family&Friend Visit/Transit Visa/Other Purposes",
  410. "view_set_app_office": "Dublin",
  411. "view_set_app_service_type": "NORMAL",
  412. "cargoactive": "0",
  413. "setnewcalendarstatus": "2",
  414. "availableDaycontrol": "0",
  415. "travelStartDate": datetime.strptime(user_inputs.get('travel_date', '')[:10], "%Y-%m-%d").strftime("%d-%m-%Y"),
  416. "personalapproveTerms": "1"
  417. }
  418. # 补全空字段 (Person 2-4)
  419. for i in range(2, 5):
  420. payload.update({
  421. f"name{i}": "", f"surname{i}": "", f"nationality{i}": "0", f"birthday{i}": "0", f"birthmonth{i}": "0", f"birthyear{i}": "0", f"passport{i}": "", f"passportExpirationDate{i}": "", f"email{i}": alias_email, f"phone{i}": user_inputs.get('phone_no'), f"alternativephone{i}": ""
  422. })
  423. headers = {'X-Requested-With': 'XMLHttpRequest'}
  424. return self._perform_request('POST', url, data=payload, headers=headers).text
  425. def _filter_dates(self, dates, start, end):
  426. if not start or not end: return dates
  427. valid = []
  428. s = datetime.strptime(start[:10], "%Y-%m-%d")
  429. e = datetime.strptime(end[:10], "%Y-%m-%d")
  430. for d in dates:
  431. c = datetime.strptime(d, "%Y-%m-%d")
  432. if s <= c <= e: valid.append(d)
  433. random.shuffle(valid)
  434. return valid
  435. def cleanup(self):
  436. if self.page:
  437. try: self.page.quit()
  438. except: pass
  439. self.page = None
  440. if os.path.exists(self.root_workspace):
  441. for _ in range(3):
  442. try: time.sleep(0.2); shutil.rmtree(self.root_workspace, ignore_errors=True); break
  443. except: time.sleep(0.5)
  444. if self.tunnel:
  445. try: self.tunnel.stop()
  446. except: pass
  447. self.tunnel = None
  448. def __del__(self):
  449. self.cleanup()