# de_plugin2.py
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import base64
  9. import socket
  10. from datetime import datetime
  11. from typing import List, Dict, Optional, Any, Callable
  12. from urllib.parse import urljoin, urlparse, urlencode
  13. # DrissionPage 核心
  14. from DrissionPage import ChromiumPage, ChromiumOptions
  15. from vs_plg import IVSPlg
  16. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  17. from toolkit.vs_cloud_api import VSCloudApi
  18. from utils.cloudflare_bypass_for_scraping import CloudflareBypasser
  19. from toolkit.proxy_tunnel import ProxyTunnel
  20. from toolkit.ocr_engine import DddOcrEngine
  21. class BrowserResponse:
  22. def __init__(self, result_dict):
  23. result_dict = result_dict or {}
  24. self.status_code = result_dict.get('status', 0)
  25. self.text = result_dict.get('body', '')
  26. self.headers = result_dict.get('headers', {})
  27. self.url = result_dict.get('url', '')
  28. self._json = None
  29. def json(self):
  30. if self._json is None:
  31. if not self.text: return {}
  32. try: self._json = json.loads(self.text)
  33. except: self._json = {}
  34. return self._json
  35. def to_yyyymmdd(data_str: str, date_str_format: str, target_format: str="%Y-%m-%d"):
  36. dt = datetime.strptime(data_str, date_str_format)
  37. return dt.strftime("%Y-%m-%d")
  38. def get_alias_email(email: str, new_domain: str = "gmail-app.com") -> str:
  39. if "@" not in email: raise ValueError(f"Invalid email: {email}")
  40. local_part, _ = email.rsplit("@", 1)
  41. return f"{local_part}@{new_domain}"
  42. class DePlugin2(IVSPlg):
    """
    Germany (Visametric) visa appointment plugin (Browser + Tunnel mode).
    """
  46. def __init__(self, group_id: str):
  47. self.group_id = group_id
  48. self.config: Optional[VSPlgConfig] = None
  49. self.free_config: Dict[str, Any] = {}
  50. self.logger = None
  51. # 浏览器实例
  52. self.page: Optional[ChromiumPage] = None
  53. # 资源隔离
  54. self.instance_id = uuid.uuid4().hex[:8]
  55. self.root_workspace = os.path.abspath(os.path.join("temp_browser_data", f"{self.group_id}_{self.instance_id}"))
  56. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  57. if not os.path.exists(self.root_workspace):
  58. os.makedirs(self.root_workspace)
  59. self.tunnel = None # 代理隧道
  60. self.is_healthy = True
  61. self.session_create_time: float = 0
  62. # 字符识别引擎
  63. self.ocr_engine: Optional[DddOcrEngine] = None
  64. # 业务状态
  65. self.base_url = "https://ie-appointment.visametric.com"
  66. self.csrf_token = ""
  67. self.personal_info_val = ""
  68. self.email_val_control = ""
  69. def get_group_id(self) -> str:
  70. return self.group_id
  71. def set_log(self, logger: Callable[[str], None]) -> None:
  72. self.logger = logger
  73. def _log(self, message):
  74. if self.logger:
  75. self.logger(f'[DePlugin] [{self.group_id}] {message}')
  76. else:
  77. print(f'[DePlugin] [{self.group_id}] {message}')
  78. def set_config(self, config: VSPlgConfig):
  79. self.config = config
  80. self.free_config = config.free_config or {}
  81. if self.free_config.get("base_url"):
  82. self.base_url = self.free_config["base_url"].rstrip('/')
  83. def health_check(self) -> bool:
  84. if not self.is_healthy:
  85. return False
  86. if not self.page:
  87. return False
  88. try:
  89. if not self.page.run_js("return 1;"):
  90. return False
  91. except:
  92. return False
  93. if self.config.session_max_life > 0:
  94. if time.time() - self.session_create_time > self.config.session_max_life * 60:
  95. self._log("Session expired.")
  96. return False
  97. return True
  98. def create_session(self):
  99. """
  100. 创建会话:启动浏览器 -> 代理隧道 -> 过盾 -> 提取 Captcha -> 本地识别 -> 提交 -> 获取 Context
  101. """
  102. self._log(f"Initializing Session (ID: {self.instance_id})...")
  103. self.ocr_engine = DddOcrEngine()
  104. co = ChromiumOptions()
  105. # 端口分配 (Docker 适配)
  106. def get_free_port():
  107. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  108. s.bind(('', 0)); return s.getsockname()[1]
  109. co.set_local_port(get_free_port())
  110. # 路径与隔离
  111. co.set_user_data_path(self.user_data_path)
  112. chrome_path = os.getenv("CHROME_BIN")
  113. if chrome_path and os.path.exists(chrome_path):
  114. co.set_paths(browser_path=chrome_path)
  115. # 代理隧道
  116. if self.config.proxy and self.config.proxy.ip:
  117. p = self.config.proxy
  118. if p.username and p.password:
  119. self._log(f"Starting Tunnel for {p.ip}...")
  120. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  121. local_proxy = self.tunnel.start()
  122. self._log(f"Tunnel started at {local_proxy}")
  123. co.set_argument(f'--proxy-server={local_proxy}')
  124. else:
  125. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  126. co.set_argument(f'--proxy-server={proxy_str}')
  127. else:
  128. self._log("[WARN] No proxy configured!")
  129. # Docker 核心参数
  130. co.headless(False)
  131. co.set_argument('--no-sandbox')
  132. co.set_argument('--disable-gpu')
  133. co.set_argument('--disable-dev-shm-usage')
  134. co.set_argument('--window-size=1920,1080')
  135. co.set_argument('--disable-blink-features=AutomationControlled')
  136. co.set_argument('--ignore-certificate-errors')
  137. try:
  138. self.page = ChromiumPage(co)
  139. # 1. 访问首页
  140. url_home = f"{self.base_url}/en"
  141. self._log(f"Navigating to {url_home}")
  142. self.page.get(url_home)
  143. # 2. Cloudflare 过盾
  144. cf = CloudflareBypasser(self.page, log=self.config.debug)
  145. if not cf.bypass(max_retry=15):
  146. if "access denied" in self.page.title.lower():
  147. raise BizLogicError("Cloudflare Access Denied")
  148. raise BizLogicError("Cloudflare bypass timeout")
  149. # 3. 提取 CSRF 和 验证码
  150. # 等待页面加载
  151. meta_ele = self.page.ele('xpath://meta[@name="csrf-token"]', timeout=30)
  152. if not meta_ele:
  153. # 截图调试,看看是不是还在 Cloudflare 或者加载失败
  154. self.page.get_screenshot(path='csrf_not_found.jpg')
  155. raise NotFoundError("CSRF Token meta tag not found (Page load failed?)")
  156. self.csrf_token = meta_ele.attr('content')
  157. # 提取验证码图片 (Visametric Base64)
  158. html = self.page.html
  159. match = re.search(r'"data:image/png;base64,"\s*\+\s*"(.*?)"', html)
  160. if not match:
  161. # 尝试直接找 img
  162. try:
  163. img_ele = self.page.ele('xpath://img[contains(@src, "data:image")]')
  164. if img_ele:
  165. b64_src = img_ele.attr('src')
  166. captcha_b64 = b64_src.split(',')[1]
  167. else:
  168. raise NotFoundError("Captcha image not found")
  169. except:
  170. raise NotFoundError("Captcha image not found (Regex failed)")
  171. else:
  172. captcha_b64 = match.group(1)
  173. image_bytes = base64.b64decode(captcha_b64)
  174. # 4. 识别验证码 (本地 OCR 服务)
  175. captcha_code = self.ocr_engine.inference_captcha(image_bytes)
  176. # 5. 提交验证码 (获取 PersonalInfo)
  177. self._submit_captcha(captcha_code)
  178. self.session_create_time = time.time()
  179. self._log("Session created successfully.")
  180. except Exception as e:
  181. self._log(f"Session Create Failed: {e}")
  182. self.cleanup()
  183. raise e
  184. def _submit_captcha(self, code):
  185. """
  186. 提交验证码,获取 personalinfo 和 emailValControl
  187. """
  188. url = f"{self.base_url}/en/appointment-form"
  189. payload = {
  190. '_token': self.csrf_token,
  191. 'cpJvnsControl': '',
  192. 'mailConfirmCode': code
  193. }
  194. # 使用 Fetch 提交 (Form-UrlEncoded)
  195. resp = self._perform_request('POST', url, data=payload, headers={
  196. 'X-Requested-With': 'XMLHttpRequest'
  197. })
  198. # 解析返回的 HTML 片段
  199. html = resp.text
  200. # 提取 personalinfo
  201. match_pi = re.search(r"personalinfo:\s*'([^']*)'", html)
  202. if match_pi: self.personal_info_val = match_pi.group(1)
  203. # 提取 emailValControl
  204. match_ev = re.search(r"emailValControl:\s*'([^']*)'", html)
  205. if match_ev: self.email_val_control = match_ev.group(1)
  206. if not self.personal_info_val:
  207. raise NotFoundError(message="Personalinfo not found in captcha response")
  208. # 更新 CSRF (如果返回了新的)
  209. m = re.search(r'name="csrf-token" content="([^"]+)"', html)
  210. if m: self.csrf_token = m.group(1)
  211. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  212. res = VSQueryResult()
  213. res.success = False
  214. consular_id = self.free_config.get("consularid", "1")
  215. url = f"{self.base_url}/en/getdate"
  216. payload = {
  217. "consularid": consular_id,
  218. "exitid": "1",
  219. "servicetypeid": "1",
  220. "calendarType": "2",
  221. "totalperson": "1"
  222. }
  223. headers = {
  224. 'X-CSRF-TOKEN': self.csrf_token,
  225. 'X-Requested-With': 'XMLHttpRequest'
  226. }
  227. try:
  228. resp = self._perform_request('POST', url, data=payload, headers=headers, retry_count=1)
  229. except Exception as e:
  230. self._log(f"Query Error: {e}")
  231. raise e
  232. j = resp.json()
  233. dates = j.get("getDateEnable", [])
  234. if dates:
  235. res.success = True
  236. res.availability_status = AvailabilityStatus.Available
  237. res.earliest_date = to_yyyymmdd(dates[0], "%d-%m-%Y")
  238. res.availability = [
  239. DateAvailability(date=to_yyyymmdd(d, "%d-%m-%Y"), times=[])
  240. for d in dates
  241. ]
  242. else:
  243. res.availability_status = AvailabilityStatus.NoneAvailable
  244. return res
  245. def book(self, slot_info: VSQueryResult, user_inputs: Dict) -> VSBookResult:
  246. res = VSBookResult()
  247. available_dates = [da.date for da in slot_info.availability]
  248. exp_start = user_inputs.get('expected_start_date', '')
  249. exp_end = user_inputs.get('expected_end_date', '')
  250. valid_dates = self._filter_dates(available_dates, exp_start, exp_end)
  251. if not valid_dates:
  252. raise NotFoundError("No dates match constraints")
  253. target_date = random.choice(valid_dates)
  254. self._log(f"Selected date: {target_date}")
  255. # 1. 获取时间 Slot
  256. time_slot = self._get_slot_time(target_date)
  257. # 2. 发送邮件流程
  258. alias_email = get_alias_email(user_inputs.get("email"), new_domain='gmail-app.com')
  259. self._send_email_step1(alias_email)
  260. self._send_email_step2("0")
  261. # 3. 读取 OTP
  262. otp_code = self._read_otp_email(alias_email)
  263. # 4. 提交确认
  264. book_res_html = self._confirm_appointment(target_date, time_slot, user_inputs, otp_code, alias_email)
  265. if "complete all required fields" in book_res_html.lower():
  266. raise BizLogicError("Incomplete fields response")
  267. match = re.search(r'https:\/\/checkout\.stripe\.com\/c\/pay\/[^\s"]+', book_res_html)
  268. res.success = True
  269. res.fee_amount = 3000
  270. res.fee_currency = 'EUR'
  271. res.book_date = target_date
  272. res.book_time = time_slot['time']
  273. if match:
  274. res.payment_link = match.group(0)
  275. self._log(f"Payment Link: {res.payment_link}")
  276. return res
    # ---------------------------------------------------------
    # Helper methods
    # ---------------------------------------------------------
  280. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  281. if not self.page:
  282. raise BizLogicError("Browser not init")
  283. req_url = url
  284. if params:
  285. sep = '&' if '?' in req_url else '?'
  286. req_url += sep + urlencode(params)
  287. fetch_opts = { "method": method.upper(), "headers": headers or {}, "credentials": "include" }
  288. if json_data:
  289. fetch_opts['body'] = json.dumps(json_data)
  290. fetch_opts['headers']['Content-Type'] = 'application/json'
  291. elif data:
  292. if isinstance(data, dict):
  293. fetch_opts['body'] = urlencode(data)
  294. fetch_opts['headers']['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
  295. else:
  296. fetch_opts['body'] = data
  297. js = f"""
  298. return fetch("{req_url}", {json.dumps(fetch_opts)})
  299. .then(async r => {{
  300. const h = {{}}; r.headers.forEach((v, k) => h[k] = v);
  301. return {{ status: r.status, body: await r.text(), headers: h, url: r.url }};
  302. }}).catch(e => {{ return {{ status: 0, body: e.toString() }}; }});
  303. """
  304. resp = BrowserResponse(self.page.run_js(js, timeout=60))
  305. if resp.status_code == 200:
  306. return resp
  307. elif resp.status_code == 403:
  308. if "Just a moment" in resp.text and retry_count < 2:
  309. self._log("Cloudflare 403. Refreshing...")
  310. if self._refresh_firewall_session():
  311. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  312. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  313. elif resp.status_code == 429:
  314. self.is_healthy = False
  315. raise RateLimiteddError()
  316. elif resp.status_code in [401, 419]:
  317. self.is_healthy = False
  318. raise SessionExpiredOrInvalidError()
  319. else:
  320. raise BizLogicError(f"HTTP {resp.status_code}: {resp.text[:100]}")
  321. def _refresh_firewall_session(self):
  322. try:
  323. self.page.refresh()
  324. cf = CloudflareBypasser(self.page, log=self.config.debug)
  325. return cf.bypass(max_retry=10)
  326. except: return False
  327. def _get_slot_time(self, date) -> Dict:
  328. url = f"{self.base_url}/en/senddate"
  329. dt_m = datetime.strptime(date, "%Y-%m-%d")
  330. converted_date = dt_m.strftime("%d-%m-%Y")
  331. payload = {
  332. "fulldate": converted_date,
  333. "totalperson": "1",
  334. "set_new_consular_id": self.free_config.get("consularid", "1"),
  335. "set_new_exit_office_id": "1",
  336. "calendarType": "2",
  337. "set_new_service_type_id": "1",
  338. "personalinfo": self.personal_info_val
  339. }
  340. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  341. resp = self._perform_request('POST', url, data=payload, headers=headers)
  342. # 使用 Regex 提取 Slot
  343. times = []
  344. # pattern: data-id="123" ... <i>09:00</i>
  345. for m in re.finditer(r'data-id="([^"]+)"[^>]*data-all="([^"]+)"[^>]*>.*?<i>(.*?)</i>', resp.text, re.DOTALL):
  346. times.append({'data_id': m.group(1), 'data_all': m.group(2), 'time': m.group(3).strip()})
  347. if not times: raise NotFoundError("No time slots")
  348. return random.choice(times)
  349. def _send_email_step1(self, email):
  350. url = f"{self.base_url}/en/jky45fgd"
  351. payload = { "emailCheck": email, "personalinfo": self.personal_info_val }
  352. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  353. self._perform_request('POST', url, data=payload, headers=headers)
  354. def _send_email_step2(self, code_val):
  355. url = f"{self.base_url}/en/confirmCodeSendMail"
  356. payload = { "confirmCode": code_val, "emailValControl": self.email_val_control }
  357. headers = {'X-CSRF-TOKEN': self.csrf_token, 'X-Requested-With': 'XMLHttpRequest'}
  358. self._perform_request('POST', url, data=payload, headers=headers)
  359. def _read_otp_email(self, recipient) -> str:
  360. master_email = "visafly666@gmail.com"
  361. sender = 'Visametric - verify at visametric.com'
  362. now_utc = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
  363. for i in range(12):
  364. c = VSCloudApi.Instance().fetch_mail_content(master_email, sender, recipient, 'Verification Code', 'Verification code', now_utc, 300)
  365. if c:
  366. m = re.search(r'\b\d{6}\b', c)
  367. if m: return m.group(0)
  368. time.sleep(5)
  369. raise NotFoundError("OTP timeout")
  370. def _confirm_appointment(self, date, slot_data, user_inputs, otp, alias_email):
  371. url = f"{self.base_url}/en/personal/appointment/create"
  372. def _get_dob(d):
  373. try: return datetime.strptime(d[:10], "%Y-%m-%d")
  374. except: return datetime.now()
  375. dob = _get_dob(user_inputs.get('birthday', ''))
  376. payload = {
  377. "_token": self.csrf_token,
  378. "country": str(self.free_config.get("consularid", "1")),
  379. "visitingcountry": str(self.free_config.get("consularid", "1")),
  380. "city": "6",
  381. "office": "1",
  382. "officetype": "1",
  383. "totalPerson": "1",
  384. "name1": user_inputs.get('first_name', '').upper(),
  385. "surname1": user_inputs.get('last_name', '').upper(),
  386. "nationality1": "2",
  387. "birthday1": str(dob.day),
  388. "birthmonth1": str(dob.month),
  389. "birthyear1": str(dob.year),
  390. "passport1": user_inputs.get('passport_no'),
  391. "passportExpirationDate1": datetime.strptime(user_inputs.get('passport_expiry_date', '')[:10], "%Y-%m-%d").strftime("%d-%m-%Y"),
  392. "email1": alias_email,
  393. "phone1": user_inputs.get('phone_no'),
  394. "alternativephone1": "",
  395. "mailConfirmCode": otp,
  396. "ctval": slot_data['data_id'],
  397. "qtallvert": slot_data['data_all'],
  398. "oldofficetype": "1",
  399. "oldtotalperson": "1",
  400. "rePaymentControl": "0",
  401. "view_set_app_country": "Schengen - Tourism/Family&Friend Visit/Transit Visa/Other Purposes",
  402. "view_set_app_office": "Dublin",
  403. "view_set_app_service_type": "NORMAL",
  404. "cargoactive": "0",
  405. "setnewcalendarstatus": "2",
  406. "availableDaycontrol": "0",
  407. "travelStartDate": datetime.strptime(user_inputs.get('travel_date', '')[:10], "%Y-%m-%d").strftime("%d-%m-%Y"),
  408. "personalapproveTerms": "1"
  409. }
  410. # 补全空字段 (Person 2-4)
  411. for i in range(2, 5):
  412. payload.update({
  413. f"name{i}": "", f"surname{i}": "", f"nationality{i}": "0", f"birthday{i}": "0", f"birthmonth{i}": "0", f"birthyear{i}": "0", f"passport{i}": "", f"passportExpirationDate{i}": "", f"email{i}": alias_email, f"phone{i}": user_inputs.get('phone_no'), f"alternativephone{i}": ""
  414. })
  415. headers = {'X-Requested-With': 'XMLHttpRequest'}
  416. return self._perform_request('POST', url, data=payload, headers=headers).text
  417. def _filter_dates(self, dates, start, end):
  418. if not start or not end: return dates
  419. valid = []
  420. s = datetime.strptime(start[:10], "%Y-%m-%d")
  421. e = datetime.strptime(end[:10], "%Y-%m-%d")
  422. for d in dates:
  423. c = datetime.strptime(d, "%Y-%m-%d")
  424. if s <= c <= e: valid.append(d)
  425. random.shuffle(valid)
  426. return valid
  427. def cleanup(self):
  428. if self.page:
  429. try: self.page.quit()
  430. except: pass
  431. self.page = None
  432. if os.path.exists(self.root_workspace):
  433. for _ in range(3):
  434. try: time.sleep(0.2); shutil.rmtree(self.root_workspace, ignore_errors=True); break
  435. except: time.sleep(0.5)
  436. if self.tunnel:
  437. try: self.tunnel.stop()
  438. except: pass
  439. self.tunnel = None
  440. def __del__(self):
  441. self.cleanup()