pol_plugin.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. import time
  2. import json
  3. import random
  4. import re
  5. import os
  6. import uuid
  7. import shutil
  8. import base64
  9. import socket
  10. import easyocr
  11. from datetime import datetime
  12. from typing import List, Dict, Optional, Any, Callable
  13. from urllib.parse import urljoin, urlparse, urlencode
  14. # DrissionPage 核心
  15. from DrissionPage import ChromiumPage, ChromiumOptions
  16. from vs_plg import IVSPlg
  17. from vs_types import VSPlgConfig, AppointmentType, VSQueryResult, VSBookResult, AvailabilityStatus, TimeSlot, DateAvailability, NotFoundError, PermissionDeniedError, RateLimiteddError, SessionExpiredOrInvalidError, BizLogicError
  18. from toolkit.vs_cloud_api import VSCloudApi
  19. from toolkit.proxy_tunnel import ProxyTunnel
  20. from utils.fingerprint_utils import FingerprintGenerator
  21. class BrowserResponse:
  22. def __init__(self, result_dict):
  23. result_dict = result_dict or {}
  24. self.status_code = result_dict.get('status', 0)
  25. self.text = result_dict.get('body', '')
  26. self.headers = result_dict.get('headers', {})
  27. self.url = result_dict.get('url', '')
  28. self._json = None
  29. def json(self):
  30. if self._json is None:
  31. if not self.text: return {}
  32. try: self._json = json.loads(self.text)
  33. except: self._json = {}
  34. return self._json
  35. def to_yyyymmdd(data_str: str, date_str_format: str, target_format: str="%Y-%m-%d"):
  36. dt = datetime.strptime(data_str, date_str_format)
  37. return dt.strftime("%Y-%m-%d")
  38. def get_alias_email(email: str, new_domain: str = "gmail-app.com") -> str:
  39. if "@" not in email: raise ValueError(f"Invalid email: {email}")
  40. local_part, _ = email.rsplit("@", 1)
  41. return f"{local_part}@{new_domain}"
  42. class PolPlugin(IVSPlg):
  43. """
  44. Poland (e-konsulat) 签证预约插件 (Browser + Tunnel Mode)
  45. """
  46. def __init__(self, group_id: str):
  47. self.group_id = group_id
  48. self.config: Optional[VSPlgConfig] = None
  49. self.free_config: Dict[str, Any] = {}
  50. self.logger = None
  51. # 浏览器实例
  52. self.page: Optional[ChromiumPage] = None
  53. # 资源隔离
  54. self.instance_id = uuid.uuid4().hex[:8]
  55. self.root_workspace = os.path.abspath(os.path.join("data/temp_browser_data", f"{self.group_id}.{self.instance_id}"))
  56. self.user_data_path = os.path.join(self.root_workspace, "user_data")
  57. if not os.path.exists(self.root_workspace):
  58. os.makedirs(self.root_workspace)
  59. self.reader = easyocr.Reader(['en'], gpu=False)
  60. self.tunnel = None # 代理隧道
  61. self.is_healthy = True
  62. self.session_create_time: float = 0
  63. def get_group_id(self) -> str:
  64. return self.group_id
  65. def set_log(self, logger: Callable[[str], None]) -> None:
  66. self.logger = logger
  67. def _log(self, message):
  68. if self.logger:
  69. self.logger(f'[PolPlugin] [{self.group_id}] {message}')
  70. else:
  71. print(f'[PolPlugin] [{self.group_id}] {message}')
  72. def set_config(self, config: VSPlgConfig):
  73. self.config = config
  74. self.free_config = config.free_config or {}
  75. def keep_alive(self):
  76. pass
  77. def health_check(self) -> bool:
  78. if not self.is_healthy:
  79. return False
  80. if not self.page:
  81. return False
  82. try:
  83. if not self.page.run_js("return 1;"):
  84. return False
  85. except:
  86. return False
  87. if self.config.session_max_life > 0:
  88. if time.time() - self.session_create_time > self.config.session_max_life * 60:
  89. self._log("Session expired.")
  90. return False
  91. return True
  92. def create_session(self):
  93. """
  94. 创建会话:启动浏览器 -> 代理隧道 -> 提取 Captcha -> 本地识别 -> 提交 -> 获取 Context
  95. """
  96. self._log(f"Initializing Session (ID: {self.instance_id})...")
  97. co = ChromiumOptions()
  98. def get_free_port():
  99. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  100. s.bind(('', 0)); return s.getsockname()[1]
  101. co.set_local_port(get_free_port())
  102. co.set_user_data_path(self.user_data_path)
  103. chrome_path = os.getenv("CHROME_BIN")
  104. if chrome_path and os.path.exists(chrome_path):
  105. co.set_paths(browser_path=chrome_path)
  106. if self.config.proxy and self.config.proxy.ip:
  107. p = self.config.proxy
  108. if p.username and p.password:
  109. self._log(f"Starting Tunnel for {p.ip}...")
  110. self.tunnel = ProxyTunnel(p.ip, p.port, p.username, p.password)
  111. local_proxy = self.tunnel.start()
  112. self._log(f"Tunnel started at {local_proxy}")
  113. co.set_argument(f'--proxy-server={local_proxy}')
  114. else:
  115. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  116. co.set_argument(f'--proxy-server={proxy_str}')
  117. else:
  118. self._log("[WARN] No proxy configured!")
  119. fingerprint_gen = FingerprintGenerator()
  120. specific_fp = fingerprint_gen.generate(self.config.account.username)
  121. self._log(f'browser fingerprint={specific_fp}')
  122. co.headless(False)
  123. co.set_argument('--no-sandbox')
  124. # co.set_argument('--disable-gpu')
  125. co.set_argument('--disable-dev-shm-usage')
  126. co.set_argument('--window-size=1920,1080')
  127. co.set_argument('--disable-blink-features=AutomationControlled')
  128. co.set_argument('--ignore-certificate-errors')
  129. co.set_argument('--ignore-gpu-blocklist') # 忽略无显卡黑名单
  130. co.set_argument('--enable-webgl') # 强制开启 WebGL
  131. co.set_argument('--use-gl=angle') # 使用 ANGLE 渲染后端
  132. co.set_argument('--use-angle=swiftshader')# 强制使用 CPU 进行 3D 渲染 (这步最关键!)
  133. co.set_argument(f"--fingerprint={specific_fp.get('seed')}")
  134. co.set_argument(f"--fingerprint-platform={specific_fp.get('platform')}")
  135. co.set_argument(f"--fingerprint-brand={specific_fp.get('brand')}")
  136. try:
  137. self.page = ChromiumPage(co)
  138. url_home = "https://secure.e-konsulat.gov.pl"
  139. self._log(f"Navigating to {url_home}")
  140. self.page.get(url_home)
  141. self.page.wait.doc_loaded()
  142. self.session_create_time = time.time()
  143. self._log("Session created successfully.")
  144. except Exception as e:
  145. self._log(f"Session Create Failed: {e}")
  146. self.cleanup()
  147. raise e
  148. def query(self, apt_type: AppointmentType) -> VSQueryResult:
  149. res = VSQueryResult()
  150. res.success = False
  151. query_url = self.free_config.get('query_url')
  152. service_type = self.free_config.get('service_type')
  153. location = self.free_config.get('location')
  154. self._log(f"Navigating to {query_url}")
  155. self.page.get(query_url)
  156. captcha_image_selector = 't:img@alt=Weryfikacja obrazkowa'
  157. if not self.page.wait.ele_displayed(captcha_image_selector, timeout=30):
  158. raise BizLogicError(message=f"Wait for selector={captcha_image_selector} timeout")
  159. time.sleep(3)
  160. img_ele = self.page.ele(captcha_image_selector)
  161. img_src = img_ele.attr('src')
  162. base64_data = img_src.split(',')[1]
  163. image_bytes = base64.b64decode(base64_data)
  164. result = self.reader.readtext(image_bytes)
  165. captcha_code = result[0][-2] if result else ""
  166. self._log(f"Captcha code={captcha_code}")
  167. if not captcha_code:
  168. BizLogicError(message="Solve captcha failed")
  169. input_ele = self.page.ele('t:input@aria-label=Znaki z obrazka')
  170. input_ele.clear()
  171. input_ele.input(captcha_code)
  172. btn_selector = 'Dalej'
  173. self.page.ele(btn_selector).click(by_js=True)
  174. toast_ele = self.page.ele('tag:app-toast', timeout=2)
  175. if toast_ele:
  176. error_msg = toast_ele.text.replace('\n', ' ').strip()
  177. raise BizLogicError(message=f"Captcha verify error={error_msg}")
  178. if not self._select_mat_option('Rodzaj usługi', service_type):
  179. raise BizLogicError(message=f'Process select box failed')
  180. if not self._select_mat_option('Lokalizacja', location):
  181. raise BizLogicError(message=f'Process select box failed')
  182. if not self._select_mat_option('Chcę zarezerwować termin dla', '1 osob'):
  183. raise BizLogicError(message=f'Process select box failed')
  184. available_dates = []
  185. self._log("Wait Query Slot...")
  186. for _ in range(20):
  187. try:
  188. no_slot_alert = self.page.ele('text:Chwilowo wszystkie udostępnione terminy', timeout=0.1)
  189. if no_slot_alert:
  190. self._log("No slots available")
  191. break
  192. listbox = self.page.ele('@role=listbox', timeout=0.1)
  193. if not listbox:
  194. termin_label = self.page.ele('tag:mat-label@@text():Termin', timeout=0.5)
  195. if termin_label:
  196. termin_select = termin_label.parent('tag:app-select-control').ele('tag:mat-select')
  197. if termin_select and 'mat-select-disabled' not in str(termin_select.attr('class')):
  198. try:
  199. termin_select.click()
  200. except:
  201. termin_select.click(by_js=True)
  202. time.sleep(0.5)
  203. listbox = self.page.ele('@role=listbox', timeout=1)
  204. if listbox:
  205. option_elements = listbox.eles('.mat-option-text')
  206. for ele in option_elements:
  207. date_str = ele.text.strip()
  208. if date_str:
  209. available_dates.append(date_str)
  210. if available_dates:
  211. self._log(f"✅ Success extracted dates: {available_dates}")
  212. break
  213. except Exception as e:
  214. self._log(f"Query loop exception: {e}")
  215. time.sleep(0.5)
  216. if available_dates:
  217. selected_date = random.choice(available_dates)
  218. self._log(f"🎲 Random select date: {selected_date}...")
  219. locked = self._lock_slot(selected_date)
  220. if locked:
  221. session_id = self._save_browser_session()
  222. wechat_message = f"🎉 [Poland] Slot locked\n📍 location: {location}\n📅 date: {selected_date}\n🔑 SessionId: {session_id}"
  223. VSCloudApi.Instance().push_weixin_text(wechat_message)
  224. res.success = True
  225. res.availability_status = AvailabilityStatus.Available
  226. earliest_date = available_dates[0]
  227. earliest_dt = datetime.strptime(earliest_date, "%Y-%m-%d")
  228. res.earliest_date = earliest_dt
  229. res.availability = [
  230. DateAvailability(
  231. date=datetime.strptime(d, "%Y-%m-%d"),
  232. times=[],
  233. )
  234. for d in available_dates
  235. ]
  236. else:
  237. res.success = False
  238. res.availability_status = AvailabilityStatus.NoneAvailable
  239. res.availability = []
  240. return res
  241. def _lock_slot(self, lock_date):
  242. slot_selector = f'xpath://span[contains(@class, "mat-option-text") and contains(text(), "{lock_date}")]'
  243. slot_ele = self.page.ele(slot_selector, timeout=1)
  244. if not slot_ele:
  245. termin_label = self.page.ele('tag:mat-label@@text():Termin', timeout=1)
  246. if termin_label:
  247. termin_select = termin_label.parent('tag:app-select-control').ele('tag:mat-select')
  248. if termin_select and 'mat-select-disabled' not in str(termin_select.attr('class')):
  249. try:
  250. termin_select.click()
  251. except:
  252. termin_select.click(by_js=True)
  253. time.sleep(0.5)
  254. slot_ele = self.page.ele(slot_selector, timeout=3)
  255. if not slot_ele:
  256. self._log(f"❌ Can't find date {lock_date} to click.")
  257. return False
  258. try:
  259. slot_ele.click()
  260. except:
  261. slot_ele.click(by_js=True)
  262. self._log(f"✅ Clicked date: {lock_date}")
  263. time.sleep(1)
  264. btn_selector = 'xpath://button[.//span[contains(text(), "Dalej")]]'
  265. next_btn = self.page.ele(btn_selector, timeout=3)
  266. if not next_btn:
  267. self._log("❌ Can't find 'Dalej' button")
  268. return False
  269. try:
  270. next_btn.click()
  271. except:
  272. next_btn.click(by_js=True)
  273. self._log("✅ Clicked Dalej, locking slot...")
  274. return self.page.wait.url_change('weryfikacja-obrazkowa', exclude=True, timeout=15)
  275. def _select_mat_option(self, label_text, option_text):
  276. self._log(f"choose: {label_text} -> {option_text}")
  277. label = self.page.ele(f'tag:mat-label@@text():{label_text}', timeout=5)
  278. if not label:
  279. self._log(f"Can't find label: {label_text}")
  280. return False
  281. container = label.parent('tag:app-select-control')
  282. select_box = container.ele('tag:mat-select')
  283. if not select_box:
  284. self._log("Can't find select box")
  285. return False
  286. select_box.click(by_js=True)
  287. time.sleep(0.5)
  288. option = self.page.ele(f'tag:mat-option@@text():{option_text}', timeout=3)
  289. if option:
  290. option.click(by_js=True)
  291. time.sleep(0.5)
  292. return True
  293. else:
  294. self._log(f"Can't find option: {option_text}")
  295. return False
  296. def book(self, slot_info: VSQueryResult, user_inputs: Dict) -> VSBookResult:
  297. res = VSBookResult()
  298. return res
  299. def _save_browser_session(self):
  300. self._log("Abstract browser session env...")
  301. cookies_dict = self.page.cookies(all_domains=True, all_info=True)
  302. cookies_str = cookies_dict.as_json()
  303. local_storage_str = self.page.run_js('return JSON.stringify(window.localStorage) || "{}"')
  304. session_storage_str = self.page.run_js('return JSON.stringify(window.sessionStorage) || "{}"')
  305. proxy_str = ""
  306. if hasattr(self, 'config') and hasattr(self.config, 'proxy') and self.config.proxy.ip:
  307. p = self.config.proxy
  308. if p.username and p.password:
  309. proxy_str = f"{p.scheme}://{p.username}:{p.password}@{p.ip}:{p.port}"
  310. else:
  311. proxy_str = f"{p.scheme}://{p.ip}:{p.port}"
  312. session_data = VSCloudApi.Instance().create_http_session(
  313. session_id=str(uuid.uuid4().hex),
  314. cookies=cookies_str,
  315. local_storage=local_storage_str,
  316. session_storage=session_storage_str,
  317. user_agent=self.page.user_agent,
  318. page=self.page.url,
  319. proxy=proxy_str
  320. )
  321. return session_data.get('session_id')
  322. def _perform_request(self, method, url, headers=None, data=None, json_data=None, params=None, retry_count=0):
  323. if not self.page:
  324. raise BizLogicError("Browser not init")
  325. req_url = url
  326. if params:
  327. sep = '&' if '?' in req_url else '?'
  328. req_url += sep + urlencode(params)
  329. fetch_opts = { "method": method.upper(), "headers": headers or {}, "credentials": "include" }
  330. if json_data:
  331. fetch_opts['body'] = json.dumps(json_data)
  332. fetch_opts['headers']['Content-Type'] = 'application/json'
  333. elif data:
  334. if isinstance(data, dict):
  335. fetch_opts['body'] = urlencode(data)
  336. fetch_opts['headers']['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
  337. else:
  338. fetch_opts['body'] = data
  339. js = f"""
  340. return fetch("{req_url}", {json.dumps(fetch_opts)})
  341. .then(async r => {{
  342. const h = {{}}; r.headers.forEach((v, k) => h[k] = v);
  343. return {{ status: r.status, body: await r.text(), headers: h, url: r.url }};
  344. }}).catch(e => {{ return {{ status: 0, body: e.toString() }}; }});
  345. """
  346. resp = BrowserResponse(self.page.run_js(js, timeout=60))
  347. if resp.status_code == 200:
  348. return resp
  349. elif resp.status_code == 403:
  350. if "Just a moment" in resp.text and retry_count < 2:
  351. self._log("Cloudflare 403. Refreshing...")
  352. if self._refresh_firewall_session():
  353. return self._perform_request(method, url, headers, data, json_data, params, retry_count+1)
  354. raise PermissionDeniedError(f"HTTP 403: {resp.text[:100]}")
  355. elif resp.status_code == 429:
  356. self.is_healthy = False
  357. raise RateLimiteddError()
  358. elif resp.status_code in [401, 419]:
  359. self.is_healthy = False
  360. raise SessionExpiredOrInvalidError()
  361. else:
  362. raise BizLogicError(f"HTTP {resp.status_code}: {resp.text[:100]}")
  363. def _filter_dates(self, dates, start, end):
  364. if not start or not end: return dates
  365. valid = []
  366. s = datetime.strptime(start[:10], "%Y-%m-%d")
  367. e = datetime.strptime(end[:10], "%Y-%m-%d")
  368. for d in dates:
  369. c = datetime.strptime(d, "%Y-%m-%d")
  370. if s <= c <= e: valid.append(d)
  371. random.shuffle(valid)
  372. return valid
  373. def cleanup(self):
  374. if self.page:
  375. try: self.page.quit()
  376. except: pass
  377. self.page = None
  378. if os.path.exists(self.root_workspace):
  379. for _ in range(3):
  380. try: time.sleep(0.2); shutil.rmtree(self.root_workspace, ignore_errors=True); break
  381. except: time.sleep(0.5)
  382. if self.tunnel:
  383. try: self.tunnel.stop()
  384. except: pass
  385. self.tunnel = None
  386. def __del__(self):
  387. self.cleanup()