# sentinel.py
import json
import os
import random
import threading
import time
from typing import List, Dict, Callable, Optional

import redis

from toolkit.backoff import ExponentialBackoff
from toolkit.thread_pool import ThreadPool
from toolkit.vs_cloud_api import VSCloudApi
from vs_plg_factory import VSPlgFactory
from vs_types import GroupConfig, VSPlgConfig, Task, QueryWaitMode
  13. class SentinelGCO:
  14. def __init__(self, cfg: GroupConfig, redis_conf: Dict, logger: Callable[[str], None] = None):
  15. self.m_cfg = cfg
  16. self.m_factory = VSPlgFactory()
  17. self.m_logger = logger
  18. self.m_tasks: List[Task] = []
  19. self.m_lock = threading.RLock()
  20. self.m_stop_event = threading.Event()
  21. self.redis_client = redis.Redis(**redis_conf)
  22. self.m_pending_builtin = 0
  23. # 1. 全局建连退避:起步 1 分钟,封顶 1 小时 (保护登录接口)
  24. self.group_backoff = ExponentialBackoff(base_delay=60.0, max_delay=3600.0, factor=2.0)
  25. self.m_last_spawn_time = 0.0
  26. def _log(self, message):
  27. if self.m_logger:
  28. self.m_logger(f'[SENTINEL] [{self.m_cfg.identifier}] {message}')
  29. def _get_average_interval(self) -> float:
  30. """计算当前组平均的查询间隔(秒)"""
  31. mode = self.m_cfg.query_wait.mode
  32. if mode == QueryWaitMode.Loop:
  33. return 1.0
  34. elif mode == QueryWaitMode.Fixed:
  35. return float(self.m_cfg.query_wait.fixed_wait)
  36. elif mode == QueryWaitMode.Random:
  37. return (self.m_cfg.query_wait.random_min + self.m_cfg.query_wait.random_max) / 2.0
  38. return 30.0
  39. def start(self):
  40. if not self.m_cfg.enable:
  41. return
  42. self._log("Starting Sentinel...")
  43. plugin_name = self.m_cfg.plugin_config.plugin_name
  44. class_name = "".join(part.title() for part in plugin_name.split('_'))
  45. plugin_path = os.path.join(self.m_cfg.plugin_config.lib_path, self.m_cfg.plugin_config.plugin_bin)
  46. self.m_factory.register_plugin(plugin_name, plugin_path, class_name)
  47. threading.Thread(target=self._monitor_loop, daemon=True, name="Sentinel-Monitor").start()
  48. threading.Thread(target=self._creator_loop, daemon=True, name="Sentinel-Creator").start()
  49. def stop(self):
  50. self._log("Stopping Sentinel...")
  51. self.m_stop_event.set()
  52. with self.m_lock:
  53. tasks_to_cleanup = list(self.m_tasks)
  54. self.m_tasks.clear()
  55. for task in tasks_to_cleanup:
  56. self._cleanup_task(task, "sentinel stopped")
  57. def _cleanup_task(self, task: Task, reason: str):
  58. try:
  59. if task and task.instance and hasattr(task.instance, "cleanup"):
  60. self._log(f"Cleaning up sentinel instance. reason={reason}")
  61. task.instance.cleanup()
  62. except Exception as e:
  63. self._log(f"Cleanup failed. reason={reason}, error={e}")
  64. def _remove_task(self, task: Task, reason: str):
  65. removed = False
  66. with self.m_lock:
  67. if task in self.m_tasks:
  68. self.m_tasks.remove(task)
  69. removed = True
  70. if removed:
  71. self._cleanup_task(task, reason)
  72. def _get_redis_key(self, routing_key: str) -> str:
  73. return f"vs:signal:{routing_key}"
  74. def _monitor_loop(self):
  75. self._log("Monitor loop started.")
  76. rng = random.Random()
  77. while not self.m_stop_event.is_set():
  78. try:
  79. time.sleep(0.5)
  80. now = time.time()
  81. with self.m_lock:
  82. tasks_to_check = list(self.m_tasks)
  83. active_tasks = []
  84. dead_tasks = []
  85. for t in tasks_to_check:
  86. try:
  87. if t.instance.health_check():
  88. active_tasks.append(t)
  89. else:
  90. dead_tasks.append(t)
  91. except Exception as e:
  92. dead_tasks.append(t)
  93. self._log(f"Health check failed: {e}")
  94. if dead_tasks:
  95. with self.m_lock:
  96. current_tasks = list(self.m_tasks)
  97. self.m_tasks = [t for t in self.m_tasks if t in active_tasks]
  98. for t in dead_tasks:
  99. if t in current_tasks:
  100. self._cleanup_task(t, "health check failed")
  101. else:
  102. with self.m_lock:
  103. self.m_tasks = [t for t in self.m_tasks if t in active_tasks]
  104. for task in active_tasks:
  105. if now < task.next_run:
  106. continue
  107. apt_types = self.m_cfg.appointment_types
  108. if not apt_types:
  109. continue
  110. weights = [float(item.weight) for item in apt_types]
  111. apt_type = random.choices(apt_types, weights=weights, k=1)[0]
  112. interval = 30
  113. mode = task.qw_cfg.mode
  114. if mode == QueryWaitMode.Loop:
  115. interval = 1
  116. elif mode == QueryWaitMode.Fixed:
  117. interval = task.qw_cfg.fixed_wait
  118. elif mode == QueryWaitMode.Random:
  119. interval = rng.randint(task.qw_cfg.random_min, task.qw_cfg.random_max)
  120. task.next_run = time.time() + interval
  121. try:
  122. VSCloudApi.Instance().slot_refresh_start(apt_type.routing_key, country=apt_type.country, city=apt_type.city, visa_type=apt_type.visa_type)
  123. result = task.instance.query(apt_type)
  124. result.apt_type = apt_type
  125. if result.success:
  126. ttl = self.m_cfg.sentinel.signal_ttl
  127. self._log(f"🔥 SLOT FOUND! Writing signal to Redis (TTL: {ttl}s)")
  128. payload = {
  129. "group_id": self.m_cfg.identifier,
  130. "apt_type": apt_type.model_dump(),
  131. "query_result": result.to_snapshot_payload(),
  132. "timestamp": now
  133. }
  134. redis_key = self._get_redis_key(apt_type.routing_key)
  135. self.redis_client.publish(redis_key, json.dumps(payload))
  136. payload["query_result"]["website"] = self.m_cfg.website
  137. VSCloudApi.Instance().slot_snapshot_report(payload["query_result"])
  138. VSCloudApi.Instance().slot_refresh_success(apt_type.routing_key)
  139. except Exception as e:
  140. self._log(f"Query exception: {e}")
  141. VSCloudApi.Instance().slot_refresh_fail(apt_type.routing_key, error=str(e))
  142. except Exception as e:
  143. self._log(f"Monitor loop error: {e}")
  144. time.sleep(2)
  145. def _creator_loop(self):
  146. self._log("Creator loop started.")
  147. group_cd_key = f"vs:group:cooldown:{self.m_cfg.identifier}"
  148. while not self.m_stop_event.is_set():
  149. time.sleep(2)
  150. with self.m_lock:
  151. if self.redis_client.exists(group_cd_key):
  152. continue
  153. current = len(self.m_tasks)
  154. pending = self.m_pending_builtin
  155. target = self.m_cfg.sentinel.target_instances
  156. if (current + pending) < target:
  157. now = time.time()
  158. avg_interval = self._get_average_interval()
  159. stagger_delay = avg_interval / max(1, target)
  160. stagger_delay = max(10.0, stagger_delay)
  161. if now - self.m_last_spawn_time >= stagger_delay:
  162. with self.m_lock:
  163. self.m_last_spawn_time = now
  164. self._log(f"Staggered spawn triggered. Next spawn in {stagger_delay:.1f}s")
  165. self._spawn_sentinel_worker()
  166. def _spawn_sentinel_worker(self):
  167. with self.m_lock:
  168. self.m_pending_builtin += 1
  169. def _job():
  170. instance = None
  171. success = False
  172. try:
  173. plg_cfg = VSPlgConfig()
  174. plg_cfg.debug = self.m_cfg.debug
  175. plg_cfg.free_config = self.m_cfg.free_config
  176. plg_cfg.session_max_life = self.m_cfg.session_max_life
  177. if not self.m_cfg.need_account:
  178. plg_cfg.account.id = 0
  179. plg_cfg.account.username = "Guest"
  180. else:
  181. acc = VSCloudApi.Instance().get_next_account(self.m_cfg.sentinel.account_pool_id, self.m_cfg.sentinel.account_cd, test=False)
  182. plg_cfg.account.id = acc['id']
  183. plg_cfg.account.username = acc['username']
  184. plg_cfg.account.password = acc['password']
  185. if self.m_cfg.need_proxy:
  186. proxy = VSCloudApi.Instance().get_next_proxy(self.m_cfg.proxy_pool, self.m_cfg.proxy_cd, test=False)
  187. plg_cfg.proxy.id = proxy['id']
  188. plg_cfg.proxy.ip = proxy['ip']
  189. plg_cfg.proxy.port = proxy['port']
  190. plg_cfg.proxy.proto = proxy['proto']
  191. plg_cfg.proxy.username = proxy['username']
  192. plg_cfg.proxy.password = proxy['password']
  193. instance = self.m_factory.create(self.m_cfg.identifier, self.m_cfg.plugin_config.plugin_name)
  194. instance.set_log(self.m_logger)
  195. instance.set_config(plg_cfg)
  196. instance.create_session()
  197. with self.m_lock:
  198. self.m_tasks.append(
  199. Task(instance=instance,qw_cfg=self.m_cfg.query_wait,next_run=time.time(), book_allowed=False))
  200. group_fail_key = f"vs:group:failures:{self.m_cfg.identifier}"
  201. self.redis_client.delete(group_fail_key)
  202. success = True
  203. self._log(f"+++ Sentinel spawned: {plg_cfg.account.username}")
  204. except Exception as e:
  205. err_str = str(e)
  206. resource_not_found_indicators = [
  207. "40401" in err_str,
  208. "Account not found" in err_str,
  209. "Proxy not found" in err_str,
  210. ]
  211. if any(resource_not_found_indicators):
  212. return
  213. self._log(f"Spawn failed: {e}")
  214. rate_limited_indicators = [
  215. "42901" in err_str,
  216. "Rate limited" in err_str
  217. ]
  218. if any(rate_limited_indicators):
  219. group_fail_key = f"vs:group:failures:{self.m_cfg.identifier}"
  220. group_cd_key = f"vs:group:cooldown:{self.m_cfg.identifier}"
  221. g_fails = self.redis_client.incr(group_fail_key)
  222. g_cd = self.group_backoff.calculate(g_fails)
  223. self.redis_client.set(group_cd_key, "1", ex=int(g_cd))
  224. self._log(f"📉 [Rate Limited] Sentinel Spawn failed {g_fails} times. Global Backoff: {g_cd:.1f}s.")
  225. finally:
  226. if not success and instance is not None:
  227. try:
  228. if hasattr(instance, "cleanup"):
  229. instance.cleanup()
  230. except Exception as e:
  231. self._log(f"Cleanup failed after spawn failure: {e}")
  232. with self.m_lock:
  233. self.m_pending_builtin = max(0, self.m_pending_builtin - 1)
  234. ThreadPool.getInstance().enqueue(_job)