cloudflare_bypass_for_scraping.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. import time
  2. from DrissionPage import ChromiumPage
  3. class CloudflareBypasser:
  4. def __init__(self, driver: ChromiumPage, log=True):
  5. self.driver = driver
  6. self.log = log
  7. def log_message(self, message):
  8. if self.log:
  9. print(message)
  10. def search_recursively_shadow_root_with_iframe(self, ele, depth=0, max_depth=3):
  11. if depth > max_depth:
  12. self.log_message(f"Max depth {max_depth} reached during iframe search.")
  13. return None
  14. if ele.shadow_root:
  15. if ele.shadow_root.child().tag == "iframe":
  16. return ele.shadow_root.child()
  17. else:
  18. for child in ele.children():
  19. result = self.search_recursively_shadow_root_with_iframe(child, depth + 1, max_depth)
  20. if result:
  21. return result
  22. return None
  23. def search_recursively_shadow_root_with_cf_input(self,ele):
  24. if ele.shadow_root:
  25. if ele.shadow_root.ele("tag:input"):
  26. return ele.shadow_root.ele("tag:input")
  27. else:
  28. for child in ele.children():
  29. result = self.search_recursively_shadow_root_with_cf_input(child)
  30. if result:
  31. return result
  32. return None
  33. def locate_cf_button(self, dfs=False):
  34. try:
  35. button = None
  36. eles = self.driver.eles("tag:input")
  37. for ele in eles:
  38. attrs = ele.attrs
  39. if "name" in attrs and "type" in attrs:
  40. if "turnstile" in attrs["name"] and attrs["type"] == "hidden":
  41. if ele.parent() and ele.parent().shadow_root:
  42. button = ele.parent().shadow_root.child()("tag:body").shadow_root("tag:input")
  43. break
  44. if button:
  45. return button
  46. else:
  47. if dfs:
  48. self.log_message("Basic search failed. Searching for button recursively.")
  49. ele = self.driver.ele("tag:body")
  50. iframe = self.search_recursively_shadow_root_with_iframe(ele)
  51. if iframe:
  52. return self.search_recursively_shadow_root_with_cf_input(iframe("tag:body"))
  53. else:
  54. self.log_message("Iframe not found. Button search failed.")
  55. return None
  56. except Exception as e:
  57. self.log_message(f"Error locating verification button: {e}")
  58. return None
  59. def click_verification_button(self, is_dfs):
  60. try:
  61. button = self.locate_cf_button(dfs=is_dfs)
  62. if button:
  63. self.log_message("Verification button found. Attempting to click.")
  64. if button.states.is_displayed and button.states.is_enabled:
  65. button.click()
  66. time.sleep(1) # 确保事件触发
  67. else:
  68. self.log_message("Button is not clickable.")
  69. else:
  70. self.log_message("Verification button not found.")
  71. except Exception as e:
  72. self.log_message(f"Error clicking verification button: {e}")
  73. def is_bypassed(self):
  74. try:
  75. title = self.driver.title.lower()
  76. return "just a moment" not in title and "请稍候" not in title
  77. except Exception as e:
  78. self.log_message(f"Error checking page title: {e}")
  79. return False
  80. def bypass(self, max_retry=5):
  81. for i in range(max_retry):
  82. if self.is_bypassed():
  83. return True
  84. self.log_message(f"Verification page detected. Trying to bypass times={i}...")
  85. self.click_verification_button(False)
  86. time.sleep(2)
  87. return self.is_bypassed()