import random
import time
from typing import Dict, Optional

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
  DEFAULT_HEADERS = {     "User-Agent": (         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "         "AppleWebKit/537.36 (KHTML, like Gecko) "         "Chrome/124.0.0.0 Safari/537.36"     ),     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",     "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",     "Accept-Encoding": "gzip, deflate, br",     "Cache-Control": "no-cache", }
  class StableFetcher:     def __init__(self, base_delay: float = 0.6, max_delay: float = 8.0, proxies: Optional[Dict[str, str]] = None):         self.sess = requests.Session()         retry = Retry(             total=5,                           connect=3,                         read=3,                            backoff_factor=0.5,                status_forcelist=[429, 500, 502, 503, 504],             allowed_methods=["GET", "POST"],             respect_retry_after_header=True,         )         adapter = HTTPAdapter(max_retries=retry, pool_connections=50, pool_maxsize=50)         self.sess.mount("http://", adapter)         self.sess.mount("https://", adapter)         self.base_delay = base_delay         self.max_delay = max_delay         self.proxies = proxies or {}
      def _sleep_with_jitter(self, step: int):                  delay = min(self.base_delay * (2 ** (step - 1)), self.max_delay)         jitter = random.uniform(0, delay * 0.25)         time.sleep(delay + jitter)
      def get(self, url: str, headers: Optional[Dict[str, str]] = None, max_attempts: int = 4) -> requests.Response:         final_headers = {**DEFAULT_HEADERS, **(headers or {})}         last_exc = None         for i in range(1, max_attempts + 1):             try:                 r = self.sess.get(url, headers=final_headers, proxies=self.proxies, timeout=15, allow_redirects=True)                                  if r.status_code in (403, 429):                     self._sleep_with_jitter(i)                     continue                 return r             except requests.RequestException as e:                 last_exc = e                 self._sleep_with_jitter(i)         raise last_exc if last_exc else RuntimeError("请求失败且无异常信息")
  if __name__ == "__main__":     fetcher = StableFetcher(proxies=None)       url = "https://example.com/search?q=python"       resp = fetcher.get(url)     print(resp.status_code, resp.url)                         
 