import time

import requests
from bs4 import BeautifulSoup

# Listing pages are BASE_URL + page number.
BASE_URL = "https://xxx.com/list?page="
START_PAGE = 1
END_PAGE = 100
# Browser-like UA so the site does not reject the default python-requests UA.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}


def crawl_page(page, session=None):
    """Fetch one listing page and print every item title found on it.

    Args:
        page: 1-based page number appended to BASE_URL.
        session: optional requests.Session for connection reuse across
            pages; defaults to the module-level requests API, so existing
            callers (crawl_page(page)) are unaffected.

    Returns:
        list[str]: the extracted titles (empty list on request failure),
        so callers can collect results instead of scraping stdout.
    """
    http = session if session is not None else requests
    url = f"{BASE_URL}{page}"
    titles = []
    try:
        resp = http.get(url, headers=HEADERS, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for item in soup.find_all("div", class_="item"):
            # BUG FIX: items without an <h3> made .find() return None and
            # .get_text() raise AttributeError, which the old broad except
            # swallowed — silently dropping the rest of the page's items.
            heading = item.find("h3")
            if heading is None:
                continue
            title = heading.get_text(strip=True)
            titles.append(title)
            print(title)
        print(f"第 {page} 页爬取完成")
    except requests.RequestException as e:
        # Narrowed from bare `except Exception`: only network/HTTP failures
        # are expected here; programming errors should surface, not be hidden.
        print(f"第 {page} 页出错:{e}")
    return titles


def main():
    """Crawl all pages sequentially, reusing one connection pool."""
    with requests.Session() as session:
        for page in range(START_PAGE, END_PAGE + 1):
            crawl_page(page, session=session)
            time.sleep(1)  # polite rate limit between page fetches


if __name__ == "__main__":
    main()