import requests
from bs4 import BeautifulSoup
import time

# Base URL of the paginated list; the page number is appended directly.
BASE_URL = "https://xxx.com/list?page="
START_PAGE = 1
END_PAGE = 100
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}


def crawl_page(page):
    """Fetch one list page and print the title of every item on it.

    Args:
        page: 1-based page number appended to BASE_URL.

    Network/HTTP failures for a single page are reported and skipped so
    the overall crawl can continue; they never propagate to the caller.
    """
    url = f"{BASE_URL}{page}"
    try:
        # Keep the try body minimal: only the request itself can
        # legitimately fail here. timeout prevents a hung connection.
        resp = requests.get(url, headers=HEADERS, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        # Narrow catch: only network/HTTP errors, not programming bugs.
        print(f"第 {page} 页出错:{e}")
        return

    soup = BeautifulSoup(resp.text, "html.parser")
    for item in soup.find_all("div", class_="item"):
        heading = item.find("h3")
        # Guard: an item without an <h3> child would otherwise make
        # .get_text() raise AttributeError on None.
        if heading is not None:
            print(heading.get_text(strip=True))
    print(f"第 {page} 页爬取完成")


if __name__ == "__main__":
    for page in range(START_PAGE, END_PAGE + 1):
        crawl_page(page)
        time.sleep(1)  # polite per-request delay to avoid hammering the server