"""Crawl the first 10 pages of a Douban doulist and print each movie title.

Recovered from a collapsed git patch; this is the surviving "+" version of
the script, with the rate-limiting that the previous revision had restored.
"""
import time

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.douban.com/doulist/3936287/?start={}&sort=time&playable=0&sub_type="

# Browser-like headers so Douban does not reject the request outright.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'Referer': 'https://www.douban.com/',
}

PAGE_SIZE = 25  # Douban doulists paginate 25 items per page.


def crawl_page(page: int) -> None:
    """Fetch one doulist page and print every movie title found on it.

    :param page: zero-based page index; translated into the ``start`` offset.
    :raises requests.RequestException: on connection/timeout/HTTP errors,
        propagated to the caller's handler.
    """
    url = BASE_URL.format(page * PAGE_SIZE)
    response = requests.get(url, headers=HEADERS, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, 'html.parser')
    items = soup.find_all('div', class_='doulist-item')

    print(f"===== 第 {page+1} 页 =====")
    for item in items:
        title_tag = item.find('div', class_='title')
        # Some list entries carry no <a> (e.g. removed films); skip them.
        if title_tag and title_tag.a:
            print(title_tag.a.get_text(strip=True))


def main() -> None:
    """Crawl pages 1-10, pausing between requests to stay polite."""
    for page in range(10):
        try:
            crawl_page(page)
        except requests.RequestException as e:
            # Narrowed from a blanket ``except Exception`` so genuine
            # parsing bugs are no longer swallowed as "request failures".
            print(f"第 {page+1} 页请求失败:{str(e)}")
        # Throttle to one request per second, as the earlier revision did,
        # to avoid being rate-limited or blocked by the server.
        time.sleep(1)


if __name__ == "__main__":
    main()