"""Fetch the first page of Douban's Top 250 movie list and print its contents.

The script downloads the page, then prints the page title, the ``href`` of
every ``<a>`` element, and the text of every ``div.item`` element.

Recovered from patch 7be256b0d62213aa04c144b208e94b9f0b90140c
(subject: "Complete in-class crawler assignment 3.19"), which added this
script as a new 19-line file.
"""

import sys

import requests
from bs4 import BeautifulSoup

URL = 'https://movie.douban.com/top250'
PARAMS = {'start': '0', 'filter': ''}

# NOTE(review): Douban is reported to answer the default python-requests
# User-Agent with HTTP 418; a browser-like value is sent to avoid that.
# Confirm against the live site.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0 Safari/537.36'
    ),
}

# Without a timeout, requests may wait forever on an unresponsive server.
TIMEOUT_SECONDS = 10


def fetch_html(url, params):
    """Download *url* with query *params* and return the response body.

    Prints a success or failure message. Returns the HTML text when the
    server answers with status 200, otherwise ``None`` (including when the
    request itself fails with a network error).
    """
    try:
        response = requests.get(
            url, params=params, headers=HEADERS, timeout=TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f"请求失败: {exc}")
        return None

    if response.status_code != 200:
        print(f"请求失败,状态码: {response.status_code}")
        return None

    print("请求成功,获取到 HTML 内容")
    return response.text


def report_page(html_content):
    """Parse *html_content* and print its title, link targets and movie items."""
    soup = BeautifulSoup(html_content, 'lxml')

    # A page without a <title> element must not crash the report.
    title_tag = soup.find('title')
    title = title_tag.string if title_tag is not None else None
    print("页面标题:", title)

    for link in soup.find_all('a'):
        print("链接地址:", link.get('href'))

    for div in soup.select('div.item'):
        print("电影条目内容:", div.text)


def main():
    """Run the scraper; return a process exit code (0 on success, 1 on failure)."""
    html_content = fetch_html(URL, PARAMS)
    if html_content is None:
        # Nothing to parse: the original fell through here and raised
        # NameError because html_content was never assigned.
        return 1
    report_page(html_content)
    return 0


if __name__ == "__main__":
    sys.exit(main())