# Files
# task-2-1-data-collection/m.py
# 2026-03-24 11:30:36 +08:00
#
# 26 lines
# 1003 B
# Python

import requests
from bs4 import BeautifulSoup
# Script: fetch one Douban doulist page, then print the page title, every
# hyperlink target found on the page, and a "title | author" line for each
# list entry.

# Standard desktop-Chrome User-Agent so the request resembles a browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
}
url = 'https://www.douban.com/doulist/3936288/'

# Upper bound (seconds) on connecting / waiting for data; without a timeout
# requests.get() may block forever on an unresponsive server.
REQUEST_TIMEOUT = 10

# Placeholder printed when an entry lacks the expected title/author element.
MISSING_TEXT = "未知"

try:
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
except requests.RequestException as exc:
    # Connection errors and timeouts are reported like the non-200 case
    # below instead of ending in a raw traceback.
    print(f"请求失败:{exc}")
    raise SystemExit(1) from exc

if response.status_code == 200:
    html_content = response.text
    print("请求成功,获取到HTML内容")
    soup = BeautifulSoup(html_content, 'html.parser')

    # find() returns None when the document has no <title>; guard before
    # reading .string so a title-less page does not raise AttributeError.
    title_tag = soup.find('title')
    title = title_tag.string if title_tag is not None else None
    print("页面标题:", title)

    # Print the target of every anchor that actually carries an href.
    links = soup.find_all('a')
    for link in links:
        href = link.get('href')
        if href:
            print("链接地址:", href)

    # One div.doulist-item per list entry.
    book_items = soup.select('div.doulist-item')
    for item in book_items:
        # select_one() returns None when nothing matches, so each lookup is
        # checked and a single incomplete entry cannot abort the listing.
        # NOTE(review): confirm that 'div.subject-cast' is present in the
        # doulist markup; when it is absent the author shows the placeholder.
        title_node = item.select_one('div.title a')
        author_node = item.select_one('div.subject-cast')
        book_title = (
            title_node.get_text(strip=True)
            if title_node is not None
            else MISSING_TEXT
        )
        book_author = (
            author_node.get_text(strip=True)
            if author_node is not None
            else MISSING_TEXT
        )
        print(f"书名:{book_title}|作者:{book_author}")
else:
    print(f"请求失败,状态码:{response.status_code}")