Files
task-2-1-data-collection/2509165016 爬图片.py
2026-03-26 16:06:50 +08:00

45 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# Page that is scraped for <img> tags.
url = 'https://picsum.photos/'
# Browser-like User-Agent sent with every request (page and images).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Upper bound on the number of images saved per run.
target_count = 5


def resolve_image_url(src, base_url):
    """Turn an ``<img src>`` value into an absolute http(s) URL.

    Args:
        src: Raw ``src`` attribute value; may be ``None`` or empty.
        base_url: URL of the page the tag came from, used to resolve
            root-relative (``/a.jpg``), protocol-relative (``//host/a.jpg``)
            and document-relative (``img/a.jpg``) sources.

    Returns:
        The absolute URL, or ``None`` when ``src`` is missing, malformed, or
        does not resolve to an http/https resource (e.g. ``data:`` URIs).
    """
    if not src or not src.strip():
        return None
    try:
        absolute = urljoin(base_url, src.strip())
        scheme = urlparse(absolute).scheme
    except ValueError:
        # urllib raises ValueError on malformed netlocs (e.g. broken IPv6).
        return None
    if scheme not in ('http', 'https'):
        return None
    return absolute


def collect_image_tags(page_html):
    """Return the candidate ``<img>`` tags found in ``page_html``.

    Tags carrying the ``resize`` class are preferred; when fewer than
    ``target_count`` of them exist, every ``<img>`` tag is returned instead.
    """
    soup = BeautifulSoup(page_html, 'html.parser')
    img_tags = soup.select('img.resize')
    if len(img_tags) < target_count:
        img_tags = soup.select('img')
    return img_tags


def download_image(img_url, dest_path):
    """Fetch ``img_url`` and write the response body to ``dest_path``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
        OSError: If the destination file cannot be written.
    """
    img_response = requests.get(img_url, headers=headers, timeout=10)
    # Fail before opening the file so an error page is never saved as a .jpg.
    img_response.raise_for_status()
    with open(dest_path, 'wb') as f:
        f.write(img_response.content)


def main():
    """Download up to ``target_count`` images referenced by the page at ``url``.

    Images are saved in the current directory as ``image_1.jpg``,
    ``image_2.jpg``, ...; a failed download is reported and skipped without
    consuming a file number.
    """
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # Without the page there is nothing to scrape; exit with a message.
        raise SystemExit(f"页面获取失败:{e}")
    response.encoding = 'utf-8'

    downloaded = 0
    for img_tag in collect_image_tags(response.text):
        # Never save more than target_count images.
        if downloaded >= target_count:
            break
        # response.url is the final URL after redirects, i.e. the real base.
        img_src = resolve_image_url(img_tag.get('src'), response.url)
        if img_src is None:
            continue
        print(f"正在下载第{downloaded+1}张图片:{img_src}")
        try:
            download_image(img_src, f'image_{downloaded+1}.jpg')
        except (requests.RequestException, OSError) as e:
            print(f"{downloaded+1}张图片下载失败:{e}")
            continue
        downloaded += 1
        print(f"{downloaded}张图片下载完成!")
    print(f"\n✅ 共下载完成 {downloaded} 张图片!")


if __name__ == "__main__":
    main()