完成作二:爬图片
This commit is contained in:
45
2509165016 爬图片.py
Normal file
45
2509165016 爬图片.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Scrape the picsum.photos landing page and save up to five of the images it
# references as image_1.jpg ... image_5.jpg in the current working directory.

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

url = 'https://picsum.photos/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page for images.
response.raise_for_status()
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# First try only the images carrying the "resize" class.
img_tags = soup.select('img.resize')
# If that yields fewer than 5 images, widen the search to every <img> tag.
if len(img_tags) < 5:
    img_tags = soup.select('img')

# Download at most this many images.
target_count = 5
downloaded = 0

for img_tag in img_tags:
    if downloaded >= target_count:
        break

    img_src = img_tag.get('src')
    # Skip tags with a missing/empty src or a src that is neither an absolute
    # http(s) URL nor a root-relative path.
    if not img_src or not img_src.startswith(('http', '/')):
        continue

    # Resolve against the page URL. Unlike plain string concatenation, urljoin
    # also handles protocol-relative sources ("//host/path") correctly and
    # leaves absolute URLs untouched.
    img_src = urljoin(url, img_src)

    print(f"正在下载第{downloaded+1}张图片:{img_src}")
    try:
        img_response = requests.get(img_src, headers=headers, timeout=10)
        # Do not save an HTTP error body as if it were image data.
        img_response.raise_for_status()
        with open(f'image_{downloaded+1}.jpg', 'wb') as f:
            f.write(img_response.content)
    except (requests.RequestException, OSError) as e:
        # A network or file-system failure on one image should not abort the
        # whole run; report it and move on to the next candidate.
        print(f"第{downloaded+1}张图片下载失败:{e}")
        continue

    downloaded += 1
    print(f"第{downloaded}张图片下载完成!")

print(f"\n✅ 共下载完成 {downloaded} 张图片!")
|
||||||
Reference in New Issue
Block a user