完成作业二：爬图片

2026-03-26 16:06:50 +08:00
parent 256d2cb21d
commit 84efd2fdf7
1 changed files with 45 additions and 0 deletions
--- a/爬图片.py
+++ b/爬图片.py
@@ -0,0 +1,45 @@
+import requests
+from bs4 import BeautifulSoup
+
+url = 'https://picsum.photos/'
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+}
+
+response = requests.get(url, headers=headers, timeout=10)
+response.encoding = 'utf-8'
+soup = BeautifulSoup(response.text, 'html.parser')
+
+# 先尝试获取所有带resize类的图片
+img_tags = soup.select('img.resize')
+# 如果数量不足5张，就扩大范围获取所有img标签
+if len(img_tags) < 5:
+    img_tags = soup.select('img')
+
+# 确保最多只爬5张
+target_count = 5
+downloaded = 0
+
+for i, img_tag in enumerate(img_tags):
+    if downloaded >= target_count:
+        break
+    img_src = img_tag.get('src')
+    # 过滤掉无效的src（比如空值或非图片链接）
+    if not img_src or not img_src.startswith(('http', '/')):
+        continue
+    # 处理相对路径
+    if img_src.startswith('/'):
+        img_src = f'https://picsum.photos{img_src}'
+    
+    print(f"正在下载第{downloaded+1}张图片：{img_src}")
+    try:
+        img_response = requests.get(img_src, headers=headers, timeout=10)
+        with open(f'image_{downloaded+1}.jpg', 'wb') as f:
+            f.write(img_response.content)
+        downloaded += 1
+        print(f"第{downloaded}张图片下载完成！")
+    except Exception as e:
+        print(f"第{downloaded+1}张图片下载失败：{e}")
+        continue
+
+print(f"\n✅ 共下载完成 {downloaded} 张图片！")