"""Download up to five images linked from the picsum.photos home page.

The page is fetched once and its ``<img>`` tags are collected, preferring
tags that carry the ``resize`` class.  Each usable ``src`` is resolved to an
absolute URL and saved in the current working directory as ``image_1.jpg``,
``image_2.jpg``, and so on.
"""

from typing import Optional
from urllib.parse import urljoin, urlparse

PAGE_URL = 'https://picsum.photos/'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Maximum number of images to save; also the threshold below which the
# ``img.resize`` selection is considered too small and every <img> is used.
TARGET_COUNT = 5
REQUEST_TIMEOUT = 10  # seconds, applied to every HTTP request


def resolve_image_url(src: Optional[str], base_url: str = PAGE_URL) -> Optional[str]:
    """Return an absolute http(s) URL for an ``<img src>`` value.

    Args:
        src: Raw ``src`` attribute value; may be ``None`` or empty.
        base_url: URL of the page the tag was found on, used to resolve
            relative, root-relative and protocol-relative references.

    Returns:
        The absolute URL, or ``None`` when ``src`` is missing/blank or does
        not resolve to an ``http``/``https`` URL (e.g. ``data:`` URIs).
    """
    if not src:
        return None
    src = src.strip()
    if not src:
        # urljoin(base, "") would return the page URL itself, which is not
        # an image, so blank values must be rejected before joining.
        return None
    absolute = urljoin(base_url, src)
    if urlparse(absolute).scheme not in ('http', 'https'):
        return None
    return absolute


def select_image_tags(soup, minimum: int = TARGET_COUNT):
    """Return the ``<img>`` tags to try, preferring ``img.resize``.

    Args:
        soup: Parsed document exposing a ``select(css_selector)`` method.
        minimum: If fewer than this many ``img.resize`` tags exist, every
            ``<img>`` tag in the document is returned instead.
    """
    tags = soup.select('img.resize')
    if len(tags) < minimum:
        tags = soup.select('img')
    return tags


def download_image(session, image_url: str, destination: str) -> None:
    """Fetch ``image_url`` with ``session`` and write the body to ``destination``.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (nothing is written in that case).
        OSError: if the destination file cannot be written.
    """
    response = session.get(image_url, timeout=REQUEST_TIMEOUT)
    # Check the status before opening the file so an error page is never
    # saved under an image file name.
    response.raise_for_status()
    with open(destination, 'wb') as f:
        f.write(response.content)


def main() -> None:
    """Scrape the landing page and save up to ``TARGET_COUNT`` images."""
    # Third-party imports are kept local so the pure helpers above can be
    # imported on a machine that does not have requests/bs4 installed.
    import requests
    from bs4 import BeautifulSoup

    downloaded = 0
    with requests.Session() as session:
        session.headers.update(HEADERS)

        page = session.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
        page.encoding = 'utf-8'
        soup = BeautifulSoup(page.text, 'html.parser')

        for img_tag in select_image_tags(soup):
            if downloaded >= TARGET_COUNT:
                break

            img_src = resolve_image_url(img_tag.get('src'))
            if img_src is None:
                continue

            print(f"正在下载第{downloaded+1}张图片：{img_src}")
            try:
                download_image(session, img_src, f'image_{downloaded+1}.jpg')
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and move on to the next tag.
                print(f"第{downloaded+1}张图片下载失败：{e}")
                continue
            downloaded += 1
            print(f"第{downloaded}张图片下载完成！")

    print(f"\n✅ 共下载完成 {downloaded} 张图片！")


if __name__ == "__main__":
    main()