"""Fetch a web page and print its title, hyperlinks and selected ``div`` text.

Provenance: this script was added as ``爬虫.py`` by commit
59ddd4fab6e6f4aaf6f1f26b72d4929ba0ffffb4 (author 2509165046, dated
Tue, 24 Mar 2026 20:05:47 +0800).

Run it directly; importing the module performs no network access.
"""
import requests
from bs4 import BeautifulSoup as bs

url = 'https://www.baidu.com'
params = {'key': 'value'}

# Seconds to wait for the server. Without an explicit timeout, requests
# waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _page_markup(response):
    """Return the markup to hand to BeautifulSoup for *response*.

    When the ``Content-Type`` header carries no charset, requests falls
    back to ISO-8859-1 for text responses, which garbles non-Latin pages.
    In that case the raw bytes are returned so that BeautifulSoup can
    detect the encoding from the document itself.
    """
    content_type = response.headers.get('Content-Type', '')
    if 'charset' in content_type.lower():
        return response.text
    return response.content


def main():
    """Download ``url`` and print the parsed page, title, links and divs.

    Returns:
        0 when the page was fetched and parsed; 1 when the request raised
        or the server answered with a status other than 200.
    """
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"请求失败:{exc}")
        return 1

    # Stop here on failure: parsing an error page would only print noise.
    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return 1

    print("请求成功,获取到HTML内容")
    print(response.status_code)
    print('-------------')
    print('--------')

    soup = bs(_page_markup(response), 'lxml')
    print(soup)
    print('===========')

    # find() returns None when the tag is absent, so guard before .string.
    title_tag = soup.find('title')
    title = title_tag.string if title_tag is not None else None
    print(title)
    print('===========')

    links = soup.find_all('a')
    print(links)
    print("============")
    for link in links:
        print("链接:", link.get('href'))

    div_elements = soup.select('div.di')
    print(div_elements)
    for div in div_elements:
        print('div:', div.text)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())