"""Fetch a web page and print its title, links, and selected ``div`` elements.

Script recovered from a single-file git patch (commit 23bbe9d, which created
``爬虫.py``).  It issues one GET request, parses the returned HTML with
BeautifulSoup using the ``lxml`` parser, and prints what it finds to stdout.
The script exits with a non-zero status if the request fails or the server
does not answer with HTTP 200.
"""
import requests
from bs4 import BeautifulSoup as bs

url = 'https://www.baidu.com'
params = {'key': 'value'}

# requests applies no timeout by default, so an unresponsive server would
# block the script forever; bound the wait and report network errors cleanly.
try:
    response = requests.get(url, params=params, timeout=10)
except requests.RequestException as exc:
    raise SystemExit(f"请求失败:{exc}")

# Stop here on a non-200 answer: parsing an error page for links and divs
# would only print misleading output.
if response.status_code != 200:
    print(f"请求失败,状态码:{response.status_code}")
    raise SystemExit(1)

print("请求成功,获取到HTML内容")
print(response.status_code)
print('-------------')

# When the Content-Type header carries no charset, requests falls back to
# ISO-8859-1 for text responses, which garbles non-Latin pages.  Use the
# encoding detected from the body in that case.
if 'charset' not in response.headers.get('Content-Type', '').lower():
    response.encoding = response.apparent_encoding
html_content = response.text
print('--------')

soup = bs(html_content, 'lxml')
print(soup)
print('===========')

# A page without a <title> makes find() return None; print None rather than
# crashing with AttributeError.
title_tag = soup.find('title')
title = title_tag.string if title_tag is not None else None
print(title)
print('===========')

links = soup.find_all('a')
print(links)
print("============")
for link in links:
    # .get() yields None for anchors that have no href attribute.
    print("链接:", link.get('href'))

# CSS selector: every <div> whose class list contains "di".
div_elements = soup.select('div.di')
print(div_elements)
for div in div_elements:
    print('div:', div.text)