Files
task-2-1-data-collection/1243/爬虫.py.txt
2026-03-19 16:10:03 +08:00

30 lines
870 B
Plaintext

import requests
from bs4 import BeautifulSoup as bs
# Request configuration used by the requests.get() call further down.

# Browser-like User-Agent string (Chrome 120 on 64-bit Windows 10).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Page to download.
url = 'https://www.baidu.com'

# Query-string parameters appended to the URL.
# NOTE(review): 'key'/'value' looks like a placeholder — confirm.
params = {'key': 'value'}
# Download the page, then print its HTTP status code, its <title> text and
# the target of every <a> element that carries a non-empty href.
try:
    response = requests.get(url, params=params, headers=headers, timeout=5)
    # Turn 4xx/5xx responses into requests.exceptions.HTTPError so they are
    # reported by the request-error handler below.
    response.raise_for_status()
    print("状态码:", response.status_code)
    print('-' * 20)

    # When the Content-Type header is text/* without a charset, requests
    # falls back to ISO-8859-1, which garbles non-Latin pages. In that case
    # decode with the encoding detected from the response body instead.
    if response.encoding and response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    html_content = response.text
    soup = bs(html_content, 'lxml')

    # A page without a <title> element yields None here instead of raising
    # AttributeError, so the link listing below still runs.
    title_tag = soup.find('title')
    title = title_tag.string if title_tag is not None else None
    print("页面标题:", title)
    print('=' * 20)

    links = soup.find_all('a')
    for link in links:
        href = link.get('href')
        if href:  # skip anchors whose href is missing or empty
            print("链接:", href)
    print('=' * 20)
except requests.exceptions.RequestException as e:
    # Connection failures, timeouts and HTTP error statuses.
    print(f"请求错误: {e}")
except Exception as e:
    # Anything raised after the request succeeded, e.g. while parsing.
    print(f"解析错误: {e}")