"""Fetch a web page and print its title, hyperlinks and selected <div> text.

The script issues one GET request, reports the HTTP status, parses the
response body with BeautifulSoup (lxml parser) and prints:

* the parsed document,
* the text of the <title> element (``None`` when the page has none),
* every <a> element and its ``href`` attribute,
* every <div> element with class ``di`` and its text.
"""
import requests
from bs4 import BeautifulSoup as bs

url = 'https://www.baidu.com'
params = {'key': 'value'}

# Without a timeout requests waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)

if response.status_code == 200:
    print("请求成功,获取到HTML内容")
else:
    print(f"请求失败,状态码:{response.status_code}")
print(response.status_code)
print('-------------')

# When the server sends a text Content-Type without a charset, requests
# falls back to ISO-8859-1, which garbles non-Latin pages. Use the encoding
# detected from the body in that case.
content_type = response.headers.get('Content-Type', '')
if 'charset' not in content_type.lower():
    response.encoding = response.apparent_encoding

# The body is parsed even for non-200 responses, so an error page is still
# shown rather than silently dropped.
html_content = response.text
print('--------')

soup = bs(html_content, 'lxml')
print(soup)
print('===========')

# find() returns None when the element is missing; guard before reading
# .string so a page without <title> does not raise AttributeError.
title_tag = soup.find('title')
title = title_tag.string if title_tag is not None else None
print(title)
print('===========')

links = soup.find_all('a')
print(links)
print("============")
for link in links:
    # get() yields None for anchors that have no href attribute.
    print("链接:", link.get('href'))

div_elements = soup.select('div.di')
print(div_elements)
for div in div_elements:
    print('div:', div.text)