"""Print the titles of the entries in Douban doulist 3936288.

The list is requested page by page; for every page, each link whose
``href`` contains ``/subject/`` has its text printed as a title.
"""

import requests
from bs4 import BeautifulSoup

# ``{}`` is replaced with the ``start`` offset of the page being requested.
URL_TEMPLATE = "https://www.douban.com/doulist/3936288/?start={}&sort=time&playable=&sub_type="
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}
PAGE_COUNT = 10       # number of pages to request
PAGE_SIZE = 25        # the ``start`` offset advances by this much per page
REQUEST_TIMEOUT = 10  # seconds, passed to ``requests.get``


def build_page_url(page: int, page_size: int = PAGE_SIZE) -> str:
    """Return the list URL for the zero-based page index *page*."""
    return URL_TEMPLATE.format(page * page_size)


def fetch_page(page: int) -> BeautifulSoup:
    """Download page *page* of the list and return it parsed.

    Raises:
        requests.RequestException: if the request fails or the server
            answers with an HTTP error status.
    """
    response = requests.get(
        build_page_url(page), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly instead of parsing an error page as if it were the list.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def extract_titles(soup: BeautifulSoup) -> list:
    """Return the text of every ``<a>`` in *soup* that links to a subject.

    A link counts as a subject link when its ``href`` contains
    ``/subject/``. Anchors whose stripped text is empty are skipped so
    that no blank titles are reported.
    """
    titles = []
    for link in soup.find_all('a'):
        href = link.get('href', '')
        if '/subject/' not in href:
            continue
        title = link.get_text(strip=True)
        if title:
            titles.append(title)
    return titles


def main() -> None:
    """Fetch every page of the list and print the titles found on it."""
    print("--------------------------")
    print("--------------------------")
    print("--------------------------")

    for page in range(PAGE_COUNT):
        print(f"爬取第{page}页内容")
        try:
            soup = fetch_page(page)
        except requests.RequestException as exc:
            # One failed page should not abort the remaining pages.
            print(f"第{page}页请求失败: {exc}")
            continue
        for title in extract_titles(soup):
            print(title)


if __name__ == "__main__":
    main()