Files
task-2-3-File-Operations/260331-2509165034.py
2026-03-31 11:30:40 +08:00

19 lines
722 B
Python

import requests
from bs4 import BeautifulSoup
import csv
import json
import time
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# One dict per scraped list entry, accumulated across all requested pages.
movies_data = []

# Ten requests are made; the `start` query parameter advances by 25 each time.
for page in range(10):
    url = f'https://movie.douban.com/top250?start={page * 25}'

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of silently parsing an error page
    # that contains no matching elements.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    for item in soup.find_all('div', class_='item'):
        # Text of the link inside the 'hd' div, with newlines removed.
        # Fixed: the original passed `strip=Ture`, which raises NameError.
        title_info = (
            item.find('div', class_='hd')
            .find('a')
            .get_text(strip=True)
            .replace('\n', '')
        )
        # First line of the text of the first <p> inside the 'bd' div.
        info = item.find('div', class_='bd').p.get_text(strip=True).split('\n')[0]

        # NOTE(review): the original built an empty dict here and never stored
        # it, so both extracted values were discarded and `movies_data` stayed
        # empty. The key names below are a reviewer's choice - confirm them
        # against whatever later consumes `movies_data`.
        movie = {
            'title': title_info,
            'info': info,
        }
        movies_data.append(movie)