import requests
from bs4 import BeautifulSoup
import csv
import time
import random

# Browser-like User-Agent so the site does not reject the request outright.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# Column order for the output CSV; must match the keys built in parse_movies().
FIELDNAMES = ["rank", "title", "score", "quote", "info"]


def parse_movies(html):
    """Extract movie records from one Douban Top-250 listing page.

    Args:
        html: Raw HTML text of a listing page.

    Returns:
        A list of dicts, one per movie, with keys matching FIELDNAMES.
    """
    soup = BeautifulSoup(html, 'lxml')
    movies = []
    for item in soup.select('.item'):
        # The one-line tagline (.inq) is missing for some movies; look it up once.
        quote_tag = item.select_one('.inq')
        movies.append({
            "rank": item.select_one('.pic em').text,
            "title": item.select_one('.hd .title').text,
            "score": item.select_one('.rating_num').text,
            "quote": quote_tag.text if quote_tag else "",
            "info": item.select_one('.bd p').text.strip(),
        })
    return movies


def scrape_top250(pages=10, per_page=25):
    """Fetch `pages` listing pages (`per_page` movies each) and return all records.

    A page that fails to download or parse is reported and skipped, so one bad
    page does not abort the whole run.
    """
    movie_list = []
    for page in range(pages):
        url = f'https://movie.douban.com/top250?start={page * per_page}&filter='
        print(f'Fetching page {page + 1}...')

        # Random delay between requests to avoid triggering anti-scraping limits.
        time.sleep(random.uniform(0.5, 1.5))

        try:
            # 1. Download the listing page.
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()
            # 2-3. Parse it and collect every movie on the page.
            movie_list.extend(parse_movies(response.text))
        except Exception as e:  # best-effort per page: log and continue
            print(f"Page {page + 1} failed: {e}")
    return movie_list


def save_csv(movies, path="douban_top250_bs4.csv"):
    """Write `movies` to `path` as CSV.

    utf-8-sig adds a BOM so Excel opens non-ASCII text correctly.
    """
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(movies)


def main():
    """Scrape all 250 movies and save them to the default CSV file."""
    movies = scrape_top250()
    save_csv(movies)
    print(f"Done: {len(movies)} records saved to douban_top250_bs4.csv")


if __name__ == "__main__":
    main()