import requests
from bs4 import BeautifulSoup
import json
import csv

# Browser-like User-Agent header sent with every page request.
headers = {'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# One dict per scraped movie; filled by the loop below and read by the
# export steps later in the script.
movies = []

# Seconds to wait for the server before giving up. Without a timeout,
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _parse_item(item):
    """Build one movie record from a ``div.item`` element.

    Args:
        item: A BeautifulSoup tag for a single list entry.

    Returns:
        A dict with the keys ``中文名``, ``英文名``, ``年份``, ``导演`` and
        ``主演``. Fields whose tags or labels are missing fall back to a
        placeholder or an empty string.
    """
    title_tag = item.find('span', class_='title')
    if title_tag:
        title = title_tag.text
    else:
        title = "未找到标题"
        print(f"在这个item里没找到标题:{item}")

    other_tag = item.find('span', class_='other')
    other_title = other_tag.text if other_tag else ""

    info_tag = item.find('p', class_='')
    info = info_tag.text.strip() if info_tag else ""

    # When a "playable" marker exists, only its preceding siblings are
    # searched for the year; otherwise the whole item is searched.
    playable_tag = item.find('span', class_='playable')
    if playable_tag:
        year_tag = playable_tag.find_previous_sibling('span', class_='year')
    else:
        year_tag = item.find('span', class_='year')
    # strip('()') removes any leading/trailing parentheses around the year.
    year = year_tag.text.strip('()') if year_tag else "未知年份"

    # Director is the text between the "导演:" and "主演:" labels.
    director = info.split('导演:')[1].split('主演:')[0].strip() if '导演:' in info else ''
    # NOTE(review): this keeps everything after "主演:" up to the end of the
    # paragraph (or the next "主演:"); if the paragraph carries further lines
    # after the cast list they end up in this field -- confirm against the
    # live page markup.
    actors = info.split('主演:')[1].strip() if '主演:' in info else ''

    return {
        '中文名': title,
        # Drop the "/" separators that precede the alternative titles.
        '英文名': other_title.replace('/', '').strip(),
        '年份': year,
        '导演': director,
        '主演': actors,
    }


# The list is paginated via the ``start`` query parameter in steps of 25.
for start in range(0, 250, 25):
    url = f'https://movie.douban.com/top250?start={start}'
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # An error page contains no items; report it instead of silently
        # producing an incomplete result set.
        print(f"请求失败(状态码 {response.status_code}),已跳过:{url}")
        continue
    soup = BeautifulSoup(response.text, 'html.parser')

    for item in soup.find_all('div', class_='item'):
        movies.append(_parse_item(item))

# Export the records as tab-separated text with a header row.
# The scraped fields are free text and may contain tabs or line breaks, which
# would corrupt the row/column structure if written raw. csv.DictWriter
# quotes such fields (and fields containing '"'); plain fields are written
# exactly as before.
# NOTE(review): the '.tst' extension looks like it may be a typo for '.tsv';
# left unchanged because the completion message refers to the same name.
# newline='' is what the csv module expects; rows end with '\n'.
with open('movies.tst', 'w', encoding='utf-8', newline='') as f:
    tsv_writer = csv.DictWriter(
        f,
        fieldnames=['中文名', '英文名', '年份', '导演', '主演'],
        delimiter='\t',
        lineterminator='\n',
    )
    tsv_writer.writeheader()
    tsv_writer.writerows(movies)


# Export the records as a comma-separated file: one header row, then one row
# per movie dict. newline='' leaves line-ending handling to the csv module.
csv_columns = ['中文名', '英文名', '年份', '导演', '主演']
with open('movies.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    csv_out = csv.DictWriter(csv_file, fieldnames=csv_columns)
    csv_out.writeheader()
    for record in movies:
        csv_out.writerow(record)


# Export the records as a pretty-printed JSON array. ensure_ascii=False keeps
# non-ASCII characters readable instead of emitting \uXXXX escapes.
with open('movies.json', mode='w', encoding='utf-8') as json_file:
    json_file.write(json.dumps(movies, ensure_ascii=False, indent=4))


# Tell the user which output files were produced.
print("爬取完成!已生成 movies.tst, movies.csv, movies.json")