import requests
from bs4 import BeautifulSoup
import time

def _tag_text(tag) -> str:
    """Return the stripped text of a BeautifulSoup tag, or "" when *tag* is None."""
    return tag.text.strip() if tag is not None else ""


def _split_director_actor(credit_line: str) -> tuple:
    """Split a credits line into ``(director, actor)``.

    The director is the text between the '导演: ' marker and the '主演: '
    marker (or the end of the line); the cast is whatever follows '主演: '.
    A missing marker yields an empty string for that field.
    """
    director = ""
    actor = ""
    if '导演: ' in credit_line:
        after_director = credit_line.split('导演: ', 1)[1]
        director = after_director.split('主演: ', 1)[0].strip()
    if '主演: ' in credit_line:
        actor = credit_line.split('主演: ', 1)[1].strip()
    return director, actor


def _parse_movie(movie) -> dict:
    """Extract the fields of one ``div.item`` element into a dict of strings.

    Any element that is absent from the markup produces an empty string
    instead of raising, so one malformed entry cannot abort the whole run.
    """
    # The vote count is the last <span> inside the rating row; drop the
    # trailing '人评价' suffix so only the number remains.
    star = movie.find('div', class_='star')
    star_spans = star.find_all('span') if star is not None else []
    people = star_spans[-1].text.replace('人评价', '').strip() if star_spans else ""

    # The info paragraph carries two lines: credits, then "year / area / genre".
    info_tag = movie.find('p', class_='')
    info = info_tag.text.strip().split('\n') if info_tag is not None else []

    director, actor = _split_director_actor(info[0].strip()) if info else ("", "")

    year_area_type = [part.strip() for part in info[1].split('/')] if len(info) > 1 else []
    # Pad so that unpacking works even when fewer than three parts are present.
    year_area_type += [""] * (3 - len(year_area_type))
    year, area, genre = year_area_type[:3]

    return {
        'rank': _tag_text(movie.find('em')),
        'title': _tag_text(movie.find('span', class_='title')),
        'rating': _tag_text(movie.find('span', class_='rating_num')),
        'people': people,
        'quote': _tag_text(movie.find('span', class_='inq')),
        'director': director,
        'actor': actor,
        'year': year,
        'area': area,
        'genre': genre,
    }


def _format_record(fields: dict) -> str:
    """Render one parsed movie as a labelled text record ending in a separator line."""
    lines = [
        f"排名:{fields['rank']}",
        f"电影名称:{fields['title']}",
        f"评分:{fields['rating']}",
        f"评价人数:{fields['people']}",
        f"经典台词:{fields['quote']}",
        f"导演:{fields['director']}",
        f"主演:{fields['actor']}",
        f"上映年份:{fields['year']}",
        f"国家/地区:{fields['area']}",
        f"类型:{fields['genre']}",
        "-" * 50,  # separator between records
    ]
    return "\n".join(lines) + "\n"


def scrape_douban_top250_to_txt() -> None:
    """Scrape the Douban Top 250 movie list into ``douban_movies.txt``.

    Requests the ten list pages (``start=0, 25, ..., 225``), parses every
    ``div.item`` entry on each page and writes one labelled record per movie
    to a UTF-8 text file in the current working directory. The file is
    truncated on every run.

    A page whose HTTP request fails is reported on stdout and skipped; the
    remaining pages are still fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }

    with open('douban_movies.txt', 'w', encoding='utf-8') as txtfile:
        for start in range(0, 250, 25):
            url = f'https://movie.douban.com/top250?start={start}'

            # Keep the try body limited to the calls that can raise a
            # RequestException.
            try:
                response = requests.get(url, headers=headers, timeout=10)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"请求异常:{e}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            for movie in soup.find_all('div', class_='item'):
                txtfile.write(_format_record(_parse_movie(movie)))

            print(f"第 {start // 25 + 1} 页爬取完成,当前URL: {url}")
            time.sleep(1)  # pause one second between pages to avoid being rate-limited or blocked

    print("爬取完成,保存为 douban_movies.txt")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_douban_top250_to_txt()