import requests
import re
import time

# User-Agent string sent with each request; it identifies the client as
# Chrome 120 on 64-bit Windows 10.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

# HTTP headers passed to requests.get for every page fetch.
headers = {'User-Agent': _USER_AGENT}

# Accumulates the titles extracted from every successfully fetched page.
all_chinese_titles = []

# Captures the text of each <span class="title">...</span>. The character
# class rejects '<' and '&', so a span whose text contains an HTML entity
# (e.g. '&nbsp;') does not match at all. Compiled once because the same
# pattern is reused for every page.
title_pattern = re.compile(r'<span class="title">([^<&]+)</span>')

# Fetch 10 pages; the `start` query parameter advances by 25 per page
# (0, 25, ..., 225).
for page in range(10):
    start = page * 25
    url = f'https://movie.douban.com/top250?start={start}&filter='
    print(f'ȡ {page+1} ҳ...')

    try:
        # Without a timeout a stalled connection would block the script
        # indefinitely; 10 seconds bounds each request.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # raise on a 4xx/5xx response
    except requests.RequestException as e:
        # Report the failure and stop fetching; titles collected from
        # earlier pages are kept.
        print(f' {page+1} ҳȡʧ: {e}')
        break

    titles = title_pattern.findall(response.text)

    # Drop any captured text that begins with '/'.
    chinese_titles = [t for t in titles if not t.startswith('/')]
    all_chinese_titles.extend(chinese_titles)

    time.sleep(1)  # pause one second between consecutive requests

# Write the collected titles to douban_top250.txt as UTF-8, one per line,
# each prefixed with its 1-based position in the list.
with open('douban_top250.txt', 'w', encoding='utf-8') as out_file:
    out_file.writelines(
        f'{rank}. {name}\n'
        for rank, name in enumerate(all_chinese_titles, 1)
    )

# Report how many titles were written.
print(f'ѳɹȫ {len(all_chinese_titles)} Ӱ douban_top250.txt')

# Read douban_top250.txt back and print its entire contents.
with open('douban_top250.txt', 'r', encoding='utf-8') as saved:
    contents = saved.read()
print(contents)