import requests
from bs4 import BeautifulSoup
import json
import os

# Number of <td> cells a data row must have to be parsed into a movie record.
_EXPECTED_COLUMNS = 8


def _parse_movie_row(cols):
    """Convert the <td> cells of one table row into a movie dictionary.

    Args:
        cols: Sequence of at least ``_EXPECTED_COLUMNS`` cell elements, in the
            order id, title, director, year, rating, duration, genre,
            actors_count.

    Returns:
        A dict with the keys listed above; numeric fields are converted to
        ``int`` / ``float``.

    Raises:
        ValueError: If a numeric cell does not contain a valid number.
    """
    texts = [col.get_text(strip=True) for col in cols[:_EXPECTED_COLUMNS]]
    return {
        "id": int(texts[0]),
        "title": texts[1],
        "director": texts[2],
        "year": int(texts[3]),
        "rating": float(texts[4]),
        "duration": int(texts[5]),
        "genre": texts[6],
        "actors_count": int(texts[7]),
    }


def task_1_scrape():
    """Download the movie list page and export its table to JSON.

    Fetches the page, saves the raw HTML to ``movies.html``, parses the first
    ``<table>`` on the page, and writes the extracted rows to ``movies.json``.
    Progress and errors are reported with ``print``; any exception is caught
    and printed, so the function itself does not raise.

    Rows with fewer than ``_EXPECTED_COLUMNS`` cells, or with non-numeric
    values in numeric columns, are skipped with a warning instead of aborting
    the whole run.
    """
    url = "https://exam.detr.top/exam-b/movies"

    # Adjacent string literals are concatenated verbatim, so every fragment
    # needs its own trailing space to produce a well-formed User-Agent.
    headers = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/129.0.0.0 Safari/537.36"
        )
    }

    try:
        # Without a timeout requests may block forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
        response.encoding = 'utf-8'

        if response.status_code != 200:
            print(f"[错误]请求失败,状态码：{response.status_code}")
            return

        html_content = response.text

        with open("movies.html", "w", encoding="utf-8") as f:
            f.write(html_content)
        print("[成功]已保存movies.html")

        soup = BeautifulSoup(html_content, 'html.parser')
        table = soup.find('table')
        if table is None:
            # find() returns None when no <table> exists; report it clearly
            # rather than failing later with an AttributeError.
            print("[错误]页面中未找到表格")
            return

        movies_data = []

        # The first <tr> is treated as the header row and skipped.
        for row in table.find_all('tr')[1:]:
            cols = row.find_all('td')
            if not cols:
                continue
            if len(cols) < _EXPECTED_COLUMNS:
                print(f"[警告]跳过列数不足的行(共{len(cols)}列)")
                continue
            try:
                movies_data.append(_parse_movie_row(cols))
            except ValueError as e:
                print(f"[警告]跳过无法解析的行：{e}")

        # Reported after parsing so the count reflects the collected rows.
        print(f"DEBUG:我抓到了{len(movies_data)}个电影数据")

        with open("movies.json", "w", encoding="utf-8") as f:
            json.dump(movies_data, f, ensure_ascii=False, indent=4)
        print(f"[成功]已抓取{len(movies_data)}部电影并保存至movies.json")

    except Exception as e:
        # Top-level boundary of this task: report the failure, do not crash.
        print(f"[异常]发生错误：{e}")