上传文件至 q2_1_crawler
This commit is contained in:
34
q2_1_crawler/260623.py
Normal file
34
q2_1_crawler/260623.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import json
|
||||
|
||||
import re
from collections import Counter

# Separators accepted inside a "genre" field: ASCII comma, full-width comma
# and the ideographic comma.
_GENRE_SEPARATORS = re.compile(r"[,,、]")


def rating_extremes(movies):
    """Return the ``(highest_rated, lowest_rated)`` movie records.

    Ties are resolved in favour of the record that appears first in
    ``movies``, which is how the built-in ``max``/``min`` behave.

    Raises:
        ValueError: if ``movies`` is empty.
    """
    if not movies:
        raise ValueError("movie list is empty")

    def by_rating(movie):
        return movie["rating"]

    return max(movies, key=by_rating), min(movies, key=by_rating)


def count_genres(movies):
    """Return a dict mapping each genre name to the number of movies in it.

    A movie's "genre" field may list several genres; each one is counted.
    Surrounding whitespace is stripped and empty tokens (for example from a
    trailing separator) are ignored. Keys keep first-seen order.
    """
    counts = Counter()
    for movie in movies:
        for token in _GENRE_SEPARATORS.split(movie["genre"]):
            genre = token.strip()
            if genre:
                counts[genre] += 1
    return dict(counts)


def count_directors(movies):
    """Return a dict mapping each director to their number of movies."""
    return dict(Counter(movie["director"] for movie in movies))


def count_released_since(movies, year=2020):
    """Return how many movies were released in ``year`` or later."""
    return sum(1 for movie in movies if movie["year"] >= year)


def main(path="movies.json"):
    """Load the movie file at ``path`` and print the four statistics."""
    with open(path, "r", encoding="utf-8") as f:
        total_data = json.load(f)
    movie_list = total_data["movies"]
    if not movie_list:
        # max()/min() cannot work on an empty list; stop with a clear message.
        raise SystemExit(f"{path} 中没有电影数据,无法统计")

    # 1) Highest- and lowest-rated movies
    max_movie, min_movie = rating_extremes(movie_list)
    print("评分最高电影:", max_movie["title"], ",评分:", max_movie["rating"])
    print("评分最低电影:", min_movie["title"], ",评分:", min_movie["rating"])

    # 2) Number of movies per genre
    print("\n各类型电影数量:", count_genres(movie_list))

    # 3) Number of movies per director
    print("\n各导演电影数量:", count_directors(movie_list))

    # 4) Movies released in 2020 or later
    print("\n2020年(含)以后上映电影总数:", count_released_since(movie_list, 2020))


if __name__ == "__main__":
    main()
|
||||
50
q2_1_crawler/26062339.py
Normal file
50
q2_1_crawler/26062339.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Request header checked by the server (hard requirement of the assignment).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
url = "https://exam.detr.top/exam-b/movies"

# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Column order of the movie table, with the converter applied to each cell.
MOVIE_FIELDS = (
    ("id", int),
    ("title", str),
    ("director", str),
    ("year", int),
    ("rating", float),
    ("duration", int),
    ("genre", str),
    ("actors_count", int),
)


def row_to_movie(cells):
    """Convert the cell texts of one table row into a movie record.

    Args:
        cells: sequence of cell strings in table column order. Surrounding
            whitespace is stripped; cells beyond the known columns are
            ignored.

    Returns:
        dict with the keys listed in ``MOVIE_FIELDS``, values converted to
        int/float/str as declared there.

    Raises:
        ValueError: if the row has fewer cells than ``MOVIE_FIELDS`` or a
            numeric cell cannot be converted.
    """
    if len(cells) < len(MOVIE_FIELDS):
        raise ValueError(
            f"expected {len(MOVIE_FIELDS)} cells per row, got {len(cells)}"
        )
    return {
        name: convert(text.strip())
        for (name, convert), text in zip(MOVIE_FIELDS, cells)
    }


def parse_movies(soup):
    """Extract all movie records from the first <table> of the parsed page.

    Raises:
        ValueError: if the page has no <table>.
    """
    table = soup.find("table")
    if table is None:
        raise ValueError("no <table> element found in the page")

    movies = []
    # The first <tr> is the header row.
    for row in table.find_all("tr")[1:]:
        cells = [td.text for td in row.find_all("td")]
        if not cells:
            # Row without data cells (e.g. an extra header/spacer row).
            continue
        movies.append(row_to_movie(cells))
    return movies


def extract_data_id(soup):
    """Return the text of the page's first <code> element (the data id).

    Raises:
        ValueError: if the page has no <code> element.
    """
    code_tag = soup.find("code")
    if code_tag is None:
        raise ValueError("no <code> element with the data id found")
    return code_tag.get_text(strip=True)


def main():
    """Download the movie page, save its HTML and write the parsed JSON."""
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"

    # Save the page source.
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(response.text)

    # Parse the table data.
    soup = BeautifulSoup(response.text, "html.parser")
    movie_list = parse_movies(soup)

    # Extract the data id shown on the page.
    result = {
        "data_id": extract_data_id(soup),
        "movies": movie_list,
    }

    # Write the JSON file.
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=4)

    print("✅ 爬取完成,两个文件已正常生成")


if __name__ == "__main__":
    main()
|
||||
152
q2_1_crawler/movies.html
Normal file
152
q2_1_crawler/movies.html
Normal file
@@ -0,0 +1,152 @@
|
||||
<!-- exam_fingerprint: B-20260623-3049 -->
|
||||
<!-- server_time: 2026-06-23 10:44:49 -->
|
||||
<!-- exam_paper: B -->
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>电影列表</title>
|
||||
<style>
|
||||
body { font-family: "Microsoft YaHei", sans-serif; margin: 20px; background: #f5f5f5; }
|
||||
h1 { color: #c0392b; }
|
||||
.meta { color: #999; font-size: 12px; margin-bottom: 15px; }
|
||||
.meta code { background: #e9ecef; padding: 2px 6px; border-radius: 3px; }
|
||||
table { width: 100%; border-collapse: collapse; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
||||
th, td { padding: 10px; text-align: left; border-bottom: 1px solid #ddd; }
|
||||
th { background: #c0392b; color: white; }
|
||||
tr:hover { background: #fef5f4; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>电影列表</h1>
|
||||
<p class="meta">数据编号:<code>B-20260623-3049</code></p>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>编号</th>
|
||||
<th>电影名</th>
|
||||
<th>导演</th>
|
||||
<th>上映年份</th>
|
||||
<th>评分</th>
|
||||
<th>时长(分钟)</th>
|
||||
<th>类型</th>
|
||||
<th>主演数</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
|
||||
<tr>
|
||||
<td>1</td>
|
||||
<td>千与千寻</td>
|
||||
<td>Frank Darabont</td>
|
||||
<td>2013</td>
|
||||
<td>8.0</td>
|
||||
<td>126</td>
|
||||
<td>剧情</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>2</td>
|
||||
<td>肖申克的救赎</td>
|
||||
<td>陈凯歌</td>
|
||||
<td>2018</td>
|
||||
<td>6.8</td>
|
||||
<td>127</td>
|
||||
<td>悬疑</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>3</td>
|
||||
<td>星际穿越</td>
|
||||
<td>Robert Zemeckis</td>
|
||||
<td>2024</td>
|
||||
<td>9.0</td>
|
||||
<td>131</td>
|
||||
<td>冒险</td>
|
||||
<td>2</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>4</td>
|
||||
<td>阿甘正传</td>
|
||||
<td>James Cameron</td>
|
||||
<td>1999</td>
|
||||
<td>8.2</td>
|
||||
<td>160</td>
|
||||
<td>喜剧</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>5</td>
|
||||
<td>三傻大闹宝莱坞</td>
|
||||
<td>宫崎骏</td>
|
||||
<td>1996</td>
|
||||
<td>9.4</td>
|
||||
<td>95</td>
|
||||
<td>动画</td>
|
||||
<td>4</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>6</td>
|
||||
<td>泰坦尼克号</td>
|
||||
<td>Christopher Nolan</td>
|
||||
<td>2008</td>
|
||||
<td>8.6</td>
|
||||
<td>90</td>
|
||||
<td>科幻</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>7</td>
|
||||
<td>忠犬八公的故事</td>
|
||||
<td>Lasse Hallström</td>
|
||||
<td>1996</td>
|
||||
<td>6.8</td>
|
||||
<td>168</td>
|
||||
<td>喜剧</td>
|
||||
<td>3</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>8</td>
|
||||
<td>放牛班的春天</td>
|
||||
<td>Rajkumar Hirani</td>
|
||||
<td>2020</td>
|
||||
<td>9.3</td>
|
||||
<td>112</td>
|
||||
<td>喜剧</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>9</td>
|
||||
<td>盗梦空间</td>
|
||||
<td>Christophe Barratier</td>
|
||||
<td>2005</td>
|
||||
<td>9.1</td>
|
||||
<td>154</td>
|
||||
<td>剧情</td>
|
||||
<td>4</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>10</td>
|
||||
<td>霸王别姬</td>
|
||||
<td>Christopher Nolan</td>
|
||||
<td>2015</td>
|
||||
<td>8.7</td>
|
||||
<td>103</td>
|
||||
<td>剧情</td>
|
||||
<td>5</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
105
q2_1_crawler/movies.json
Normal file
105
q2_1_crawler/movies.json
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"data_id": "B-20260623-3049",
|
||||
"movies": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "千与千寻",
|
||||
"director": "Frank Darabont",
|
||||
"year": 2013,
|
||||
"rating": 8.0,
|
||||
"duration": 126,
|
||||
"genre": "剧情",
|
||||
"actors_count": 3
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "肖申克的救赎",
|
||||
"director": "陈凯歌",
|
||||
"year": 2018,
|
||||
"rating": 6.8,
|
||||
"duration": 127,
|
||||
"genre": "悬疑",
|
||||
"actors_count": 2
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "星际穿越",
|
||||
"director": "Robert Zemeckis",
|
||||
"year": 2024,
|
||||
"rating": 9.0,
|
||||
"duration": 131,
|
||||
"genre": "冒险",
|
||||
"actors_count": 2
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "阿甘正传",
|
||||
"director": "James Cameron",
|
||||
"year": 1999,
|
||||
"rating": 8.2,
|
||||
"duration": 160,
|
||||
"genre": "喜剧",
|
||||
"actors_count": 5
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "三傻大闹宝莱坞",
|
||||
"director": "宫崎骏",
|
||||
"year": 1996,
|
||||
"rating": 9.4,
|
||||
"duration": 95,
|
||||
"genre": "动画",
|
||||
"actors_count": 4
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "泰坦尼克号",
|
||||
"director": "Christopher Nolan",
|
||||
"year": 2008,
|
||||
"rating": 8.6,
|
||||
"duration": 90,
|
||||
"genre": "科幻",
|
||||
"actors_count": 3
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "忠犬八公的故事",
|
||||
"director": "Lasse Hallström",
|
||||
"year": 1996,
|
||||
"rating": 6.8,
|
||||
"duration": 168,
|
||||
"genre": "喜剧",
|
||||
"actors_count": 3
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "放牛班的春天",
|
||||
"director": "Rajkumar Hirani",
|
||||
"year": 2020,
|
||||
"rating": 9.3,
|
||||
"duration": 112,
|
||||
"genre": "喜剧",
|
||||
"actors_count": 5
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"title": "盗梦空间",
|
||||
"director": "Christophe Barratier",
|
||||
"year": 2005,
|
||||
"rating": 9.1,
|
||||
"duration": 154,
|
||||
"genre": "剧情",
|
||||
"actors_count": 4
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"title": "霸王别姬",
|
||||
"director": "Christopher Nolan",
|
||||
"year": 2015,
|
||||
"rating": 8.7,
|
||||
"duration": 103,
|
||||
"genre": "剧情",
|
||||
"actors_count": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user