Files
final-practice/数据处理与标注.py
2026-06-09 11:31:02 +08:00

25 lines
696 B
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
# Source file with the raw movie records, and the destination file that is
# meant to be imported into Label Studio.
INPUT_PATH = "movies.json"
OUTPUT_PATH = "filtered_quotes.json"


def _has_quote(movie):
    """Return True when *movie* has a non-blank string under ``"quote"``."""
    quote = movie.get("quote")
    # A missing key, a JSON null, or any non-string value counts as "no quote"
    # so that one malformed record is skipped instead of aborting the run.
    return isinstance(quote, str) and quote.strip() != ""


def filter_quotes(movies):
    """Drop movies without a usable quote and keep only id/title/quote.

    Args:
        movies: Iterable of dicts. Each kept entry must provide ``"rank"``
            and ``"title"``; ``"quote"`` may be missing, null or blank, in
            which case the entry is filtered out.

    Returns:
        A list of ``{"id", "title", "quote"}`` dicts, in input order, where
        ``"id"`` is taken from the movie's ``"rank"`` and ``"quote"`` is the
        original (unstripped) quote text.

    Raises:
        KeyError: If a movie that has a quote lacks ``"rank"`` or ``"title"``.
    """
    return [
        {
            "id": movie["rank"],
            "title": movie["title"],
            "quote": movie["quote"],
        }
        for movie in movies
        if _has_quote(movie)
    ]


def main():
    """Read the raw movies, filter them, report counts and write the result."""
    # 1. Load the raw data.
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        movies = json.load(f)

    # 2. Filter out entries whose quote is empty.
    filtered_data = filter_quotes(movies)
    print(f"过滤前:{len(movies)}")
    print(f"过滤后:{len(filtered_data)}")

    # 3. Save in a format that can be imported into Label Studio.
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(filtered_data, f, ensure_ascii=False, indent=4)
    print("✅ 已生成 filtered_quotes.json可直接导入Label Studio")


if __name__ == "__main__":
    main()