上传文件至 /

This commit is contained in:
2026-06-09 11:31:02 +08:00
parent e8dc1a7b0d
commit 42278e7c90
2 changed files with 90 additions and 0 deletions

25
数据处理与标注.py Normal file
View File

@@ -0,0 +1,25 @@
import json
def filter_quotes(movies):
    """Build the output records for every movie that has a non-blank quote.

    Args:
        movies: Iterable of dicts as loaded from ``movies.json``. Each kept
            record must provide ``rank`` and ``title``; ``quote`` may be
            missing, ``None``, or blank, in which case the record is dropped.

    Returns:
        A list of ``{"id", "title", "quote"}`` dicts in input order. ``id`` is
        taken from the record's ``rank`` and the quote text is kept unmodified
        (it is stripped only for the emptiness check).

    Raises:
        KeyError: If a record with a usable quote lacks ``rank`` or ``title``.
    """
    kept = []
    for movie in movies:
        quote = movie.get("quote")
        # A null or absent quote means "no quote", same as an empty string;
        # calling .strip() on it directly would abort the whole run.
        if not isinstance(quote, str) or not quote.strip():
            continue
        kept.append(
            {
                "id": movie["rank"],
                "title": movie["title"],
                "quote": quote,
            }
        )
    return kept


def main():
    """Read ``movies.json``, drop quote-less entries, write ``filtered_quotes.json``."""
    # 1. Load the raw data.
    with open("movies.json", "r", encoding="utf-8") as f:
        movies = json.load(f)

    # 2. Filter out the entries whose quote is empty.
    filtered_data = filter_quotes(movies)
    print(f"过滤前:{len(movies)}")
    print(f"过滤后:{len(filtered_data)}")

    # 3. Save the result as JSON intended for import into Label Studio.
    #    ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open("filtered_quotes.json", "w", encoding="utf-8") as f:
        json.dump(filtered_data, f, ensure_ascii=False, indent=4)
    print("✅ 已生成 filtered_quotes.json可直接导入Label Studio")


if __name__ == "__main__":
    main()