25 lines
696 B
Python
25 lines
696 B
Python
"""Filter movie records down to those that carry a quote.

Reads ``movies.json``, keeps the ``rank`` (exported as ``id``), ``title`` and
``quote`` of every record whose quote is not blank, and writes the result to
``filtered_quotes.json``.
"""
import json

# 1. Load the raw data.
with open("movies.json", "r", encoding="utf-8") as f:
    movies = json.load(f)

# 2. Drop entries whose quote is empty.
#    A record may lack the "quote" key or carry JSON null (loaded as None);
#    both are treated as an empty quote instead of crashing on `.strip()`.
#    Whitespace-only quotes are dropped too; kept quotes are written unmodified.
filtered_data = [
    {
        "id": movie["rank"],
        "title": movie["title"],
        "quote": movie["quote"],
    }
    for movie in movies
    if (movie.get("quote") or "").strip()
]

print(f"过滤前:{len(movies)} 条")
print(f"过滤后:{len(filtered_data)} 条")

# 3. Save the result as JSON intended for import into Label Studio.
#    ensure_ascii=False keeps non-ASCII text readable in the output file.
with open("filtered_quotes.json", "w", encoding="utf-8") as f:
    json.dump(filtered_data, f, ensure_ascii=False, indent=4)

print("✅ 已生成 filtered_quotes.json,可直接导入Label Studio")