上传文件至 /
This commit is contained in:
50
20260609.1.py
Normal file
50
20260609.1.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.metrics import classification_report, accuracy_score
|
||||
|
||||
|
||||
# Lookup table from integer class id to genre display name. The classifier's
# predicted label is used as the key (see predict_genre), and the values are
# used as row names in the classification report, so the ids must stay
# 0..9 in this order.
genre_map = {
    0: "剧情",  # drama
    1: "喜剧",  # comedy
    2: "科幻",  # science fiction
    3: "悬疑",  # mystery / suspense
    4: "动作",  # action
    5: "爱情",  # romance
    6: "动画",  # animation
    7: "犯罪",  # crime
    8: "奇幻",  # fantasy
    9: "纪录",  # documentary
}
|
||||
|
||||
|
||||
# Load the labelled dataset. Only the "text" and "label" columns are read
# below; "label" is presumably the integer genre id used as the key of
# genre_map -- TODO confirm against the CSV.
df = pd.read_csv("movie_data.csv")

# A missing text value makes TfidfVectorizer raise, and a missing label breaks
# the stratified split, so discard incomplete rows up front rather than
# failing later inside scikit-learn.
df = df.dropna(subset=["text", "label"])

# Coerce to str so a stray numeric cell cannot reach the vectorizer as a
# non-string document.
X = df["text"].astype(str)
y = df["label"]

# Hold out 20% for evaluation. stratify=y keeps the class proportions equal in
# both splits; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
|
||||
|
||||
|
||||
# TF-IDF over character uni- and bi-grams, capped at the 5000 most frequent
# features.
#
# Why characters instead of the default word analyzer: the default
# token_pattern r"(?u)\b\w\w+\b" only splits on non-word characters, so an
# unbroken run of Chinese characters becomes ONE token. Raw, unsegmented text
# (such as the synopsis later passed to predict_genre) would then almost never
# match the learned vocabulary. Character n-grams need no external word
# segmenter and tokenize training and inference text the same way.
#
# NOTE(review): if the text in movie_data.csv is already whitespace-segmented,
# an alternative is to keep word features and segment new text with the same
# tool before calling tfidf.transform -- confirm against the data.
tfidf = TfidfVectorizer(analyzer="char", ngram_range=(1, 2), max_features=5000)

# Fit the vocabulary/IDF weights on the training split only, then reuse them
# for the held-out split so no test information leaks into the features.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Multinomial naive Bayes trained on the non-negative TF-IDF features.
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)
|
||||
|
||||
|
||||
# Score the held-out split.
y_pred = model.predict(X_test_tfidf)
print(f"准确率: {accuracy_score(y_test, y_pred):.4f}")

# Pass the label ids explicitly. Without `labels`, classification_report
# derives the classes from the data and raises ValueError when their count
# differs from len(target_names) (e.g. a genre that never occurs in the CSV).
# Explicit labels also pin each display name to its own class id instead of
# relying on sort order. zero_division=0 reports 0 for classes with no
# predicted/true samples instead of emitting a warning.
print(
    classification_report(
        y_test,
        y_pred,
        labels=list(genre_map),
        target_names=list(genre_map.values()),
        zero_division=0,
    )
)
|
||||
|
||||
|
||||
def predict_genre(text: str) -> str:
    """Return the genre name predicted for a single piece of text.

    Uses the module-level fitted ``tfidf`` vectorizer and ``model`` classifier,
    then translates the predicted label through ``genre_map``.

    Args:
        text: The raw text to classify.

    Returns:
        The ``genre_map`` value stored under the predicted label.

    Raises:
        KeyError: If the predicted label is not a key of ``genre_map``.
    """
    # transform() takes an iterable of documents, hence the one-item list.
    text_tfidf = tfidf.transform([text])
    # predict() returns one label per input row; there is exactly one row.
    pred_label = model.predict(text_tfidf)[0]
    return genre_map[pred_label]
|
||||
|
||||
# Smoke-test the trained pipeline on one unseen synopsis.
new_movie = "一群年轻人在宇宙飞船上探索外星文明,遭遇未知危险"

# Echo the input first so it is still visible if the prediction step fails.
print(f"电影简介:{new_movie}")
print(f"预测类别:{predict_genre(new_movie)}")
|
||||
Reference in New Issue
Block a user