diff --git a/1.py b/1.py deleted file mode 100644 index f6170f3..0000000 --- a/1.py +++ /dev/null @@ -1,65 +0,0 @@ -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) -warnings.filterwarnings("ignore", category=UserWarning) - -import pandas as pd -import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.svm import LinearSVC -from sklearn.metrics import accuracy_score, classification_report - -genre_dict = { - 0: "剧情", - 1: "喜剧", - 2: "科幻", - 3: "悬疑", - 4: "动作", - 5: "爱情", - 6: "动画", - 7: "犯罪", - 8: "奇幻", - 9: "纪录" -} -num_classes = len(genre_dict) - -def load_data(file_path="movie_data.csv"): - df = pd.read_csv(file_path) - texts = df["text"].astype(str).tolist() - labels = df["label"].astype(int).tolist() - return texts, labels - -def text_feature_extraction(texts): - vectorizer = TfidfVectorizer( - max_features=10000, - stop_words="english", - ngram_range=(1, 2) - ) - features = vectorizer.fit_transform(texts) - return features, vectorizer - -def train_and_evaluate(features, labels): - X_train, X_test, y_train, y_test = train_test_split( - features, labels, test_size=0.2, random_state=42, stratify=labels - ) - model = LinearSVC(random_state=42, max_iter=10000) - model.fit(X_train, y_train) - y_pred = model.predict(X_test) - acc = accuracy_score(y_test, y_pred) - print(f"测试集准确率: {acc:.4f}") - print("\n分类报告:") - print(classification_report(y_test, y_pred, target_names=genre_dict.values())) - return model - -def predict_genre(model, vectorizer, new_text): - new_feature = vectorizer.transform([new_text]) - pred_label = model.predict(new_feature)[0] - return genre_dict[pred_label] - -if __name__ == "__main__": - texts, labels = load_data() - features, vectorizer = text_feature_extraction(texts) - model = train_and_evaluate(features, labels) - sample_text = "一个孤独的科学家发明了时间机器,却在穿梭时空的过程中陷入了悖论..." - print(f"\n示例文本: {sample_text}") - print(f"预测类型: {predict_genre(model, vectorizer, sample_text)}") \ No newline at end of file