"""Training entry script.

Loads the dataset, makes a seeded 80/20 train/test split, fits the vectorizer
selected in ``config`` on the training texts, trains the configured model, and
writes the model parameters (``.npy``) and the fitted vectorizer (``.pkl``)
to the current working directory.
"""
import pickle
import time

import numpy as np

from dataset import load_data, BoWVectorizer, TFIDFVectorizer
from train import train
import config as cfg

# --- Load data ---------------------------------------------------------------
texts, labels = load_data()
labels = np.array(labels)

# --- Train/test split --------------------------------------------------------
# The global NumPy RNG is seeded so the shuffled order is the same on each run.
np.random.seed(42)
indices = np.random.permutation(len(texts))
split = int(0.8 * len(texts))  # first 80% of the shuffled indices -> training
train_idx = indices[:split]
test_idx = indices[split:]

train_texts = [texts[idx] for idx in train_idx]
test_texts = [texts[idx] for idx in test_idx]
y_train = labels[train_idx]
y_test = labels[test_idx]

# --- Vectorize ---------------------------------------------------------------
# Any VECTORIZER_TYPE other than "bow" selects the TF-IDF vectorizer.
vectorizer_cls = BoWVectorizer if cfg.VECTORIZER_TYPE == "bow" else TFIDFVectorizer
vec = vectorizer_cls(cfg.MAX_FEATURES)

# Fit on the training texts only; the test texts are only transformed.
vec.fit(train_texts)
X_train = np.array([vec.transform(text) for text in train_texts])
X_test = np.array([vec.transform(text) for text in test_texts])

# --- Train -------------------------------------------------------------------
# NOTE(review): the printed labels below contain U+FFFD replacement characters;
# this looks like non-UTF-8 text that was decoded as UTF-8 at some point.
# They are kept verbatim here -- restore them from the original file if it is
# still available.
banner = "=" * 50
print(banner)
print(
    f"ѵ������:\n ģ��: {cfg.MODEL_TYPE}"
    f"\n ����: {cfg.VECTORIZER_TYPE}"
    f"\n ѧϰ��: {cfg.LEARNING_RATE}"
)
print(banner)

# `train` returns the trained model plus a second value that this script does
# not use afterwards.
model, t = train(
    X_train,
    y_train,
    X_test,
    y_test,
    model_type=cfg.MODEL_TYPE,
    lr=cfg.LEARNING_RATE,
    epochs=cfg.NUM_EPOCHS,
    use_weight=cfg.USE_CLASS_WEIGHT,
)

# --- Save --------------------------------------------------------------------
# Output prefix: model type, vectorizer type, weighting flag, and a
# month-day_time stamp (no year component).
ts = time.strftime("%m%d_%H%M%S")
weight_tag = "weighted" if cfg.USE_CLASS_WEIGHT else "raw"
name = f"model_{cfg.MODEL_TYPE}_{cfg.VECTORIZER_TYPE}_{weight_tag}_{ts}"

# "lr" models expose W/b; every other model type is expected to expose
# W1/b1/W2/b2. Each array goes to its own "<name>_<param>.npy" file.
if cfg.MODEL_TYPE == "lr":
    param_names = ("W", "b")
else:
    param_names = ("W1", "b1", "W2", "b2")
for param in param_names:
    np.save(f"{name}_{param}.npy", getattr(model, param))

# The fitted vectorizer is pickled next to the parameter files.
with open(f"{name}_vec.pkl", "wb") as f:
    pickle.dump(vec, f)

print(f"\nģ���ѱ���: {name}_*.npy/*.pkl")