This commit is contained in:
2509165045
2026-05-19 12:13:50 +08:00
parent 4b1cf2f145
commit 8cd21c7ec5
7 changed files with 7772 additions and 5 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -10,7 +10,7 @@
DATA_DIR = 'data/ChnSentiCorp' # 数据集路径 DATA_DIR = 'data/ChnSentiCorp' # 数据集路径
MAX_FEATURES = 3000 # 词表最大容量 MAX_FEATURES = 3000 # 词表最大容量
MAX_SEQ_LEN = 100 # 句子最大长度(词数) MAX_SEQ_LEN = 100 # 句子最大长度(词数)
VECTORIZER_TYPE = 'tfidf' # 'tfidf' 或 'bow'(向量化方式) VECTORIZER_TYPE = 'bow' # 'tfidf' 或 'bow'(向量化方式)
# ==================== 模型相关 ==================== # ==================== 模型相关 ====================
MODEL_TYPE = 'mlp' # 'mlp' 或 'lr'(模型类型) MODEL_TYPE = 'mlp' # 'mlp' 或 'lr'(模型类型)
@@ -19,19 +19,19 @@ NUM_CLASSES = 2 # 类别数(正面/负面二分类)
KEEP_PROB = 1.0 # Dropout保留概率(LR忽略,设为1即可) KEEP_PROB = 1.0 # Dropout保留概率(LR忽略,设为1即可)
# ==================== 训练相关 ==================== # ==================== 训练相关 ====================
LEARNING_RATE = 0.05 # 学习率 LEARNING_RATE = 0.9 # 学习率
NUM_EPOCHS = 100 # 训练轮数 NUM_EPOCHS = 300 # 训练轮数
BATCH_SIZE = 64 # 批次大小 BATCH_SIZE = 64 # 批次大小
# ==================== 类别权重(解决数据不平衡问题)==================== # ==================== 类别权重(解决数据不平衡问题)====================
USE_CLASS_WEIGHT = True # True=启用类别权重, False=不启用(对比用) USE_CLASS_WEIGHT = True # True=启用类别权重, False=不启用(对比用)
# 权重计算公式: n_samples / (n_classes * n_class_i) # 权重计算公式: n_samples / (n_classes * n_class_i)
# 正面评论多所以权重小,负面评论少所以权重大 # 正面评论多所以权重小,负面评论少所以权重大
CLASS_WEIGHT_POS = 0.73 # 正面类权重(自动计算) CLASS_WEIGHT_POS = 0.67 # 正面类权重(自动计算)
CLASS_WEIGHT_NEG = 1.58 # 负面类权重(自动计算) CLASS_WEIGHT_NEG = 1.58 # 负面类权重(自动计算)
# ==================== 实验相关 ==================== # ==================== 实验相关 ====================
RUN_COMPARISON = False # True=运行对比实验, False=运行单个模型 RUN_COMPARISON = True # True=运行对比实验, False=运行单个模型
COMPARE_MODELS = ['lr', 'mlp'] # 要对比的模型列表 COMPARE_MODELS = ['lr', 'mlp'] # 要对比的模型列表
COMPARE_VECTORS = ['bow', 'tfidf'] # 要对比的向量化方式 COMPARE_VECTORS = ['bow', 'tfidf'] # 要对比的向量化方式

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB