Files
task-3-2-2-text-classification/config.py
2026-04-27 21:45:06 +08:00

41 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
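"""
Configuration file - all hyperparameters are managed in one place.

Design rationale:
    Hyperparameters are grouped by category so that students can modify one
    category on its own without affecting the others.
"""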
# ==================== Data ====================
# Path to the dataset directory.
DATA_DIR = "data/ChnSentiCorp"
# Maximum vocabulary size.
MAX_FEATURES = 3000
# Maximum sentence length, counted in words.
MAX_SEQ_LEN = 100
# Vectorization method: "tfidf" or "bow".
VECTORIZER_TYPE = "tfidf"
# ==================== Model ====================
# Model type: "mlp" or "lr".
MODEL_TYPE = "mlp"
# Hidden layer size of the MLP (per the original note, LR ignores this).
HIDDEN_SIZE = 64
# Number of classes (binary classification: positive / negative).
NUM_CLASSES = 2
# Dropout keep probability (per the original note, LR ignores this;
# simply leave it at 1).
KEEP_PROB = 1.0
# ==================== Training ====================
# Learning rate.
LEARNING_RATE = 0.05
# Number of training epochs.
NUM_EPOCHS = 100
# Mini-batch size.
BATCH_SIZE = 64
# ==================== Class weights (to counter class imbalance) ====================
# True enables class weighting; False disables it (useful for comparison).
USE_CLASS_WEIGHT = True
# Weight formula: n_samples / (n_classes * n_class_i)
# Positive reviews are more numerous, so their weight is smaller;
# negative reviews are fewer, so their weight is larger.
# NOTE(review): the original comments labelled both values "auto-computed",
# but they are hard-coded literals in this file - presumably precomputed
# from the dataset with the formula above; re-derive them if the data changes.
# Weight of the positive class.
CLASS_WEIGHT_POS = 0.73
# Weight of the negative class.
CLASS_WEIGHT_NEG = 1.58
# ==================== Experiments ====================
# True runs the comparison experiments; False runs a single model.
RUN_COMPARISON = False
# Models to include in the comparison.
COMPARE_MODELS = ["lr", "mlp"]
# Vectorization methods to include in the comparison.
COMPARE_VECTORS = ["bow", "tfidf"]
# ==================== Miscellaneous ====================
# Random seed (for reproducibility).
RANDOM_SEED = 42
# Print detailed logs.
VERBOSE = True