完成作业

This commit is contained in:
2509165025
2026-04-23 15:54:30 +08:00
parent b04442bbf5
commit dc6df24718
2 changed files with 130 additions and 0 deletions

31
4.23 25/25 .py Normal file
View File

@@ -0,0 +1,31 @@
import jieba
# Banner for the demo run: a 50-character rule above and below the title.
rule = "=" * 50
print(rule, "完整的文本预处理流程", rule, sep="\n")

# Sample sentences fed through the preprocessing pipeline below.
docs = [
    "今天天气真不错!适合出去玩。",
    "Python是一门很棒的编程语言。",
    "人工智能和机器学习是未来的发展方向。",
    "今天在咖啡馆喝了一杯很好喝的拿铁。",
]

# Tokens to discard after segmentation.
# NOTE(review): the earlier list literal repeated '' many times; in a set those
# collapse to the single '' entry kept here. They look like placeholders for
# single-character stopwords that were lost somewhere upstream -- confirm
# against the intended stopword list and restore them if so.
stopwords = {'', '一个', '没有', '自己', ','}
def preprocess_text(text):
    """Tokenize *text* with jieba and drop stopwords and blank tokens.

    Args:
        text: The raw sentence to segment.

    Returns:
        A list of the tokens yielded by ``jieba.cut(text)``, in the order
        they were produced, excluding every token that is in the
        module-level ``stopwords`` set and every token that is empty or
        consists only of whitespace. Kept tokens are returned unmodified
        (they are not stripped).
    """
    # One pass is enough: ``w.strip()`` is falsy for both empty and
    # whitespace-only tokens, so a separate ``len(w) > 0`` test and a second
    # filtering pass add nothing.
    return [w for w in jieba.cut(text) if w not in stopwords and w.strip()]
# Run every sample document through the pipeline and print the surviving
# tokens, joined by " / ", underneath the original sentence.
print("预处理结果:")
for doc_no, doc in enumerate(docs, start=1):
    words = preprocess_text(doc)
    print(f"\nDoc{doc_no}: {doc}")
    print(" / ".join(words))