上传文件至 /
This commit is contained in:
21
2026.4.2306 林伟泰.py
Normal file
21
2026.4.2306 林伟泰.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
|
|
||||||
|
# Document collection: each document is pre-tokenised, with tokens separated
# by single spaces.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
]

# Bag-of-words (BoW) vectorisation.
# CountVectorizer's defaults lowercase the input and use the token pattern
# r"(?u)\b\w\w+\b", which silently discards single-character tokens such as
# "是". Both defaults are overridden here so that the vocabulary keeps the
# original casing and every whitespace-separated token.
vectorizer = CountVectorizer(lowercase=False, token_pattern=r"(?u)\b\w+\b")
bow_matrix = vectorizer.fit_transform(docs)

print("词表:", vectorizer.get_feature_names_out())
# Output (features are returned in sorted order, so "Java" precedes "Python"):
# 词表: ['Java' 'Python' '是' '编程' '语言']

print("BoW矩阵:")
print(bow_matrix.toarray())
# Output (columns follow the vocabulary order printed above):
# [[0 1 1 1 1]    # "Python" document: Java=0, Python=1, 是=1, 编程=1, 语言=1
#  [1 0 1 1 1]]   # "Java" document:   Java=1, Python=0, 是=1, 编程=1, 语言=1
|
||||||
Reference in New Issue
Block a user