10 lines
335 B
Python
10 lines
335 B
Python
from sklearn.feature_extraction.text import CountVectorizer
|
|
# Toy corpus of three whitespace-segmented documents.
# The commas matter: without them Python concatenates adjacent string
# literals and the list would hold a single fused document.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# Bag-of-words model with scikit-learn's default settings.
# NOTE: the default token_pattern (r"(?u)\b\w\w+\b") only keeps tokens of
# two or more word characters, so the single-character token "是" is not
# part of the vocabulary. Pass token_pattern=r"(?u)\b\w+\b" to keep it.
# Tokens are also lower-cased by default ("Python" -> "python").
vectorizer = CountVectorizer()

# Learn the vocabulary and build the document-term count matrix
# (one row per document, one column per vocabulary term).
bow_matrix = vectorizer.fit_transform(docs)

# Vocabulary terms, in the column order used by the matrix.
print("词表:", vectorizer.get_feature_names_out())
print("BoW矩阵:")
# fit_transform returns a sparse matrix; densify it for display.
print(bow_matrix.toarray())