# Listing metadata (as captured): 21 lines, 445 B, Python
from sklearn.feature_extraction.text import CountVectorizer

# Toy corpus: each document is already split into space-separated tokens.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# NOTE: with its default settings CountVectorizer lowercases the text and
# only keeps tokens of two or more word characters, so the single-character
# token "是" is left out of the vocabulary. Pass
# token_pattern=r"(?u)\b\w+\b" if one-character tokens should be counted.
vectorizer = CountVectorizer()

# Learn the vocabulary from the corpus and build the document-term counts.
X = vectorizer.fit_transform(docs)

print("词表(Vocabulary):")
print(vectorizer.get_feature_names_out())

print("\n每个文档的BoW向量:")
# One row per document; each column is the count of one vocabulary term.
for i, doc_vec in enumerate(X.toarray()):
    print(f"Doc{i+1}: {doc_vec}")