Files
task-3-2-1-Text-Processing-…/ljh.py
2026-04-23 15:52:49 +08:00

21 lines
445 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from sklearn.feature_extraction.text import CountVectorizer
# Toy corpus: each document is already word-segmented, with tokens separated
# by spaces, so the vectorizer only needs to split on word boundaries.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# CountVectorizer's default token_pattern, r"(?u)\b\w\w+\b", keeps only tokens
# of two or more characters, which silently discards single-character words
# such as "是". Accept one-character tokens so every segmented word is counted.
# Tokens are still lowercased by default, so "Python" is listed as "python".
vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b")

# Learn the vocabulary and build the sparse document-term count matrix.
X = vectorizer.fit_transform(docs)

print("词表Vocabulary")
print(vectorizer.get_feature_names_out())

print("\n每个文档的BoW向量")
# Densify the matrix so each row prints as a plain per-document count vector;
# columns follow the order returned by get_feature_names_out().
for doc_no, doc_vec in enumerate(X.toarray(), start=1):
    print(f"Doc{doc_no}: {doc_vec}")