上传文件至 /

This commit is contained in:
2026-04-23 15:53:06 +08:00
parent c929b11229
commit 2bd3fd4792
2 changed files with 244 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
from sklearn.feature_extraction.text import CountVectorizer
# Toy corpus for a bag-of-words demo. Tokens are already separated by spaces,
# so plain whitespace splitting is sufficient to tokenize the mixed
# Chinese/English text.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# A custom tokenizer is supplied because the default token_pattern only keeps
# tokens of two or more word characters, which would drop single-character
# words such as "是". token_pattern=None makes it explicit that the regex is
# unused and avoids the UserWarning scikit-learn raises when both a tokenizer
# and a token_pattern are set. str.split is used directly instead of a lambda
# wrapper (same behavior, and the vectorizer stays picklable).
vectorizer = CountVectorizer(tokenizer=str.split, token_pattern=None)

# X is the sparse document-term count matrix: one row per document, one column
# per vocabulary entry.
X = vectorizer.fit_transform(docs)

print("词表Vocabulary", vectorizer.get_feature_names_out())

# Convert to a dense array once rather than once per printed row.
dense = X.toarray()
for doc_number, row in enumerate(dense, start=1):
    print(f"Doc{doc_number} 向量:", row)