上传文件至 /
This commit is contained in:
17
from sklearn.feature_extraction.text imp.ini
Normal file
17
from sklearn.feature_extraction.text imp.ini
Normal file
@@ -0,0 +1,17 @@
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
|
||||
# Bag-of-words demo: build a vocabulary from three tiny pre-segmented documents
# and print the per-document term-count vectors.

# Tokens in each document are already separated by whitespace, so no word
# segmentation step is needed before vectorizing.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# Split on whitespace instead of using the default regex token pattern, which
# only keeps tokens of two or more word characters and would therefore drop
# the single-character token "是".
# token_pattern=None states explicitly that the regex is unused; without it
# scikit-learn warns that `token_pattern` is ignored when `tokenizer` is set.
# NOTE: CountVectorizer lowercases input by default, so "Python"/"Java" are
# expected to appear lowercased in the vocabulary.
vectorizer = CountVectorizer(tokenizer=str.split, token_pattern=None)

# Learn the vocabulary and produce the sparse document-term count matrix.
X = vectorizer.fit_transform(docs)

print("词表(Vocabulary):", vectorizer.get_feature_names_out())

# Densify once and reuse, rather than converting the sparse matrix per print.
dense_counts = X.toarray()
for doc_number, counts in enumerate(dense_counts, start=1):
    print(f"Doc{doc_number} 向量:", counts)
|
||||
Reference in New Issue
Block a user