"""Bag-of-words demo: count whitespace-separated tokens with CountVectorizer."""

from sklearn.feature_extraction.text import CountVectorizer

# Documents are pre-segmented: tokens are already separated by spaces.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# A custom tokenizer replaces the regex-based default.  Passing
# token_pattern=None states that explicitly and avoids scikit-learn's
# "token_pattern will not be used" UserWarning.  ``str.split`` behaves exactly
# like ``lambda x: x.split()`` but keeps the vectorizer picklable.
# NOTE: the default ``lowercase=True`` is kept, so "Python" is counted under
# the vocabulary entry "python".
vectorizer = CountVectorizer(tokenizer=str.split, token_pattern=None)
X = vectorizer.fit_transform(docs)

print("词表(Vocabulary):", vectorizer.get_feature_names_out())

# Densify the sparse count matrix once instead of once per printed row.
dense = X.toarray()
for doc_number, row in enumerate(dense, start=1):
    print(f"Doc{doc_number} 向量:", row)