# Python snippet: 11 lines, 418 B
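# Vocabulary: ["Python", "是", "编程", "语言", "Java"].
# Term-count vectors over that vocabulary:
# Doc1 [1,1,1,1,0], Doc2 [0,1,1,1,1], Doc3 [3,0,0,0,0].
# NOTE(review): these vectors appear to describe a different example corpus
# than the `docs` list used in the code below — confirm.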
from sklearn.feature_extraction.text import TfidfVectorizer

# Tiny whitespace-separated corpus used to demonstrate TF-IDF weighting.
docs = ["Python 编程", "Java 编程", "Python Python"]

# Default settings: per the scikit-learn documentation the vectorizer
# lowercases the text and keeps only tokens of two or more word characters.
tfidf = TfidfVectorizer()

# Learn the vocabulary and IDF weights from `docs` and build the
# document-term matrix (sparse; one row per document, one column per term).
matrix = tfidf.fit_transform(docs)

# "词表" = vocabulary: the terms in the column order of `matrix`.
print("词表:", tfidf.get_feature_names_out())

# "TF-IDF矩阵" = TF-IDF matrix; densified only for display, which is fine
# for a toy corpus of this size.
print("TF-IDF矩阵:")
print(matrix.toarray())