17 lines
465 B
Python
17 lines
465 B
Python
import numpy as np
|
|
# Vocabulary: entry i corresponds to position i of every document vector.
vocab = ["Python", "学习", "数据", "人工智能", "编程"]

# Sample documents; note that the words are not separated by whitespace.
doc1 = "Python学习编程"
doc2 = "Python人工智能数据"
|
|
|
|
def text_to_vector(text, vocab):
    """Return the bag-of-words count vector of *text* over *vocab*.

    The text is scanned left to right; at each position the longest
    vocabulary word starting there is consumed and counted, otherwise a
    single character is skipped. This works for unsegmented text (e.g.
    Chinese, where words are not separated by whitespace) as well as for
    whitespace-separated text. Note that a vocabulary word is also counted
    when it occurs inside a longer run of characters.

    Args:
        text: The document to vectorize.
        vocab: Sequence of vocabulary words (strings). Empty strings are
            ignored and always get a count of 0.

    Returns:
        A 1-D float ``numpy`` array of length ``len(vocab)`` whose i-th
        entry is the number of occurrences of ``vocab[i]`` in *text*.
    """
    # Longest candidates first, so "人工智能" wins over a shorter entry such
    # as "人工" and overlapping entries are never double-counted.
    candidates = sorted(
        dict.fromkeys(word for word in vocab if word),
        key=len,
        reverse=True,
    )
    counts = dict.fromkeys(candidates, 0)

    pos = 0
    while pos < len(text):
        for candidate in candidates:
            if text.startswith(candidate, pos):
                counts[candidate] += 1
                pos += len(candidate)
                break
        else:
            # No vocabulary word starts here; skip one character.
            pos += 1

    vector = np.zeros(len(vocab))
    for i, word in enumerate(vocab):
        vector[i] = counts.get(word, 0)
    return vector
|
|
|
|
# Vectorize both sample documents against the shared vocabulary.
v1 = text_to_vector(doc1, vocab)
v2 = text_to_vector(doc2, vocab)

# Print the resulting vectors.
print("doc1向量:", v1)
print("doc2向量:", v2)