# Python · 18 lines · 514 B
from collections import Counter

import numpy as np
# Toy corpus: each document is a string of whitespace-separated tokens.
corpus = [
    "我 喜欢 编程",
    "我 喜欢 学习 Python",
    "编程 是 有趣 的",
]

# Vocabulary: every distinct token in the corpus, sorted so that each word
# gets a stable column index in the document vectors.
vocab = sorted(set(" ".join(corpus).split()))
print("词汇表:", vocab)
def text_to_vector(text, vocab):
    """Return the bag-of-words count vector of *text* over *vocab*.

    Args:
        text: Document whose tokens are separated by whitespace.
        vocab: Sequence of words; position ``i`` of the result holds the
            number of times ``vocab[i]`` occurs in *text*.

    Returns:
        A 1-D integer NumPy array of length ``len(vocab)``. Tokens of
        *text* that are not in *vocab* are ignored.
    """
    # Tally every token once, rather than rescanning the token list for
    # each vocabulary word.
    counts = Counter(text.split())
    # A Counter yields 0 for missing keys, so absent words need no special case.
    return np.array([counts[word] for word in vocab], dtype=int)
# Vectorize every document: one row per text in `corpus`, one column per
# word in `vocab`.
vectors = np.array([text_to_vector(text, vocab) for text in corpus])
print("\n文本向量化结果:")
print(vectors)