Files
task-3-1-3-Matrix-Fundament…/0416+2509165015/4.py

17 lines
513 B
Python

from collections import Counter

import numpy as np
# Toy corpus: each entry is one sentence whose tokens are separated by spaces.
corpus = [
    "我 喜欢 编程",
    "我 喜欢 学习 Python",
    "编程 是 有趣 的",
]
# Vocabulary: every distinct token in the corpus, sorted so that each word
# gets a stable position.
vocab = sorted({token for sentence in corpus for token in sentence.split()})
print("词汇表:", vocab)
def text_to_vector(text, vocab):
    """Return the bag-of-words count vector of *text* over *vocab*.

    Args:
        text: String of whitespace-separated tokens.
        vocab: Sequence of vocabulary words; position ``i`` of the result
            corresponds to ``vocab[i]``.

    Returns:
        A 1-D integer NumPy array of length ``len(vocab)`` whose ``i``-th
        entry is the number of times ``vocab[i]`` occurs among the tokens of
        *text*. Tokens that are not in *vocab* are ignored.
    """
    # Tally every token once instead of rescanning the token list for each
    # vocabulary word.
    counts = Counter(text.split())
    vector = np.zeros(len(vocab), dtype=int)
    for i, word in enumerate(vocab):
        # Counter returns 0 for words that never appear in the text.
        vector[i] = counts[word]
    return vector
# Build one count vector per sentence and collect them into a single array
# (one row per corpus entry, one column per vocabulary word).
vectors = np.array(
    [text_to_vector(sentence, vocab) for sentence in corpus]
)
print("\n文本向量化结果:", vectors, sep="\n")