Files
task-3-2-1-Text-Processing-…/260423 43.py
2026-04-23 15:54:48 +08:00

23 lines
622 B
Python

from collections import Counter

# Bag-of-words (BoW) demo: build a vocabulary from a tiny corpus and turn each
# document into a vector of per-token counts over that vocabulary.

# Toy corpus used for the demonstration.
documents = [
    "Python是编程语言",
    "Java是编程语言",
    "Python Python Python",
]

# Tokenize every document exactly once and reuse the result below.
# NOTE: str.split() only breaks on whitespace, so a Chinese sentence written
# without spaces (the first two documents) stays a single token. Word-level
# counts for such text would require a real word segmenter.
tokenized_docs = [doc.split() for doc in documents]

# Unique tokens in first-seen order. dict.fromkeys de-duplicates in linear
# time while preserving insertion order, unlike repeated list membership tests.
word_list = list(
    dict.fromkeys(word for words in tokenized_docs for word in words)
)

# The sorted vocabulary fixes the column order shared by every vector.
vocab = sorted(word_list)
print("词表:", vocab)  # "词表" = vocabulary

# One count vector per document; Counter yields 0 for tokens that are absent,
# so every vector has exactly len(vocab) entries.
bow_vectors = []
for words in tokenized_docs:
    counts = Counter(words)
    bow_vectors.append([counts[word] for word in vocab])

# Report the vectors with 1-based document numbers.
for i, vec in enumerate(bow_vectors, start=1):
    print(f"Doc{i}的向量表示:{vec}")

# Two drawbacks of the bag-of-words model:
#   1. It ignores word order and the semantic relationships between words.
#   2. The vocabulary dimension explodes and the vectors become sparse.