上传文件至 /

This commit is contained in:
2026-04-23 15:54:48 +08:00
parent a02bdccba7
commit 1de88d8173
2 changed files with 123 additions and 0 deletions

23
260423 43.py Normal file
View File

@@ -0,0 +1,23 @@
# Bag-of-words demo: build a vocabulary from a small corpus and turn each
# document into a vector of per-word occurrence counts.
from collections import Counter

documents = [
    "Python是编程语言",
    "Java是编程语言",
    "Python Python Python",
]

# Tokenise every document once and reuse the result for both the vocabulary
# and the vectors.
# str.split() with no argument splits on whitespace only, so a sentence that
# contains no spaces (e.g. "Python是编程语言") stays a single token.
# NOTE(review): if word-level tokens are wanted for the Chinese sentences, a
# word segmenter would be required here -- confirm the intent.
tokenized_docs = [doc.split() for doc in documents]

# Unique tokens in first-seen order. dict.fromkeys de-duplicates while
# preserving insertion order, avoiding a linear "not in list" scan per token.
word_list = list(
    dict.fromkeys(word for words in tokenized_docs for word in words)
)

# The sorted vocabulary fixes the meaning of each vector position.
vocab = sorted(word_list)
# NOTE(review): the label "词变" looks like a typo for "词表" (vocabulary);
# it is left unchanged so the program's output stays the same -- confirm.
print("词变:", vocab)

# One count vector per document; position k holds how many times vocab[k]
# occurs in that document (0 when it does not occur).
bow_vectors = []
for words in tokenized_docs:
    counts = Counter(words)  # a single pass instead of list.count per word
    bow_vectors.append([counts[word] for word in vocab])

for index, vec in enumerate(bow_vectors, start=1):
    print(f"Doc{index}的向量表示:{vec}")

# Two drawbacks of the bag-of-words model:
#   1. It ignores word order and semantic relationships between words.
#   2. The vocabulary dimension explodes and the vectors become sparse.