上传文件至 /
This commit is contained in:
23
260423 43.py
Normal file
23
260423 43.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# Minimal bag-of-words (BoW) demo: build a vocabulary from a toy corpus and
# turn every document into a vector of per-token counts.
from collections import Counter

# Toy corpus used by the demo.
documents = [
    "Python是编程语言",
    "Java是编程语言",
    "Python Python Python",
]

# NOTE: str.split() only breaks on whitespace, so a sentence written without
# spaces (the first two documents) is kept as one single token.
# dict.fromkeys() drops duplicates while keeping first-seen order, avoiding
# the quadratic "if word not in list" scan.
word_list = list(
    dict.fromkeys(word for doc in documents for word in doc.split())
)

# The sorted vocabulary fixes the meaning of each vector dimension.
vocab = sorted(word_list)
print("词表:", vocab)

# One count vector per document, aligned with `vocab`.
bow_vectors = []
for doc in documents:
    # Count every token once instead of rescanning the document per vocab word.
    counts = Counter(doc.split())
    # Counter returns 0 for tokens that do not occur in this document.
    bow_vectors.append([counts[word] for word in vocab])

for i, vec in enumerate(bow_vectors, start=1):
    print(f"Doc{i}的向量表示:{vec}")

# Two drawbacks of the bag-of-words model:
#   1. It ignores word order and the semantic relations between words.
#   2. The vocabulary dimension explodes and the vectors become sparse.
|
||||
Reference in New Issue
Block a user