"""Minimal bag-of-words (BoW) demo.

Builds a sorted vocabulary from a small corpus and represents every
document as a vector of term counts aligned with that vocabulary.

Known drawbacks of the bag-of-words representation:
  1. It ignores word order and the semantic relations between words.
  2. The vector dimension grows with the vocabulary size (dimension
     explosion) and most entries are zero (sparsity).
"""
from collections import Counter


def _unique_tokens(docs):
    """Return the distinct whitespace-separated tokens of *docs*, first-seen order."""
    # dict.fromkeys de-duplicates while preserving insertion order, which
    # avoids a linear `token not in list` scan for every token.
    return list(dict.fromkeys(token for doc in docs for token in doc.split()))


def _count_vector(doc, terms):
    """Return how often each entry of *terms* occurs in *doc*, in *terms* order."""
    # Count the document once; Counter yields 0 for terms that are absent.
    counts = Counter(doc.split())
    return [counts[term] for term in terms]


# NOTE: str.split() only splits on whitespace, so a Chinese sentence written
# without spaces (e.g. the first two documents) is kept as one single token.
documents = [
    "Python是编程语言",
    "Java是编程语言",
    "Python Python Python",
]

# Distinct tokens in first-seen order; the vocabulary is their sorted form.
word_list = _unique_tokens(documents)
vocab = sorted(word_list)
# NOTE(review): the label below looks like a typo for "词表" (vocabulary);
# left untouched because it is runtime output -- confirm before changing.
print("词变:", vocab)

# One count vector per document, each aligned with `vocab`.
bow_vectors = [_count_vector(doc, vocab) for doc in documents]

for i, vec in enumerate(bow_vectors):
    print(f"Doc{i+1}的向量表示:{vec}")