# Python snippet (17 lines, 391 B)
from collections import Counter

# Toy corpus: each document is a string of whitespace-separated tokens.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# Vocabulary: every distinct token across the corpus, sorted so that each
# vector position maps to the same word on every run.
vocab = sorted(set(' '.join(docs).split()))
print("词表:", vocab)

# Bag-of-words: one count vector per document, aligned index-for-index
# with `vocab`.
bow_vectors = []
for doc in docs:
    words = doc.split()
    # Tally the document once instead of rescanning it for every vocab word.
    counts = Counter(words)
    # A Counter yields 0 for words that do not occur in this document.
    vector = [counts[word] for word in vocab]
    bow_vectors.append(vector)

for idx, vec in enumerate(bow_vectors):
    print(f"Doc{idx+1}向量:{vec}")