3-2-1 文本数据处理导论

This commit is contained in:
2509165015
2026-04-23 16:02:35 +08:00
parent bdda7b3230
commit 14a04745b3
3 changed files with 260 additions and 0 deletions

14
0423+2509165015/1.py Normal file
View File

@@ -0,0 +1,14 @@
# Minimal bag-of-words (BoW) demo: build a vocabulary from a tiny corpus and
# turn every document into a vector of per-word occurrence counts.
from collections import Counter

# Toy corpus; tokens inside each document are separated by whitespace.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# Tokenise each document once so the vocabulary and the count vectors are
# derived from exactly the same tokens.
tokenized_docs = [doc.split() for doc in docs]

# Vocabulary: every distinct token in the corpus. Sorting gives the vector
# columns a deterministic order (a bare set has no guaranteed order).
vocab = sorted({word for words in tokenized_docs for word in words})
print("词表:", vocab)

# One vector per document; position i holds how many times vocab[i] occurs
# in that document.
bow_vectors = []
for words in tokenized_docs:
    # Count all tokens in a single pass; a Counter yields 0 for tokens that
    # are absent, so no membership check is needed.
    counts = Counter(words)
    bow_vectors.append([counts[word] for word in vocab])

# Report the vectors with 1-based document numbers.
for idx, vec in enumerate(bow_vectors, start=1):
    print(f"Doc{idx}向量:{vec}")