3-2-1 文本数据处理导论

This commit is contained in:
2509165015
2026-04-23 16:02:35 +08:00
parent bdda7b3230
commit 14a04745b3
3 changed files with 260 additions and 0 deletions

10
0423+2509165015/2.py Normal file
View File

@@ -0,0 +1,10 @@
# Bag-of-words demo: two sentences built from the same words in a different order.
sent1 = "猫 抓 老鼠"
sent2 = "老鼠 抓 猫"

# Tokenise each sentence once on whitespace and reuse the token lists below.
tokens1 = sent1.split()
tokens2 = sent2.split()

# Shared vocabulary: the distinct tokens of both sentences, in sorted order.
vocab_sent = sorted({*tokens1, *tokens2})

# One occurrence count per vocabulary entry, aligned with vocab_sent.
vec1 = list(map(tokens1.count, vocab_sent))
vec2 = list(map(tokens2.count, vocab_sent))

print("句子1向量", vec1)
print("句子2向量", vec2)
# The counts carry no word-order information, so this comparison prints True here.
print("向量是否相同：", vec1 == vec2)

# Synthetic vocabulary of 10000 placeholder words; only its length is reported.
large_vocab = [f"word_{i}" for i in range(10000)]
print(f"大词表维度：{len(large_vocab)}")