"""Concept demo of Word2Vec-style word embeddings.

Prints a handful of hand-written 3-dimensional "word vectors" and the cosine
similarity between selected pairs, to illustrate that semantically related
words sit close together in the embedding space.
"""
import numpy as np


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    Args:
        a: First vector (1-D array).
        b: Second vector, same length as *a*.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a Python float, or ``0.0`` when either
        vector has zero length (the angle is undefined in that case, and
        returning 0.0 avoids a NaN / divide-by-zero warning).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)


print("=" * 50)
print("词嵌入(Word Embedding)概念演示")
print("=" * 50)
print()

# Pretend these vectors were trained by Word2Vec or a similar method
# (simplified to 3 dimensions). Real embeddings usually have 50/100/300
# dimensions.
# NOTE(review): the fruit vectors and the programming-language vectors both
# peak on the third axis, so e.g. "苹果" and "Python" come out as nearly
# identical under cosine similarity. Values are kept as in the original demo.
word_vectors = {
    "猫": np.array([0.9, 0.1, 0.2]),  # high "animal" score, low elsewhere
    "狗": np.array([0.8, 0.3, 0.1]),  # high "animal" score
    "小猫": np.array([0.85, 0.2, 0.15]),  # small animal, also cat-like
    "苹果": np.array([0.1, 0.2, 0.9]),  # high "fruit" score
    "香蕉": np.array([0.1, 0.1, 0.85]),  # high "fruit" score
    "Python": np.array([0.1, 0.0, 0.9]),  # programming language
    "Java": np.array([0.1, 0.0, 0.85]),  # programming language
}

print("词向量(简化版3维)示意:")
print("维度含义: [动物性, 植物性, 其他/技术性]")
print()
for word, vec in word_vectors.items():
    print(f" {word}: {vec}")
print()

# Compute pairwise similarities for a few illustrative pairs.
print("语义相似度:")
print(f" 猫 vs 狗: {cosine_similarity(word_vectors['猫'], word_vectors['狗']):.3f}")
print(f" 猫 vs 小猫: {cosine_similarity(word_vectors['猫'], word_vectors['小猫']):.3f}")
print(f" 猫 vs 苹果: {cosine_similarity(word_vectors['猫'], word_vectors['苹果']):.3f}")
print(f" 苹果 vs 香蕉: {cosine_similarity(word_vectors['苹果'], word_vectors['香蕉']):.3f}")
print(f" Python vs Java: {cosine_similarity(word_vectors['Python'], word_vectors['Java']):.3f}")
print()
print("词嵌入的优势:")
print(" - 语义相似的词,向量也相似")
print(" - 可以做类比推理:国王-男人+女人=女王")