Files
task-3-2-1-Text-Processing-…/2509165016-8.py
2026-04-23 16:00:52 +08:00

41 lines
1.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Concept demo of Word2Vec-style word embeddings.
#
# Builds a tiny hand-written table of 3-dimensional "word vectors" and prints
# the cosine similarity between a few word pairs to illustrate that
# semantically related words end up with similar vectors.
import numpy as np


def cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec_a: First vector (NumPy array or any sequence of numbers).
        vec_b: Second vector, same length as ``vec_a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a plain ``float``. If either vector
        has zero length the similarity is undefined; ``0.0`` is returned
        instead of dividing by zero.
    """
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if norm_product == 0:
        # Guard against division by zero for all-zero vectors.
        return 0.0
    return float(np.dot(vec_a, vec_b) / norm_product)


print("=" * 50)
print("词嵌入Word Embedding概念演示")
print("=" * 50)
print()

# Pretend these vectors were trained by Word2Vec or a similar method
# (simplified to 3 dimensions). Real embeddings usually have 50/100/300
# dimensions.
# The keys for "cat" and "dog" were empty strings in the reviewed copy, which
# made the second entry overwrite the first; they are restored here to match
# the labels printed in the similarity section below.
# NOTE(review): the fruit words and the programming-language words both load
# on the third component, so pairs such as 苹果/Python will also score as
# highly similar in this toy table -- confirm whether that is intended.
word_vectors = {
    "猫": np.array([0.9, 0.1, 0.2]),  # high "animal" component, others low
    "狗": np.array([0.8, 0.3, 0.1]),  # high "animal" component
    "小猫": np.array([0.85, 0.2, 0.15]),  # small animal, also cat-like
    "苹果": np.array([0.1, 0.2, 0.9]),  # high "fruit" component
    "香蕉": np.array([0.1, 0.1, 0.85]),  # high "fruit" component
    "Python": np.array([0.1, 0.0, 0.9]),  # programming language
    "Java": np.array([0.1, 0.0, 0.85]),  # programming language
}

print("词向量简化版3维示意")
print("维度含义: [动物性, 植物性, 其他/技术性]")
print()
for word, vec in word_vectors.items():
    print(f" {word}: {vec}")
print()

# Compute pairwise similarities for a few illustrative word pairs.
print("语义相似度:")
print(f" 猫 vs 狗: {cosine_similarity(word_vectors['猫'], word_vectors['狗']):.3f}")
print(f" 猫 vs 小猫: {cosine_similarity(word_vectors['猫'], word_vectors['小猫']):.3f}")
print(f" 猫 vs 苹果: {cosine_similarity(word_vectors['猫'], word_vectors['苹果']):.3f}")
print(f" 苹果 vs 香蕉: {cosine_similarity(word_vectors['苹果'], word_vectors['香蕉']):.3f}")
print(f" Python vs Java: {cosine_similarity(word_vectors['Python'], word_vectors['Java']):.3f}")
print()
print("词嵌入的优势:")
print(" - 语义相似的词,向量也相似")
print(" - 可以做类比推理:国王-男人+女人=女王")