Files
task-3-2-1-Text-Processing-…/260421-2509165039.py
2026-04-21 23:34:25 +08:00

72 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Question 1: represent "Hello" in Python in two ways.
# 1. Use ord() to print the ASCII code of every character.
text = "Hello"
for char in text:
    # The print belongs to the loop body, so it runs once per character.
    print(f"'{char}'的ASCII码是:{ord(char)}")
# 2. Use chr() to verify that code 65 maps to the capital letter A.
char_65 = chr(65)
print(f"ASCII码65对应的字符是:{char_65}")
print(f"验证结果:{char_65 == 'A'}")
# 题目2
# 图像矩阵:图像的语义(如“猫”“车”)可通过像素的空间关系
# (如边缘、纹理)用数学模型(如卷积神经网络)提取,计算
# 机能通过模式识别自动学习这些特征。
# 文本数据:文本的语义是上下文依赖的(如“苹果”可指水果或
# 公司),且存在歧义(如“bank”可指银行或河岸)、隐喻(如
# “时间就是金钱”)等复杂现象。计算机难以像人类一样理解这
# 些抽象语义,需借助复杂的自然语言处理(NLP)技术(如词向
# 量、Transformer)模拟,但仍存在局限。
# Question 3: elementary vector operations on plain Python lists.
import math

A = [3, 4]
B = [1, 2]

# 1. A + B: add the two vectors component by component.
A_plus_B = [left + right for left, right in zip(A, B)]
print(f"A + B = {A_plus_B}")

# 2. 2 * A: multiply every component of A by the same scalar.
scalar = 2
two_A = [scalar * component for component in A]
print(f"2 * A = {two_A}")

# 3. Length (magnitude) of A: square root of the sum of squared components.
squares = [component**2 for component in A]
A_magnitutude = math.sqrt(sum(squares))
print(f"A的长度(模)是:{A_magnitutude}")
# Question 4: dot product and cosine similarity of two 3-D vectors.
import math

A = [1, 2, 3]
B = [4, 5, 6]

# 1. Dot product A * B: sum of the pairwise component products.
dot_product = sum(a * b for a, b in zip(A, B))
print(f"A * B ={dot_product}")

# 2. Cosine similarity: the dot product divided by the product of the two
#    Euclidean norms. A, B and dot_product from step 1 are reused here
#    instead of being defined and computed a second time.
norm_A = math.sqrt(sum(a**2 for a in A))
norm_B = math.sqrt(sum(b**2 for b in B))
cosine_similarity = dot_product / (norm_A * norm_B)
print(f"A和B的余弦相似度是:{cosine_similarity:.4f}")
# 3.如果 A = [1, 0],B = [0, 1],它们的余弦相似度是多少?为什么?
# A \cdot B = 1 \times 0 + 0 \times 1 = 0
# \|A\| = \sqrt{1^2 + 0^2} = 1
# \|B\| = \sqrt{0^2 + 1^2} = 1
# \text{cosine similarity} = \frac{0}{1 \times 1} = 0
# 原因:
# 余弦相似度衡量的是两个向量方向的相似性(夹角余弦值)。
# A = [1, 0]是x轴正方向的单位向量,B = [0, 1]是y轴正方向的单位向
# 量,它们的夹角为90°(垂直),而\cos(90°)= 0。因此余弦相似度
# 为0,说明它们方向完全无关(正交)。