Files
task-3-2-1-Text-Processing-…/fps.py
2026-04-21 11:23:56 +08:00

44 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Show each character of "Hello" next to its Unicode code point.
for c in "Hello":
    # The loop body must be indented; without it the file fails to parse.
    print(c, ord(c))
# Inverse mapping: turn code point 65 back into its character.
print(chr(65))
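# Data representation: an image is stored as a numeric matrix of pixel values, i.e. structured numeric data that a computer can process directly with mathematical operations (convolution, matrix arithmetic). Text, by contrast, is a sequence of symbols, i.e. unstructured character data that must first be encoded (e.g. as word vectors) before it becomes numeric, so it cannot be processed directly with simple matrix operations.
# Semantic understanding: the meaning of an image rests on visual features (edges, colour, texture) whose regularities are relatively intuitive; the meaning of text depends on context, grammar, cultural background and more, which makes it ambiguous and abstract, so it is hard for a computer to grasp the deeper meaning of natural language the way a human does.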
#向量基础
import numpy as np
# Vector basics on two 2-D vectors: element-wise sum, scalar multiple and
# Euclidean length.
A, B = np.array([3, 4]), np.array([1, 2])

# Compute the three quantities up front; they are independent of each other.
add_result = np.add(A, B)        # 1. A + B
mul_result = np.multiply(2, A)   # 2. 2 × A
norm_A = np.linalg.norm(A)       # 3. length (L2 norm) of A

# Report the results in the same order as the exercise lists them.
print("A + B =", add_result)
print("2 × A =", mul_result)
print("A的长度 =", norm_A)
import numpy as np
from numpy.linalg import norm
def _cosine(u, v):
    """Return the cosine of the angle between vectors *u* and *v*.

    Computed as the dot product divided by the product of the two
    Euclidean norms. Neither vector may have zero length, because the
    denominator would then be zero.
    """
    return np.dot(u, v) / (norm(u) * norm(v))


# Dot product and cosine similarity of two 3-D vectors.
A, B = np.array([1, 2, 3]), np.array([4, 5, 6])

# 1. Dot product A·B.
dot_product = np.dot(A, B)
print("A·B =", dot_product)

# 2. Cosine similarity of A and B.
cosine_similarity = _cosine(A, B)
print("余弦相似度 =", cosine_similarity)

# 3. Cosine similarity of the unit vectors [1, 0] and [0, 1].
A_new, B_new = np.array([1, 0]), np.array([0, 1])
cosine_similarity_new = _cosine(A_new, B_new)
print("新向量的余弦相似度 =", cosine_similarity_new)
# The printed explanation: the vectors are perpendicular (orthogonal), so
# their dot product is 0 and the cosine similarity is therefore 0.
print("原因两个向量相互垂直正交点积为0因此余弦相似度为0")