Files
task-3-2-1-Text-Processing-…/wce.py
2026-04-21 11:26:28 +08:00

69 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ===================== Part 1: Text data basics =====================
print("===== 题目1文本字符表示与ASCII码 =====")
# Way 1: write the string "Hello" directly as a literal.
str1 = "Hello"
# Way 2: build the same string from a sequence of single characters
# (equivalent to "Hello").
str2 = 'H' + 'e' + 'l' + 'l' + 'o'
print(f"两种方式表示Hello\n方式1{str1}\n方式2{str2}\n")
# Use ord() to print the numeric code of every character.
print("每个字符的ASCII码")
for char in str1:
    # The loop body must be indented; without it the script fails to parse.
    print(f"{char} -> {ord(char)}")
# Use chr() to verify that 65 maps back to the capital letter A.
print(f"\nchr(65) = {chr(65)}验证65 对应大写字母 A\n")
# ===================== 第二部分:向量基础 =====================
import math
print("===== 题目3二维向量计算 =====")
A, B = [3, 4], [1, 2]
# 1. Component-wise sum A + B.
add_res = [a + b for a, b in zip(A, B)]
print(f"A + B = {add_res}")
# 2. Scalar multiple 2 × A.
mul_res = [2 * a for a in A]
print(f"2 × A = {mul_res}")
# 3. Euclidean length (norm) of A: square root of the sum of squares.
len_A = math.sqrt(sum(a ** 2 for a in A))
print(f"A 的长度(模)= {len_A}\n")
print("===== 题目4点积与余弦相似度 =====")
A1, B1 = [1, 2, 3], [4, 5, 6]
# 1. Dot product: sum of the pairwise products.
dot_product = sum(x * y for x, y in zip(A1, B1))
print(f"A·B = {dot_product}")
# 2. Cosine similarity
def cos_sim(vec1, vec2):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        vec1: Iterable of numbers.
        vec2: Iterable of numbers with the same number of components
            as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)`` as a float. When the
        product of the norms is zero (a zero vector or empty input) the
        angle is undefined, so ``0.0`` is returned instead of letting the
        division raise ``ZeroDivisionError``.

    Raises:
        ValueError: If the two vectors have different lengths.
    """
    # Materialize once so generators work and len() is available.
    vec1, vec2 = list(vec1), list(vec2)
    if len(vec1) != len(vec2):
        # zip() would silently truncate the dot product while the norms
        # still used every component, producing a meaningless value.
        raise ValueError(
            f"vectors must have the same length, got {len(vec1)} and {len(vec2)}"
        )
    dot = sum(a * b for a, b in zip(vec1, vec2))
    mod1 = math.sqrt(sum(x ** 2 for x in vec1))
    mod2 = math.sqrt(sum(x ** 2 for x in vec2))
    denom = mod1 * mod2
    if denom == 0:
        return 0.0
    return dot / denom
sim1 = cos_sim(A1, B1)
print(f"余弦相似度 = {sim1:.4f}")
# 3. Cosine similarity of A=[1,0] and B=[0,1].
A2, B2 = [1, 0], [0, 1]
sim2 = cos_sim(A2, B2)
print(f"A=[1,0], B=[0,1] 余弦相似度 = {sim2}")
print("原因两个向量互相垂直正交夹角90度cos90°=0\n")
# ===================== Question 2: written answers =====================
# One print call with a newline separator emits the same lines as
# printing each answer line individually.
print(
    "===== 题目2思考题答案 =====",
    "1. 数据表示形式:",
    " - 图像:天然是数值矩阵(像素值),计算机可直接存储、运算",
    " - 文本:是抽象字符编码,无固定数值结构,需额外编码转换",
    "2. 语义理解:",
    " - 图像:直观的视觉信号,无歧义,计算机易处理",
    " - 文本:包含上下文、歧义、语法、情感等复杂语义,计算机难以直接理解",
    sep="\n",
)