# task-3-2-1-Text-Processing-…/樊豐铭37.py

# -------------------- 题目3 --------------------
import math

# Two 2-D vectors used for the basic vector-arithmetic demo.
A = [3, 4]
B = [1, 2]

print("题目3")
# Component-wise sum of A and B.
print("A + B =", [a + b for a, b in zip(A, B)])
# Scalar multiple: every component of A doubled.
print("2 × A =", [2 * a for a in A])
# Euclidean length: square root of the sum of squared components.
print("A 的长度 =", math.sqrt(sum(a ** 2 for a in A)))
print()

# -------------------- 题目4 --------------------
def _dot(u, v):
    """Return the dot product of two equal-length vectors."""
    return sum(x * y for x, y in zip(u, v))


def _norm(v):
    """Return the Euclidean norm (length) of vector *v*."""
    return math.sqrt(sum(x ** 2 for x in v))


# First pair: two 3-D vectors pointing in similar directions.
A = [1, 2, 3]
B = [4, 5, 6]

dot = _dot(A, B)
normA = _norm(A)
normB = _norm(B)
# Cosine similarity = dot product divided by the product of the norms.
cos = dot / (normA * normB)

print("题目4")
print("点积 =", dot)
print("余弦相似度 =", round(cos, 4))

# Second pair: the two 2-D unit axes, which are orthogonal, so the dot
# product (and therefore the cosine similarity) is zero.
A2 = [1, 0]
B2 = [0, 1]
dot2 = _dot(A2, B2)
normA2 = _norm(A2)
normB2 = _norm(B2)
cos2 = dot2 / (normA2 * normB2)
print("A=[1,0], B=[0,1] 余弦相似度 =", cos2)
print()

# -------------------- 题目5 BoW --------------------
from sklearn.feature_extraction.text import CountVectorizer

# Three documents whose words are already separated by spaces. The third
# repeats a single token so its count shows up as 3 in the matrix.
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
    "Python Python Python",
]

# CountVectorizer's default token_pattern is r"(?u)\b\w\w+\b", which only
# keeps tokens of two or more word characters. That silently drops the
# single-character word "是" from the vocabulary and from every document
# vector. Accept tokens of one or more characters instead.
# (Lowercasing stays at its default, so "Python" is listed as "python".)
bow = CountVectorizer(token_pattern=r"(?u)\b\w+\b")

# Learn the vocabulary and build the document-term count matrix (sparse).
matrix = bow.fit_transform(docs)

print("题目5")
print("词表 =", list(bow.get_feature_names_out()))
print("文档向量：")
# One row per document, one column per vocabulary entry, in the order
# printed above.
print(matrix.toarray())
print()

# -------------------- 题目7 TF-IDF --------------------
from sklearn.feature_extraction.text import TfidfVectorizer

# Small space-separated corpus for the TF-IDF demo. Every token here has at
# least two characters, so the vectorizer's default tokenizer keeps them all.
docs = ["Python 编程", "Java 编程", "Python Python"]

# Learn vocabulary and IDF weights first, then turn the same documents into
# TF-IDF row vectors (sparse matrix).
tfidf = TfidfVectorizer().fit(docs)
matrix = tfidf.transform(docs)

print("题目7")
vocabulary = tfidf.get_feature_names_out()
print("词表:", vocabulary)
print("TF-IDF矩阵:")
# Densify for display and round to four decimals for readability.
dense = matrix.toarray()
print(dense.round(4))