# Worked answers for exercises 3, 4, 5 and 7: vector arithmetic, cosine
# similarity, bag-of-words counts and TF-IDF weights.
import math
from typing import List, Sequence

# scikit-learn's default token pattern, r"(?u)\b\w\w+\b", only keeps tokens of
# two or more characters, so a one-character word such as "是" would silently
# vanish from the vocabulary. This pattern keeps one-character tokens as well.
TOKEN_PATTERN = r"(?u)\b\w+\b"


def _require_same_length(a: Sequence[float], b: Sequence[float]) -> None:
    """Raise ValueError unless *a* and *b* have the same number of components."""
    if len(a) != len(b):
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )


def vec_add(a: Sequence[float], b: Sequence[float]) -> List[float]:
    """Return the component-wise sum of *a* and *b*.

    Raises:
        ValueError: if the vectors differ in length.
    """
    _require_same_length(a, b)
    return [x + y for x, y in zip(a, b)]


def vec_scale(k: float, v: Sequence[float]) -> List[float]:
    """Return *v* with every component multiplied by the scalar *k*."""
    return [k * x for x in v]


def vec_norm(v: Sequence[float]) -> float:
    """Return the Euclidean length of *v*."""
    return math.sqrt(sum(x ** 2 for x in v))


def dot_product(a: Sequence[float], b: Sequence[float]) -> float:
    """Return the dot product of *a* and *b*.

    Raises:
        ValueError: if the vectors differ in length.
    """
    _require_same_length(a, b)
    return sum(x * y for x, y in zip(a, b))


def cosine_similarity(a: Sequence[float], b: Sequence[float]) -> float:
    """Return the cosine of the angle between *a* and *b*.

    Raises:
        ValueError: if the vectors differ in length, or if either vector has
            zero length (the angle is undefined in that case).
    """
    norm_product = vec_norm(a) * vec_norm(b)
    if norm_product == 0:
        raise ValueError("cosine similarity is undefined for a zero vector")
    return dot_product(a, b) / norm_product


def exercise_3() -> None:
    """Exercise 3: vector addition, scalar multiplication and length."""
    A = [3, 4]
    B = [1, 2]

    print("题目3")
    print("A + B =", vec_add(A, B))
    print("2 × A =", vec_scale(2, A))
    print("A 的长度 =", vec_norm(A))
    print()


def exercise_4() -> None:
    """Exercise 4: dot product and cosine similarity."""
    A = [1, 2, 3]
    B = [4, 5, 6]

    print("题目4")
    print("点积 =", dot_product(A, B))
    print("余弦相似度 =", round(cosine_similarity(A, B), 4))

    # Perpendicular unit vectors: the similarity is exactly 0.
    print("A=[1,0], B=[0,1] 余弦相似度 =", cosine_similarity([1, 0], [0, 1]))
    print()


def exercise_5() -> None:
    """Exercise 5: bag-of-words vocabulary and count vectors."""
    # Imported here so the vector helpers above stay usable without
    # scikit-learn installed.
    from sklearn.feature_extraction.text import CountVectorizer

    docs = [
        "Python 是 编程 语言",
        "Java 是 编程 语言",
        "Python Python Python",
    ]

    bow = CountVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = bow.fit_transform(docs)

    print("题目5")
    print("词表 =", list(bow.get_feature_names_out()))
    print("文档向量:")
    print(matrix.toarray())
    print()


def exercise_7() -> None:
    """Exercise 7: TF-IDF vocabulary and weight matrix (rounded to 4 places)."""
    from sklearn.feature_extraction.text import TfidfVectorizer

    docs = ["Python 编程", "Java 编程", "Python Python"]

    # Same token pattern as exercise 5 for consistency; these documents have no
    # one-character tokens, so the output matches the default pattern's.
    tfidf = TfidfVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = tfidf.fit_transform(docs)

    print("题目7")
    print("词表:", tfidf.get_feature_names_out())
    print("TF-IDF矩阵:")
    print(matrix.toarray().round(4))


def main() -> None:
    """Run every exercise in order, printing each answer to stdout."""
    exercise_3()
    exercise_4()
    exercise_5()
    exercise_7()


if __name__ == "__main__":
    main()