"""Worked answers for vector arithmetic, cosine similarity, BoW and TF-IDF.

Running this file as a script prints the answers for problems 3, 4, 5 and 7.
The small vector helpers are importable on their own and only need the
standard library; scikit-learn is imported lazily by the two problems that
use it.
"""
import math
from typing import List, Sequence

# Token pattern handed to the scikit-learn vectorizers.  Their default pattern
# only keeps tokens of two or more word characters, which would silently drop
# the single-character word "是" from the pre-segmented documents below.
TOKEN_PATTERN = r"(?u)\b\w+\b"


def _require_same_length(u: Sequence[float], v: Sequence[float]) -> None:
    """Raise ``ValueError`` when *u* and *v* do not have the same length."""
    if len(u) != len(v):
        raise ValueError(
            f"vectors must have the same length, got {len(u)} and {len(v)}"
        )


def vector_add(u: Sequence[float], v: Sequence[float]) -> List[float]:
    """Return the element-wise sum of *u* and *v*.

    Raises:
        ValueError: if the two vectors differ in length.
    """
    _require_same_length(u, v)
    return [a + b for a, b in zip(u, v)]


def vector_scale(k: float, v: Sequence[float]) -> List[float]:
    """Return *v* with every component multiplied by the scalar *k*."""
    return [k * x for x in v]


def vector_norm(v: Sequence[float]) -> float:
    """Return the Euclidean length of *v* (square root of the sum of squares)."""
    return math.sqrt(sum(x ** 2 for x in v))


def dot_product(u: Sequence[float], v: Sequence[float]) -> float:
    """Return the dot product of *u* and *v*.

    Raises:
        ValueError: if the two vectors differ in length.
    """
    _require_same_length(u, v)
    return sum(a * b for a, b in zip(u, v))


def cosine_similarity(u: Sequence[float], v: Sequence[float]) -> float:
    """Return the cosine of the angle between *u* and *v*.

    Raises:
        ValueError: if the vectors differ in length, or if either vector has
            zero length (the similarity is undefined in that case).
    """
    denominator = vector_norm(u) * vector_norm(v)
    if denominator == 0:
        raise ValueError("cosine similarity is undefined for a zero vector")
    return dot_product(u, v) / denominator


def problem3() -> None:
    """Print the sum, a scalar multiple and the length for problem 3."""
    a = [3, 4]
    b = [1, 2]

    print("题目3")
    print("A + B =", vector_add(a, b))
    print("2 × A =", vector_scale(2, a))
    print("A 的长度 =", vector_norm(a))
    print()


def problem4() -> None:
    """Print the dot product and cosine similarities for problem 4."""
    a = [1, 2, 3]
    b = [4, 5, 6]

    print("题目4")
    print("点积 =", dot_product(a, b))
    print("余弦相似度 =", round(cosine_similarity(a, b), 4))
    # Orthogonal unit vectors: the similarity is printed unrounded.
    print("A=[1,0], B=[0,1] 余弦相似度 =", cosine_similarity([1, 0], [0, 1]))
    print()


def problem5() -> None:
    """Print the bag-of-words vocabulary and count matrix for problem 5."""
    # Imported here so problems 3 and 4 still run without scikit-learn.
    from sklearn.feature_extraction.text import CountVectorizer

    docs = [
        "Python 是 编程 语言",
        "Java 是 编程 语言",
        "Python Python Python",
    ]

    # Explicit token pattern so the one-character token "是" is counted.
    bow = CountVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = bow.fit_transform(docs)

    print("题目5")
    print("词表 =", list(bow.get_feature_names_out()))
    print("文档向量:")
    print(matrix.toarray())
    print()


def problem7() -> None:
    """Print the TF-IDF vocabulary and weight matrix for problem 7."""
    # Imported here so problems 3 and 4 still run without scikit-learn.
    from sklearn.feature_extraction.text import TfidfVectorizer

    docs = ["Python 编程", "Java 编程", "Python Python"]
    # Same tokenisation rule as problem 5; these documents contain no
    # single-character tokens, so the resulting matrix is unaffected.
    tfidf = TfidfVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = tfidf.fit_transform(docs)

    print("题目7")
    print("词表:", tfidf.get_feature_names_out())
    print("TF-IDF矩阵:")
    print(matrix.toarray().round(4))


def main() -> None:
    """Print the answers to every problem, in the original order."""
    problem3()
    problem4()
    problem5()
    problem7()


if __name__ == "__main__":
    main()