"""Exercises 3-7: vector arithmetic, cosine similarity, bag-of-words, TF-IDF.

Run as a script to print the results of every exercise in order.
The vector helpers are plain functions over sequences of numbers so they can
be reused for vectors of any length.
"""
import math

# scikit-learn's default token pattern is r"(?u)\b\w\w+\b", which silently
# discards single-character tokens.  The documents below are already
# segmented by spaces and contain one-character words (e.g. "是"), so every
# run of word characters must count as a token.
TOKEN_PATTERN = r"(?u)\b\w+\b"


def vector_add(u, v):
    """Return the element-wise sum of two equal-length vectors as a list."""
    return [a + b for a, b in zip(u, v)]


def scale(k, v):
    """Return vector *v* with every component multiplied by scalar *k*."""
    return [k * x for x in v]


def dot(u, v):
    """Return the dot product of two equal-length vectors."""
    return sum(a * b for a, b in zip(u, v))


def norm(v):
    """Return the Euclidean length of vector *v* as a float."""
    return math.sqrt(sum(x ** 2 for x in v))


def cosine_similarity(u, v):
    """Return the cosine of the angle between vectors *u* and *v*.

    Raises:
        ValueError: if either vector has zero length, because the cosine
            is undefined in that case.
    """
    denominator = norm(u) * norm(v)
    if denominator == 0:
        raise ValueError("cosine similarity is undefined for a zero vector")
    return dot(u, v) / denominator


def exercise_3():
    """Exercise 3: addition, scalar multiplication and length of 2-D vectors."""
    a = [3, 4]
    b = [1, 2]

    print("题目3")
    print("A + B =", vector_add(a, b))
    print("2 × A =", scale(2, a))
    print("A 的长度 =", norm(a))
    print()


def exercise_4():
    """Exercise 4: dot product and cosine similarity."""
    a = [1, 2, 3]
    b = [4, 5, 6]

    print("题目4")
    print("点积 =", dot(a, b))
    print("余弦相似度 =", round(cosine_similarity(a, b), 4))
    # Orthogonal unit vectors: the similarity is exactly 0.0.
    print("A=[1,0], B=[0,1] 余弦相似度 =", cosine_similarity([1, 0], [0, 1]))
    print()


def exercise_5():
    """Exercise 5: bag-of-words counts for three pre-segmented documents."""
    # Imported here so the vector helpers stay usable without scikit-learn.
    from sklearn.feature_extraction.text import CountVectorizer

    docs = [
        "Python 是 编程 语言",
        "Java 是 编程 语言",
        "Python Python Python",
    ]

    bow = CountVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = bow.fit_transform(docs)

    print("题目5")
    # .tolist() converts NumPy string scalars to plain str so the list
    # prints as ['java', ...] instead of [np.str_('java'), ...] on NumPy 2.
    print("词表 =", bow.get_feature_names_out().tolist())
    print("文档向量:")
    print(matrix.toarray())
    print()


def exercise_7():
    """Exercise 7: TF-IDF weights for three pre-segmented documents."""
    # Imported here so the vector helpers stay usable without scikit-learn.
    from sklearn.feature_extraction.text import TfidfVectorizer

    docs = ["Python 编程", "Java 编程", "Python Python"]

    # Same tokenisation rule as exercise 5, for consistency.
    tfidf = TfidfVectorizer(token_pattern=TOKEN_PATTERN)
    matrix = tfidf.fit_transform(docs)

    print("题目7")
    print("词表:", tfidf.get_feature_names_out())
    print("TF-IDF矩阵:")
    print(matrix.toarray().round(4))


def main():
    """Run every exercise in order."""
    exercise_3()
    exercise_4()
    exercise_5()
    exercise_7()


if __name__ == "__main__":
    main()