"""Tutorial script: from characters to vectors to simple text embeddings.

Walks through, in order:
  1. Unicode code points of characters (``ord`` / ``chr``).
  2. Basic NumPy vector arithmetic (add, scale, norm, dot product).
  3. Cosine similarity between vectors.
  4. Bag-of-words and TF-IDF text vectorization with scikit-learn.
  5. Cosine similarity on a tiny hand-written word-vector table.

Run the file as a script to print every demo; importing it only defines
``cosine_similarity`` and the demo helpers, without printing anything.
"""

import numpy as np


def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: 1-D sequence of numbers (NumPy array or plain list).
        b: 1-D sequence of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a NumPy float: 1.0 for vectors that
        point the same way, 0.0 for orthogonal ones, -1.0 for opposite ones.

    Note:
        If either vector has zero length the division is 0/0, which NumPy
        evaluates to ``nan`` (emitting a RuntimeWarning) rather than raising.
    """
    # No trailing comma here: `np.dot(a, b),` would build a 1-tuple and the
    # division below would raise TypeError.
    dot = np.dot(a, b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    return dot / (norm_a * norm_b)


def _demo_char_codes():
    """Print the Unicode code points of sample strings, and one reverse lookup."""
    text = "Hello"
    print([ord(c) for c in text])

    text_cn = "你好"
    print([ord(c) for c in text_cn])

    print(chr(65))


def _demo_vector_basics():
    """Print basic vector operations: addition, scaling, norm and dot product."""
    v1 = np.array([3])
    v2 = np.array([2, 3])
    v3 = np.array([1, 2, 3])
    print(v1, v2, v3)

    a = np.array([1, 2, 3])
    b = np.array([4, 5, 6])
    print("加法:", a + b)
    print("数乘:", a * 2)

    v = np.array([3, 4])
    print("长度:", np.linalg.norm(v))

    # np.dot and the @ operator compute the same inner product for 1-D arrays.
    print("点积:", np.dot(a, b))
    print("点积(@):", a @ b)


def _demo_cosine_similarity():
    """Print cosine similarities for parallel, orthogonal and toy feature vectors."""
    a1 = np.array([1, 2, 3])
    b1 = np.array([2, 4, 6])  # b1 == 2 * a1, i.e. same direction
    a2 = np.array([1, 0])
    b2 = np.array([0, 1])  # perpendicular to a2
    print("相似度1:", cosine_similarity(a1, b1))
    print("相似度2:", cosine_similarity(a2, b2))

    cat = np.array([0.9, 0.9, 0.8])
    dog = np.array([0.8, 0.9, 0.8])
    apple = np.array([0.1, 0.3, 0.0])
    print("猫vs狗:", cosine_similarity(cat, dog))
    print("猫vs苹果:", cosine_similarity(cat, apple))


def _demo_bag_of_words():
    """Print the vocabulary and count matrix of a bag-of-words model."""
    # Imported here so that only this demo requires scikit-learn.
    from sklearn.feature_extraction.text import CountVectorizer

    docs = [
        "Python 是 编程 语言",
        "Java 是 编程 语言",
    ]
    # NOTE(review): with the default token_pattern, scikit-learn keeps only
    # tokens of two or more characters, so the single-character "是" is
    # presumably absent from the vocabulary -- confirm against the output.
    vectorizer = CountVectorizer()
    bow = vectorizer.fit_transform(docs)
    print("词表:", vectorizer.get_feature_names_out())
    print("BoW矩阵:\n", bow.toarray())


def _demo_tfidf():
    """Print the vocabulary and weight matrix of a TF-IDF model."""
    # Imported here so that only this demo requires scikit-learn.
    from sklearn.feature_extraction.text import TfidfVectorizer

    docs_tfidf = [
        "Python 编程 语言",
        "Python Python Python",
        "Java 编程 语言",
    ]
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(docs_tfidf)
    print("词表:", tfidf.get_feature_names_out())
    print("TF-IDF矩阵:\n", tfidf_matrix.toarray())


def _demo_word_vectors():
    """Print the cosine similarity of two entries in a hand-written vector table."""
    word_vectors = {
        "猫": [0.9, 0.1, 0.2],
        "狗": [0.8, 0.3, 0.1],
        "苹果": [0.1, 0.2, 0.9],
        "Python": [0.1, 0.0, 0.9],
        "Java": [0.1, 0.0, 0.85],
    }
    # Plain lists are fine: np.dot and np.linalg.norm accept array-likes.
    print("猫vs狗:", cosine_similarity(word_vectors["猫"], word_vectors["狗"]))


def main():
    """Run every demo in the order the tutorial presents them."""
    _demo_char_codes()
    _demo_vector_basics()
    _demo_cosine_similarity()
    _demo_bag_of_words()
    _demo_tfidf()
    _demo_word_vectors()


if __name__ == "__main__":
    main()