Files
task-3-2-1-Text-Processing-…/44龙再飞.py
2026-04-23 16:02:52 +08:00

79 lines
1.9 KiB
Python

# Characters -> Unicode code points, for an ASCII string.
text = "Hello"
print(list(map(ord, text)))

# The same conversion applied to a Chinese string.
text_cn = "你好"
print(list(map(ord, text_cn)))

# chr() goes the other way: code point -> character.
print(chr(65))
import numpy as np
# Vectors with one, two and three components.
v1 = np.array([3])
v2 = np.array([2, 3])
v3 = np.array([1, 2, 3])
print(v1, v2, v3)

# Element-wise addition and multiplication by a scalar.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("加法:", np.add(a, b))
print("数乘:", np.multiply(a, 2))

# Length (Euclidean norm) of a vector.
v = np.array([3, 4])
print("长度:", np.linalg.norm(v))

# Dot product of a and b, computed two equivalent ways.
print("点积:", a.dot(b))
print("点积(@):", np.matmul(a, b))
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors *a* and *b*.

    Args:
        a: 1-D sequence of numbers (list, tuple or NumPy array).
        b: 1-D sequence of numbers with the same length as *a*.

    Returns:
        A scalar: ``dot(a, b) / (|a| * |b|)``. If either vector has zero
        length the cosine is undefined, and 0.0 is returned instead of
        ``nan``.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    # No trailing comma here: `np.dot(a, b),` would build a 1-tuple and the
    # division below would then produce a 1-element array, not a scalar.
    dot = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid 0/0 (nan plus a RuntimeWarning) for zero-length vectors.
        return np.float64(0.0)
    return dot / denominator
# Two hand-made pairs: b1 is a1 scaled by 2, while a2 and b2 point along
# different axes.
a1 = np.array([1, 2, 3])
b1 = np.array([2, 4, 6])
a2 = np.array([1, 0])
b2 = np.array([0, 1])
for label, left, right in (("相似度1:", a1, b1), ("相似度2:", a2, b2)):
    print(label, cosine_similarity(left, right))

# Toy 3-component vectors for cat, dog and apple; cat is compared with each
# of the other two.
cat = np.array([0.9, 0.9, 0.8])
dog = np.array([0.8, 0.9, 0.8])
apple = np.array([0.1, 0.3, 0.0])
for label, other in (("猫vs狗:", dog), ("猫vs苹果:", apple)):
    print(label, cosine_similarity(cat, other))
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words: count how often each vocabulary term occurs per document.
# NOTE(review): with CountVectorizer's default settings, tokens are
# lowercased and single-character tokens such as "是" are presumably dropped
# from the vocabulary — confirm against the printed output.
docs = ["Python 是 编程 语言", "Java 是 编程 语言"]
vectorizer = CountVectorizer()
bow = vectorizer.fit_transform(docs)

bow_vocabulary = vectorizer.get_feature_names_out()
print("词表:", bow_vocabulary)

# fit_transform returns a sparse matrix; densify it for display.
bow_dense = bow.toarray()
print("BoW矩阵:\n", bow_dense)
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF weighting of three short documents; the second one repeats a single
# word three times.
docs_tfidf = ["Python 编程 语言", "Python Python Python", "Java 编程 语言"]
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(docs_tfidf)

tfidf_vocabulary = tfidf.get_feature_names_out()
print("词表:", tfidf_vocabulary)

# Densify the sparse result so that every weight is printed.
tfidf_dense = tfidf_matrix.toarray()
print("TF-IDF矩阵:\n", tfidf_dense)
# Toy 3-component word vectors, keyed by word.
# Every word needs its own key: repeated keys in a dict literal silently
# overwrite one another, which would leave one vector being compared with
# itself (similarity trivially 1.0).
word_vectors = {
    "猫": [0.9, 0.1, 0.2],
    "狗": [0.8, 0.3, 0.1],
    "苹果": [0.1, 0.2, 0.9],
    "Python": [0.1, 0.0, 0.9],
    "Java": [0.1, 0.0, 0.85],
}
# Look up the two distinct entries named in the printed label.
print("猫vs狗:", cosine_similarity(word_vectors["猫"], word_vectors["狗"]))