上传文件至 /
This commit is contained in:
88
44龙再飞.py
88
44龙再飞.py
@@ -1,31 +1,79 @@
|
|||||||
|
|
||||||
text = "Hello"
|
text = "Hello"
|
||||||
print([ord(c) for c in text])
|
print([ord(c) for c in text])
|
||||||
|
|
||||||
|
|
||||||
|
text_cn = "你好"
|
||||||
|
print([ord(c) for c in text_cn])
|
||||||
print(chr(65))
|
print(chr(65))
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
A = np.array([3, 4])
|
|
||||||
B = np.array([1, 2])
|
v1 = np.array([3])
|
||||||
print(A + B)
|
v2 = np.array([2, 3])
|
||||||
print(2 * A)
|
v3 = np.array([1, 2, 3])
|
||||||
print(np.linalg.norm(A))
|
print(v1, v2, v3)
|
||||||
A = np.array([1, 2, 3])
|
|
||||||
B = np.array([4, 5, 6])
|
a = np.array([1, 2, 3])
|
||||||
print(np.dot(A, B))
|
b = np.array([4, 5, 6])
|
||||||
cos = np.dot(A,B)/(np.linalg.norm(A)*np.linalg.norm(B))
|
print("加法:", a + b)
|
||||||
print(cos)
|
|
||||||
A2 = np.array([1,0])
|
|
||||||
B2 = np.array([0,1])
|
print("数乘:", a * 2)
|
||||||
print(np.dot(A2,B2)/(np.linalg.norm(A2)*np.linalg.norm(B2)))
|
|
||||||
|
v = np.array([3, 4])
|
||||||
|
print("长度:", np.linalg.norm(v))
|
||||||
|
|
||||||
|
|
||||||
|
print("点积:", np.dot(a, b))
|
||||||
|
print("点积(@):", a @ b)
|
||||||
|
|
||||||
|
def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``.

    Args:
        a: First vector; a NumPy array or any sequence accepted by
            ``np.dot`` / ``np.linalg.norm`` (e.g. a plain list of numbers).
        b: Second vector, same length as ``a``.

    Returns:
        A scalar. 1 means the vectors point the same way, 0 means they
        are orthogonal, and -1 means they point in opposite directions.

    Note:
        There is no guard for zero-length (all-zero) vectors; the division
        is performed as-is.
    """
    # No trailing comma here: ``np.dot(a, b),`` would build a 1-tuple and
    # the result would become a length-1 array instead of a scalar.
    dot = np.dot(a, b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    return dot / (norm_a * norm_b)
|
||||||
|
|
||||||
|
|
||||||
|
a1 = np.array([1,2,3])
|
||||||
|
b1 = np.array([2,4,6])
|
||||||
|
a2 = np.array([1,0])
|
||||||
|
b2 = np.array([0,1])
|
||||||
|
print("相似度1:", cosine_similarity(a1, b1))
|
||||||
|
print("相似度2:", cosine_similarity(a2, b2))
|
||||||
|
|
||||||
|
cat = np.array([0.9, 0.9, 0.8])
|
||||||
|
dog = np.array([0.8, 0.9, 0.8])
|
||||||
|
apple = np.array([0.1, 0.3, 0.0])
|
||||||
|
print("猫vs狗:", cosine_similarity(cat, dog))
|
||||||
|
print("猫vs苹果:", cosine_similarity(cat, apple))
|
||||||
|
|
||||||
from sklearn.feature_extraction.text import CountVectorizer
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
docs = ["Python 是 编程 语言","Java 是 编程 语言","Python Python Python"]
|
docs = [
|
||||||
vec = CountVectorizer()
|
"Python 是 编程 语言",
|
||||||
print(vec.fit_transform(docs).toarray())
|
"Java 是 编程 语言",
|
||||||
print(vec.get_feature_names_out())
|
]
|
||||||
|
# NOTE(review): with default settings, scikit-learn's CountVectorizer is
# documented to tokenize with a pattern that keeps only tokens of two or more
# word characters, so the single-character token "是" in the documents is
# expected to be missing from the vocabulary. Confirm this is intended.
vectorizer = CountVectorizer()
|
||||||
|
bow = vectorizer.fit_transform(docs)
|
||||||
|
print("词表:", vectorizer.get_feature_names_out())
|
||||||
|
print("BoW矩阵:\n", bow.toarray())
|
||||||
|
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
docs = ["Python 编程","Java 编程","Python Python"]
|
docs_tfidf = [
|
||||||
|
"Python 编程 语言",
|
||||||
|
"Python Python Python",
|
||||||
|
"Java 编程 语言",
|
||||||
|
]
|
||||||
tfidf = TfidfVectorizer()
|
tfidf = TfidfVectorizer()
|
||||||
matrix = tfidf.fit_transform(docs)
|
tfidf_matrix = tfidf.fit_transform(docs_tfidf)
|
||||||
print("词表:", tfidf.get_feature_names_out())
|
print("词表:", tfidf.get_feature_names_out())
|
||||||
print("矩阵:\n", matrix.toarray())
|
print("TF-IDF矩阵:\n", tfidf_matrix.toarray())
|
||||||
|
|
||||||
|
word_vectors = {
|
||||||
|
"猫": [0.9, 0.1, 0.2],
|
||||||
|
"狗": [0.8, 0.3, 0.1],
|
||||||
|
"苹果": [0.1, 0.2, 0.9],
|
||||||
|
"Python": [0.1, 0.0, 0.9],
|
||||||
|
"Java": [0.1, 0.0, 0.85],
|
||||||
|
}
|
||||||
|
print("猫vs狗:", cosine_similarity(word_vectors["猫"], word_vectors["狗"]))
|
||||||
Reference in New Issue
Block a user