"""Bag-of-words (BoW) demo: vectorize two short documents with CountVectorizer.

The documents are already segmented into space-separated words, so the
vectorizer only has to split on word boundaries and count occurrences.
"""

from sklearn.feature_extraction.text import CountVectorizer

# Document collection (pre-tokenized, words separated by spaces).
docs = [
    "Python 是 编程 语言",
    "Java 是 编程 语言",
]

# BoW vectorization.
# CountVectorizer's defaults would lowercase every token and, through the
# default token_pattern r"(?u)\b\w\w+\b", silently drop single-character
# tokens such as "是". Keep the original case and accept tokens of length 1
# so that every word in the documents ends up in the vocabulary.
vectorizer = CountVectorizer(lowercase=False, token_pattern=r"(?u)\b\w+\b")
bow_matrix = vectorizer.fit_transform(docs)

print("词表:", vectorizer.get_feature_names_out())
# Expected output (the vocabulary is sorted, not kept in first-seen order):
# 词表: ['Java' 'Python' '是' '编程' '语言']

print("BoW矩阵:")
print(bow_matrix.toarray())
# Expected output (columns follow the sorted vocabulary above):
# [[0 1 1 1 1]    # Python doc: Java=0, Python=1, 是=1, 编程=1, 语言=1
#  [1 0 1 1 1]]   # Java doc:   Java=1, Python=0, 是=1, 编程=1, 语言=1