From 60691ec6431494d787883af3c5a6e9a434e013b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9E=97=E4=BC=9F=E6=B3=B0?=
 <2509165006@student.example.com>
Date: Thu, 23 Apr 2026 16:05:03 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20/?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 2026.4.2306 林伟泰.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 2026.4.2306 林伟泰.py

diff --git a/2026.4.2306 林伟泰.py b/2026.4.2306 林伟泰.py
new file mode 100644
index 0000000..0e5ddd8
--- /dev/null
+++ b/2026.4.2306 林伟泰.py	
@@ -0,0 +1,21 @@
+
+from sklearn.feature_extraction.text import CountVectorizer
+
+# Bag-of-words demo on two pre-segmented (whitespace-separated) documents.
+docs = [
+    "Python 是 编程 语言",
+    "Java  是 编程 语言",
+]
+
+# Defaults drop 1-char tokens (e.g. "是") and lowercase text; override both.
+vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b", lowercase=False)
+bow_matrix = vectorizer.fit_transform(docs)
+
+print("词表:", vectorizer.get_feature_names_out())
+# Output (vocabulary is sorted): ['Java' 'Python' '是' '编程' '语言']
+
+print("BoW矩阵:")
+print(bow_matrix.toarray())
+# Output (columns follow the sorted vocabulary above):
+# [[0 1 1 1 1]   # Python doc: Java=0, Python=1, 是=1, 编程=1, 语言=1
+#  [1 0 1 1 1]]  # Java doc:   Java=1, Python=0, 是=1, 编程=1, 语言=1
\ No newline at end of file