From 435408131453330e6dd92d71d9ec7e0dc8ebe65e Mon Sep 17 00:00:00 2001 From: 2509165045 <2509165045@student.edu.cn> Date: Thu, 25 Jun 2026 15:12:13 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9C=9F=E6=9C=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- q2_1_crawler/movies.html | 130 +++++++++++------------ q2_1_crawler/{movie.json => movies.json} | 106 +++++++++--------- q2_1_crawler/q2_1.py | 2 +- q3/q3_1/q3_1_image_labels.zip | Bin 0 -> 898 bytes q3/q3_2/q3_2_takeout_reviews.json | 1 + q4/q4_1/q4_1.py | 6 ++ 6 files changed, 126 insertions(+), 119 deletions(-) rename q2_1_crawler/{movie.json => movies.json} (66%) create mode 100644 q3/q3_1/q3_1_image_labels.zip create mode 100644 q3/q3_2/q3_2_takeout_reviews.json create mode 100644 q4/q4_1/q4_1.py diff --git a/q2_1_crawler/movies.html b/q2_1_crawler/movies.html index 513eae1..bdd77c2 100644 --- a/q2_1_crawler/movies.html +++ b/q2_1_crawler/movies.html @@ -1,5 +1,5 @@ - - + + @@ -19,7 +19,7 @@

电影列表

-

数据编号:B-20260623-1192

+

数据编号:B-20260623-8994

@@ -38,112 +38,112 @@ - + - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - + + + + - + - - - - - + + + + + - + - - - - + + + + - + - - - - + + + + - + - - - + + + - + - + - - - + + + diff --git a/q2_1_crawler/movie.json b/q2_1_crawler/movies.json similarity index 66% rename from q2_1_crawler/movie.json rename to q2_1_crawler/movies.json index fdce86a..aca6aed 100644 --- a/q2_1_crawler/movie.json +++ b/q2_1_crawler/movies.json @@ -1,102 +1,102 @@ [ { "id": 1, - "title": "三傻大闹宝莱坞", + "title": "泰坦尼克号", "director": "Frank Darabont", - "year": 2018, - "rating": 7.0, - "duration": 118, - "genre": "动画", - "actors_count": 5 + "year": 1994, + "rating": 9.1, + "duration": 150, + "genre": "剧情", + "actors_count": 2 }, { "id": 2, - "title": "霸王别姬", + "title": "肖申克的救赎", "director": "陈凯歌", - "year": 2012, - "rating": 7.1, - "duration": 119, - "genre": "爱情", - "actors_count": 4 + "year": 2004, + "rating": 7.8, + "duration": 180, + "genre": "剧情", + "actors_count": 2 }, { "id": 3, - "title": "星际穿越", + "title": "霸王别姬", "director": "Robert Zemeckis", - "year": 2015, - "rating": 8.8, - "duration": 171, + "year": 2014, + "rating": 7.6, + "duration": 160, "genre": "冒险", - "actors_count": 3 + "actors_count": 2 }, { "id": 4, - "title": "肖申克的救赎", + "title": "忠犬八公的故事", "director": "James Cameron", - "year": 2017, - "rating": 8.2, - "duration": 149, - "genre": "剧情", - "actors_count": 3 + "year": 1992, + "rating": 9.5, + "duration": 167, + "genre": "悬疑", + "actors_count": 5 }, { "id": 5, - "title": "阿甘正传", + "title": "盗梦空间", "director": "宫崎骏", - "year": 2001, - "rating": 7.1, - "duration": 163, - "genre": "悬疑", + "year": 2014, + "rating": 8.2, + "duration": 176, + "genre": "喜剧", "actors_count": 3 }, { "id": 6, - "title": "泰坦尼克号", + "title": "阿甘正传", "director": "Christopher Nolan", - "year": 1996, - "rating": 8.6, - "duration": 171, - "genre": "冒险", - "actors_count": 5 + "year": 2016, + "rating": 6.9, + "duration": 97, + "genre": "动画", + "actors_count": 2 }, { "id": 7, - "title": "放牛班的春天", + "title": "三傻大闹宝莱坞", "director": "Lasse Hallström", - "year": 2010, - "rating": 7.8, - "duration": 126, - "genre": "科幻", + "year": 2003, + "rating": 7.0, + "duration": 122, + "genre": "悬疑", "actors_count": 2 }, { "id": 8, - "title": "千与千寻", + "title": "放牛班的春天", "director": "Rajkumar Hirani", - "year": 2002, - "rating": 8.6, - "duration": 160, - "genre": "悬疑", + "year": 2021, + "rating": 8.7, + "duration": 140, + "genre": "喜剧", "actors_count": 5 }, { "id": 9, - "title": "忠犬八公的故事", + "title": "星际穿越", "director": "Christophe Barratier", - "year": 1997, - "rating": 7.9, - "duration": 138, + "year": 2002, + "rating": 9.0, + "duration": 125, "genre": "冒险", "actors_count": 5 }, { "id": 10, - "title": "盗梦空间", + "title": "千与千寻", "director": "Christopher Nolan", - "year": 2008, + "year": 2021, "rating": 7.3, - "duration": 158, - "genre": "爱情", - "actors_count": 5 + "duration": 173, + "genre": "剧情", + "actors_count": 3 } ] \ No newline at end of file diff --git a/q2_1_crawler/q2_1.py b/q2_1_crawler/q2_1.py index 51dbfdb..c968ddd 100644 --- a/q2_1_crawler/q2_1.py +++ b/q2_1_crawler/q2_1.py @@ -38,5 +38,5 @@ for item in items: "actors_count": actors_count }) print(data) -with open("movie.json","w",encoding="utf-8") as f: +with open("movies.json","w",encoding="utf-8") as f: json.dump(data,f,ensure_ascii=False,indent=4) \ No newline at end of file diff --git a/q3/q3_1/q3_1_image_labels.zip b/q3/q3_1/q3_1_image_labels.zip new file mode 100644 index 0000000000000000000000000000000000000000..c418abf06e84ae211dcbdedd5e62cc921e800677 GIT binary patch literal 898 zcmWIWW@Zs#00G_f>oH&klwb$anYoGSsm1yMcopR&CZ*;e6mft}`WTt8R}g3dKM->R zadJ*#adB#~UP(oXx1Y|Vr(S+K51)FS^nd=8i2-IjMCbJzi?{Xwb%--EFmM6sy!;ZN zzO3T>yw%U11_cBFb$a@q*FWW>8@4H^gN;*DBg9!_K~5T1z!eAfbLYL!__B5}l!|IP zvSgQTcx8BG*>aZsa>_oonm=C}*eYm=t#Q&c*4LHS;J&&pWF}V$Z@})xiIP`XmTtPT z=+T)!HWLKfs@S{?9zOZAXVRiGOD;{SnZTwx|HzUnGOZ6LU7B*HW`n7NcNQb6o6q{~ zzt9eJ>qj7V24ZNC7@DORCs`z>#K$Kmnj|GzrdjB!x|*pv8mL;BgBVW6s*V<_uI8#v zPO28hs;(xgPL@DfLr^HW_8shHapYkBuk6bweULNffW`kK+p6u=KRT$!SlEBOez|Xs zj9X6_``U^b;x{D2zdg;u=3=U}@z(c)2VXr_Y`xX@E#_gK&i}RMbJE|Q zIQC`U&yDl8xEs|o?(vuDkvmu@xAH`9bl_>Pytf8{6TkbkU7noEw1YP=Z*@VId+)vH z+yUNI-w{N^(+jbhRDqh{=?A6>dkO-X4+7g7y?`WAdO|o6djbOM o0&8pml8EGlt`9vD5&A5ENs$Us8sN>!1~QZt2z`J?Dlma~02kEzq5uE@ literal 0 HcmV?d00001 diff --git a/q3/q3_2/q3_2_takeout_reviews.json b/q3/q3_2/q3_2_takeout_reviews.json new file mode 100644 index 0000000..444d582 --- /dev/null +++ b/q3/q3_2/q3_2_takeout_reviews.json @@ -0,0 +1 @@ +[{"id":82,"annotations":[{"id":103,"completed_by":1,"result":[{"value":{"start":34,"end":39,"text":",五星好评","labels":["正面"]},"id":"9eufCPT_ek","from_name":"label","to_name":"text","type":"labels","origin":"manual"}],"was_cancelled":false,"ground_truth":false,"created_at":"2026-06-23T04:55:07.146654Z","updated_at":"2026-06-23T04:55:36.500510Z","draft_created_at":null,"lead_time":27.32,"prediction":{},"result_count":1,"unique_id":"d28c4b88-6573-4f92-9887-1ae1e34cb814","import_id":null,"last_action":null,"bulk_created":false,"task":82,"project":15,"updated_by":1,"parent_prediction":null,"parent_annotation":null,"last_created_by":null}],"file_upload":"07d0fc79-reviews.json","drafts":[],"predictions":[],"data":{"id":1,"text":"外卖小哥送得超快,餐盒还是热的,炸鸡酥脆多汁,酸辣粉也很正宗,分量足,五星好评!"},"meta":{},"created_at":"2026-06-23T04:51:22.898059Z","updated_at":"2026-06-23T04:55:36.683587Z","allow_skip":true,"inner_id":1,"total_annotations":1,"cancelled_annotations":0,"total_predictions":0,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":15,"updated_by":1,"comment_authors":[]},{"id":83,"annotations":[{"id":104,"completed_by":1,"result":[{"value":{"start":22,"end":36,"text":"联系客服也不回,太让人失望了","labels":["负面"]},"id":"KuCYP_CRIc","from_name":"label","to_name":"text","type":"labels","origin":"manual"}],"was_cancelled":false,"ground_truth":false,"created_at":"2026-06-23T04:55:47.369450Z","updated_at":"2026-06-23T04:55:47.369493Z","draft_created_at":null,"lead_time":6.516,"prediction":{},"result_count":1,"unique_id":"e8ef3571-235e-40e6-ad92-0499ee3cac9e","import_id":null,"last_action":null,"bulk_created":false,"task":83,"project":15,"updated_by":1,"parent_prediction":null,"parent_annotation":null,"last_created_by":null}],"file_upload":"07d0fc79-reviews.json","drafts":[],"predictions":[],"data":{"id":2,"text":"等了一个半小时才送到,汤全洒了,面坨成一坨,联系客服也不回,太让人失望了。"},"meta":{},"created_at":"2026-06-23T04:51:22.898133Z","updated_at":"2026-06-23T04:55:47.528709Z","allow_skip":true,"inner_id":2,"total_annotations":1,"cancelled_annotations":0,"total_predictions":0,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":15,"updated_by":1,"comment_authors":[]},{"id":84,"annotations":[{"id":105,"completed_by":1,"result":[{"value":{"start":21,"end":36,"text":"配送员态度也好,下次还会再点。","labels":["正面"]},"id":"A55niYjeiw","from_name":"label","to_name":"text","type":"labels","origin":"manual"}],"was_cancelled":false,"ground_truth":false,"created_at":"2026-06-23T04:55:54.197605Z","updated_at":"2026-06-23T04:55:54.197633Z","draft_created_at":null,"lead_time":4.434,"prediction":{},"result_count":1,"unique_id":"63bb366d-a5a3-4760-8a63-d7953aba4372","import_id":null,"last_action":null,"bulk_created":false,"task":84,"project":15,"updated_by":1,"parent_prediction":null,"parent_annotation":null,"last_created_by":null}],"file_upload":"07d0fc79-reviews.json","drafts":[],"predictions":[],"data":{"id":3,"text":"奶茶是用料很扎实的现煮茶,珍珠Q弹有嚼劲,配送员态度也好,下次还会再点。"},"meta":{},"created_at":"2026-06-23T04:51:22.898177Z","updated_at":"2026-06-23T04:55:54.314260Z","allow_skip":true,"inner_id":3,"total_annotations":1,"cancelled_annotations":0,"total_predictions":0,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":15,"updated_by":1,"comment_authors":[]},{"id":85,"annotations":[{"id":106,"completed_by":1,"result":[{"value":{"start":23,"end":33,"text":"性价比高,值得推荐。","labels":["正面"]},"id":"2fwUxH974D","from_name":"label","to_name":"text","type":"labels","origin":"manual"}],"was_cancelled":false,"ground_truth":false,"created_at":"2026-06-23T04:56:01.188230Z","updated_at":"2026-06-23T04:56:01.188269Z","draft_created_at":null,"lead_time":5.427,"prediction":{},"result_count":1,"unique_id":"634b76f8-4f37-4d0a-b3ff-72c59f4b4fc8","import_id":null,"last_action":null,"bulk_created":false,"task":85,"project":15,"updated_by":1,"parent_prediction":null,"parent_annotation":null,"last_created_by":null}],"file_upload":"07d0fc79-reviews.json","drafts":[],"predictions":[],"data":{"id":4,"text":"配送速度一般,但披萨味道不错,芝士拉丝效果好,性价比高,值得推荐。"},"meta":{},"created_at":"2026-06-23T04:51:22.898220Z","updated_at":"2026-06-23T04:56:01.297037Z","allow_skip":true,"inner_id":4,"total_annotations":1,"cancelled_annotations":0,"total_predictions":0,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":15,"updated_by":1,"comment_authors":[]},{"id":86,"annotations":[{"id":107,"completed_by":1,"result":[{"value":{"start":5,"end":34,"text":"食材不新鲜,有股怪味,吃完拉肚子,商家推卸责任,再也不点了","labels":["负面"]},"id":"7OhkEGkrz7","from_name":"label","to_name":"text","type":"labels","origin":"manual"}],"was_cancelled":false,"ground_truth":false,"created_at":"2026-06-23T04:56:13.273079Z","updated_at":"2026-06-23T04:56:13.273130Z","draft_created_at":null,"lead_time":10.527,"prediction":{},"result_count":1,"unique_id":"857305d7-7a7c-4fe1-bd1d-c6545d428756","import_id":null,"last_action":null,"bulk_created":false,"task":86,"project":15,"updated_by":1,"parent_prediction":null,"parent_annotation":null,"last_created_by":null}],"file_upload":"07d0fc79-reviews.json","drafts":[],"predictions":[],"data":{"id":5,"text":"点的麻辣烫食材不新鲜,有股怪味,吃完拉肚子,商家推卸责任,再也不点了。"},"meta":{},"created_at":"2026-06-23T04:51:22.898260Z","updated_at":"2026-06-23T04:56:13.408066Z","allow_skip":true,"inner_id":5,"total_annotations":1,"cancelled_annotations":0,"total_predictions":0,"comment_count":0,"unresolved_comment_count":0,"last_comment_updated_at":null,"project":15,"updated_by":1,"comment_authors":[]}] \ No newline at end of file diff --git a/q4/q4_1/q4_1.py b/q4/q4_1/q4_1.py new file mode 100644 index 0000000..b8927e9 --- /dev/null +++ b/q4/q4_1/q4_1.py @@ -0,0 +1,6 @@ +import matplotlib.pyplot as plt +import json +with open ("movies.json","r",encoding='utf-8') as f: + print(f) +genre_count=() +plt.bar \ No newline at end of file
1三傻大闹宝莱坞泰坦尼克号 Frank Darabont20187.0118动画519949.1150剧情2
2霸王别姬陈凯歌20127.1119爱情4
3星际穿越Robert Zemeckis20158.8171冒险3
4 肖申克的救赎James Cameron20178.2149陈凯歌20047.8180 剧情2
3霸王别姬Robert Zemeckis20147.6160冒险2
4忠犬八公的故事James Cameron19929.5167悬疑5
5阿甘正传盗梦空间 宫崎骏20017.1163悬疑20148.2176喜剧 3
6泰坦尼克号阿甘正传 Christopher Nolan19968.6171冒险520166.997动画2
7放牛班的春天三傻大闹宝莱坞 Lasse Hallström20107.8126科幻20037.0122悬疑 2
8千与千寻放牛班的春天 Rajkumar Hirani20028.6160悬疑20218.7140喜剧 5
9忠犬八公的故事星际穿越 Christophe Barratier19977.913820029.0125 冒险 5
10盗梦空间千与千寻 Christopher Nolan20082021 7.3158爱情5173剧情3