"""Scrape the Douban Movie Top 250 list into ``~/Desktop/top250.txt``.

The list is served as ten pages of 25 entries each. For every entry the
script records the title, the rating, the vote count and the one-line
quote, then writes them out as a numbered plain-text list.

Provenance: recovered from a whitespace-collapsed git patch (commit
48a23d0, 2026-04-02) that created ``mz.py``.
"""
import os
import re
import sys
from time import sleep

BASE_URL = 'https://movie.douban.com/top250?start={}&filter='
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
PAGE_COUNT = 10        # number of list pages to request
PAGE_SIZE = 25         # the ``start`` query parameter advances by this much per page
REQUEST_TIMEOUT = 10   # seconds, passed to requests.get
PAGE_DELAY = 1         # seconds to wait between two consecutive requests

# NOTE(review): the HTML tag anchors of the original single regex were lost
# when this file was extracted; only the capture groups and the literal
# "人评价" ("people rated") suffix survived. The markers and class names
# below are reconstructed from the markup the Top 250 page is believed to
# use -- confirm them against a live response before relying on the output.
ITEM_MARKER = '<div class="item">'
TITLE_RE = re.compile(r'<span class="title">(.*?)</span>', re.S)
RATING_RE = re.compile(r'<span class="rating_num"[^>]*>(.*?)</span>', re.S)
VOTES_RE = re.compile(r'<span>([^<]*人评价)</span>')
QUOTE_RE = re.compile(r'<span class="inq">(.*?)</span>', re.S)


def parse_movies(html):
    """Extract the movie entries found in one list page.

    Args:
        html: Decoded HTML text of a list page.

    Returns:
        A list of ``(title, rating, votes, quote)`` string tuples in page
        order. ``quote`` is ``''`` when an entry has none. Entries lacking
        a title, rating or vote count are skipped.
    """
    movies = []
    # Each entry is parsed inside its own chunk so that a missing field can
    # never be satisfied by text belonging to the following entry. The text
    # before the first marker is not an entry, hence the [1:].
    for chunk in html.split(ITEM_MARKER)[1:]:
        title = TITLE_RE.search(chunk)
        rating = RATING_RE.search(chunk)
        votes = VOTES_RE.search(chunk)
        if not (title and rating and votes):
            continue
        quote = QUOTE_RE.search(chunk)
        movies.append((
            # Keep only the part of the title before the first '/'.
            title.group(1).split('/')[0].strip(),
            rating.group(1),
            votes.group(1),
            quote.group(1) if quote else '',
        ))
    return movies


def fetch_page(start):
    """Download one list page and return its text decoded as UTF-8.

    Args:
        start: Zero-based offset of the first entry on the page.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    # Imported here rather than at module level so the parsing and
    # formatting helpers stay usable without the third-party package.
    import requests

    response = requests.get(
        BASE_URL.format(start), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    # Without this an error page would be parsed like a normal list page
    # and silently yield zero movies.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def scrape_top250(pages=PAGE_COUNT):
    """Fetch and parse every list page, skipping pages that fail.

    Args:
        pages: Number of consecutive pages to request, starting at offset 0.

    Returns:
        All parsed ``(title, rating, votes, quote)`` tuples in list order.
    """
    import requests

    movies = []
    for page in range(pages):
        if page:
            # Pause between requests only; no delay before the first page
            # or after the last one.
            sleep(PAGE_DELAY)
        try:
            html = fetch_page(page * PAGE_SIZE)
        except requests.RequestException as exc:
            # Best effort: report the failed page and carry on with the rest.
            print(f'page {page + 1}: request failed: {exc}', file=sys.stderr)
            continue
        found = parse_movies(html)
        if not found:
            print(f'page {page + 1}: no movies parsed', file=sys.stderr)
        movies.extend(found)
    return movies


def format_movies(movies):
    """Render movies as the numbered text block written to the output file.

    Each entry is four lines (``"<rank>. <title>"``, rating, votes, quote)
    followed by a blank line; ranks start at 1.
    """
    return ''.join(
        f'{rank}. {title}\n{rating}\n{votes}\n{quote}\n\n'
        for rank, (title, rating, votes, quote) in enumerate(movies, 1)
    )


def save_movies(movies, path):
    """Write the formatted list to ``path`` as UTF-8, creating its directory."""
    directory = os.path.dirname(path)
    if directory:
        # The target directory (e.g. ~/Desktop) is not guaranteed to exist.
        os.makedirs(directory, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(format_movies(movies))


def main():
    """Scrape the list and save it as ``top250.txt`` on the user's Desktop."""
    path = os.path.join(os.path.expanduser("~"), "Desktop", "top250.txt")
    save_movies(scrape_top250(), path)


if __name__ == '__main__':
    main()