From 4c5f702d61c3377dd0ed81af5109773ca5b64b65 Mon Sep 17 00:00:00 2001
From: 2509165025 <2509165025@student.edu.cn>
Date: Thu, 26 Mar 2026 15:46:10 +0800
Subject: [PATCH] Complete assignment 1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 2509165025.py => 26 5025.py |  0
 爬虫/爬虫.py (2).txt         | 30 -------------
 爬虫/爬虫.py.txt             | 88 ------------------------------------
 3 files changed, 118 deletions(-)
 rename 2509165025.py => 26 5025.py (100%)
 delete mode 100644 爬虫/爬虫.py (2).txt
 delete mode 100644 爬虫/爬虫.py.txt

diff --git a/2509165025.py b/26 5025.py
similarity index 100%
rename from 2509165025.py
rename to 26 5025.py
diff --git a/爬虫/爬虫.py (2).txt b/爬虫/爬虫.py (2).txt
deleted file mode 100644
index 00dee05..0000000
--- a/爬虫/爬虫.py (2).txt
+++ /dev/null
@@ -1,30 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import time
-
-headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
-}
-
-all_movies = []
-
-for page in range(0, 250, 25):
-    url = f"https://movie.douban.com/top250?start={page}&filter="
-    print(f"Scraping page {page//25 + 1}: {url}")
-
-    response = requests.get(url, headers=headers)
-    response.encoding = "utf-8"
-    soup = BeautifulSoup(response.text, "html.parser")
-
-
-    items = soup.find_all("div", class_="item")
-    for item in items:
-        title = item.find("span", class_="title").get_text(strip=True)
-        all_movies.append(title)
-        print(title)
-
-
-    time.sleep(1)
-
-
-print(f"\nScraped {len(all_movies)} movies in total")
\ No newline at end of file
diff --git a/爬虫/爬虫.py.txt b/爬虫/爬虫.py.txt
deleted file mode 100644
index 939569d..0000000
--- a/爬虫/爬虫.py.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import time
-import csv
-
-
-headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
-}
-
-def fetch_news_list(url):
-    """Fetch the title, link, and publish time of each item on the news list page."""
-    try:
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        news_list = []
-
-        for item in soup.select('ul.feed-card-list li.feed-card-item'):
-            title_tag = item.select_one('a.feed-card-link')
-            if not title_tag:
-                continue
-            title = title_tag.get_text(strip=True)
-            link = title_tag['href']
-
-            if not link.startswith('http'):
-                link = 'https://news.sina.com.cn' + link
-
-            time_tag = item.select_one('span.feed-card-time')
-            publish_time = time_tag.get_text(strip=True) if time_tag else 'unknown time'
-
-            news_list.append({
-                'title': title,
-                'link': link,
-                'publish_time': publish_time
-            })
-        return news_list
-    except Exception as e:
-        print(f"Failed to fetch the news list: {e}")
-        return []
-
-def fetch_news_content(news_url):
-    """Open the news detail page and extract the article body."""
-    try:
-        response = requests.get(news_url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-
-        content = ''
-        for p in soup.select('div.article p'):
-            content += p.get_text(strip=True) + '\n'
-        return content if content else 'no article body'
-    except Exception as e:
-        print(f"Failed to fetch the article body: {e}")
-        return ''
-
-def save_to_csv(news_data, filename='sina_news.csv'):
-    """Save the scraped news items to a CSV file."""
-    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
-        writer = csv.DictWriter(f, fieldnames=['title', 'link', 'publish_time', 'content'])
-        writer.writeheader()
-        writer.writerows(news_data)
-    print(f"✅ News saved to {filename}")
-
-if __name__ == "__main__":
-
-    target_url = "https://news.sina.com.cn/china/"
-    print("Starting to scrape the news list...")
-    news_list = fetch_news_list(target_url)
-
-    if not news_list:
-        print("No news list retrieved; stopping")
-        exit()
-
-
-    news_data = []
-    for i, news in enumerate(news_list[:10], 1):  # only scrape the first 10 items
-        print(f"Scraping item {i}: {news['title']}")
-        content = fetch_news_content(news['link'])
-        news['content'] = content
-        news_data.append(news)
-        time.sleep(1)  # wait 1 second to avoid being blocked for requesting too fast
-
-
-    save_to_csv(news_data)
-    print("🎉 Scraping complete!")
\ No newline at end of file