diff --git a/XWL.py b/XWL.py new file mode 100644 index 0000000..9a95726 --- /dev/null +++ b/XWL.py @@ -0,0 +1,39 @@ +import requests +import re +import csv +import time +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' +} +all_movies=[] +for page in range(10): + url = 'https://movie.douban.com/top250' + response = requests.get(url, headers=headers) + html = response.text + title_pattern = r'([^<&]+)' + rating_pattern = r']*>(\d+\.?\d*)' + titles = re.findall(title_pattern, html) + ratings = re.findall(rating_pattern, html) + for i in range(min(25,len(ratings))): + title =titles[i*2]if i*2