# Python怎麼爬取動漫桌面高清壁紙
在當今數字時代,高清動漫壁紙深受愛好者喜愛。本文將詳細介紹如何用Python爬取動漫桌面壁紙,涵蓋技術選型、反爬策略和實戰代碼。
## 一、技術選型與工具準備
### 1. 核心工具包
```python
import requests # 網絡請求
from bs4 import BeautifulSoup # HTML解析
import os # 文件操作
import time # 延時控制
# 安裝依賴(請在終端中執行,而非Python代碼): pip install requests beautifulsoup4
典型壁紙站特點:
- 分頁URL規律:https://wallhaven.cc/search?q=anime&page=2
- 圖片詳情頁包含原始尺寸下載鏈接
# Collect the thumbnail containers (<figure class="thumb"> elements).
thumbnails = soup.select('figure.thumb')
# Extract the HD image link from the element with id "wallpaper".
# select_one() returns None when nothing matches, so guard before reading
# the attribute instead of letting a TypeError escape; hd_url is None when
# the element or its src attribute is missing.
wallpaper_img = soup.select_one('#wallpaper')
hd_url = wallpaper_img.get('src') if wallpaper_img is not None else None
def get_wallpapers(keyword='anime', pages=3):
    """Request and parse the search-result pages for *keyword*.

    Args:
        keyword: Search term sent as the ``q`` query parameter.
        pages: Number of result pages to request, starting at page 1.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.Timeout: If a page does not answer within the timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    for page in range(1, pages + 1):
        # Pass the query through ``params`` so keywords containing characters
        # such as '&' or '#' are URL-encoded instead of corrupting the URL.
        response = requests.get(
            'https://wallhaven.cc/search',
            params={'q': keyword, 'page': page},
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        # Do not parse an error page as if it were a result list.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Parse the image list from ``soup`` here ...
def download_image(url, save_dir='wallpapers'):
    """Stream the file at *url* into *save_dir* and print the saved path.

    Args:
        url: Direct URL of the image file.
        save_dir: Target directory; created when it does not exist.

    Raises:
        ValueError: If no file name can be derived from *url*.
        requests.HTTPError: If the server responds with an error status.
    """
    from urllib.parse import urlsplit

    # exist_ok avoids the check-then-create race when several worker threads
    # call this function at the same time.
    os.makedirs(save_dir, exist_ok=True)

    # Use only the URL path so a query string or fragment never ends up in
    # the file name.
    name = os.path.basename(urlsplit(url).path)
    if not name:
        raise ValueError(f'cannot derive a file name from URL: {url}')
    filename = os.path.join(save_dir, name)

    with requests.get(url, stream=True, timeout=30) as r:
        # Fail before opening the file so an error page is never saved as an
        # image.
        r.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f'已保存: {filename}')
# Third-party package, installed separately: pip install fake-useragent
from fake_useragent import UserAgent
# ua.random supplies a randomly chosen browser User-Agent string, used here
# as the User-Agent request header.
ua = UserAgent()
headers = {'User-Agent': ua.random}
# Proxy mapping keyed by URL scheme; presumably meant to be passed as the
# ``proxies=`` argument of a requests call (it is not used in this listing).
# NOTE(review): an 'https://' proxy URL makes the client open a TLS connection
# to the proxy itself. If 127.0.0.1:1080 is a plain HTTP proxy, the 'https'
# entry should probably be 'http://127.0.0.1:1080' — confirm against the proxy.
proxies = {
'http': 'http://127.0.0.1:1080',
'https': 'https://127.0.0.1:1080'
}
from concurrent.futures import ThreadPoolExecutor, as_completed

# Download with up to five worker threads. Each future's result is checked
# explicitly: a discarded executor.map() iterator would silently drop any
# exception raised inside download_image.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {executor.submit(download_image, url): url for url in img_urls}
    for future in as_completed(futures):
        try:
            future.result()
        except Exception as exc:  # report the failure and keep downloading the rest
            print(f'下載失敗: {futures[future]} ({exc})')
import random  # required by random.uniform below; not imported anywhere else in this listing

# Pause for a random 1-3 seconds between requests.
time.sleep(random.uniform(1, 3))
import requests
from bs4 import BeautifulSoup
import os
import time
from concurrent.futures import ThreadPoolExecutor
def main():
    """Prompt for a keyword and page count, collect HD image URLs, download them.

    Relies on ``get_hd_url`` and ``download_image`` being defined in the same
    module.
    NOTE(review): ``get_hd_url`` does not appear anywhere in this listing;
    presumably it fetches a detail page and returns the ``src`` of the
    ``#wallpaper`` image — confirm and add it before running.

    Raises:
        ValueError: If the page count entered is not an integer.
    """
    from concurrent.futures import as_completed
    from urllib.parse import urljoin

    keyword = input("輸入搜索關鍵詞(如anime): ")
    pages = int(input("需要爬取的頁數: "))
    base_url = "https://wallhaven.cc/search"
    headers = {'User-Agent': 'Mozilla/5.0'}
    img_urls = []
    for page in range(1, pages + 1):
        params = {'q': keyword, 'page': page}
        res = requests.get(base_url, params=params, headers=headers, timeout=10)
        if res.ok:
            soup = BeautifulSoup(res.text, 'html.parser')
            for img in soup.select('figure.thumb'):
                link = img.find('a', href=True)
                if link is None:
                    # Thumbnail without a usable link: nothing to follow.
                    continue
                # urljoin resolves absolute, protocol-relative ("//host/x") and
                # site-relative ("/w/x") hrefs; prefixing 'https:' is only
                # correct for the protocol-relative form.
                detail_link = urljoin(base_url, link['href'])
                hd_url = get_hd_url(detail_link)
                if hd_url:
                    img_urls.append(hd_url)
        else:
            # Report the failed page instead of silently collecting nothing.
            print(f'第{page}頁請求失敗: HTTP {res.status_code}')
        # Fixed pause between result pages to keep the request rate low.
        time.sleep(2)
    with ThreadPoolExecutor(4) as executor:
        futures = {executor.submit(download_image, url): url for url in img_urls}
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as exc:  # report the failure and keep downloading the rest
                print(f'下載失敗: {futures[future]} ({exc})')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
```
通過本文介紹的方法,你可以輕鬆建立專屬動漫壁紙庫。建議先從少量測試開始,遵守網站爬取規則,享受技術帶來的便利與樂趣!
注:實際運行時請替換示例網站為合法的可爬取目標,並確保遵守目標網站的服務條款。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。