"""Download every .jpg / .png image referenced by a web page.

The script fetches the page at ``url``, scans all ``<a>`` and ``<img>`` tags,
and saves each referenced image whose link contains ``.jpg`` or ``.png`` into
``images_dir``.
"""
import os
from urllib.parse import urljoin
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup

# URL of the page to scrape, e.g. http://www.ntcu.edu.tw/newweb/index.htm
url = 'xxx'
html = requests.get(url)
html.encoding = "utf-8"
sp = BeautifulSoup(html.text, 'html.parser')

# Directory that receives the downloaded images,
# e.g. images_dir = "images/" stores them under "images".
images_dir = "XXX"
if not os.path.exists(images_dir):
    os.mkdir(images_dir)

# Collect every <a> and <img> tag on the page.
all_links = sp.find_all(['a', 'img'])
for link in all_links:
    # <img> tags carry the image in "src"; <a> tags may link to one via "href".
    src = link.get('src')
    href = link.get('href')
    attrs = [src, href]
    for attr in attrs:
        # Only handle links that mention a .jpg or .png file.
        if attr is None or not ('.jpg' in attr or '.png' in attr):
            continue

        # Resolve relative links against the page URL so urlopen gets an
        # absolute address; absolute links pass through unchanged.
        full_path = urljoin(url, attr)

        # Split the last path segment into main name and extension.
        name_parts = full_path.split('/')[-1].split('.')
        if len(name_parts) < 2:
            # The extension matched somewhere other than the last path
            # segment, so no file name can be derived; skip this link
            # instead of crashing the whole run.
            print("{} 無法讀取!".format(attr))
            continue
        ext = name_parts[-1]
        # Normalise the extension (drops query strings such as "jpg?v=2").
        if 'jpg' in ext:
            filename = name_parts[-2] + '.jpg'
        else:
            filename = name_parts[-2] + '.png'
        print(attr)

        # Download first, then write, so a failed download does not leave an
        # empty file behind; "with" closes both handles even on error.
        try:
            with urlopen(full_path) as image:
                data = image.read()
            with open(os.path.join(images_dir, filename), 'wb') as f:
                f.write(data)
        except Exception:
            # Best effort: report the failure and carry on with the next
            # image. Unlike a bare "except", Ctrl-C still stops the script.
            print("{} 無法讀取!".format(filename))