画像のスクレイピング(sslの場合)
1 2 3 4 5 6 7 8 9 |
import io
import ssl
from urllib import request

# Build a client-side TLS context with certificate and hostname verification.
# The older form ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) is deprecated since
# Python 3.10 (it emits a DeprecationWarning) and does not verify the server
# certificate by default.
context = ssl.create_default_context()
# Keep the original intent of refusing anything older than TLS 1.2.
context.minimum_version = ssl.TLSVersion.TLSv1_2

item_image = "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/180px-Cat03.jpg"

# Read the whole image into memory; the with-block closes the HTTP response.
with request.urlopen(item_image, context=context) as response:
    f = io.BytesIO(response.read())
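なお、context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) と書くと以下のメッセージが表示される。これはエラーではなく警告(DeprecationWarning)で、Python 3.10 以降 ssl.PROTOCOL_TLSv1_2 が非推奨になったことが原因。ssl.create_default_context() でコンテキストを作成すれば表示されなくなる。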
C:\Users\xxx\AppData\Local\Temp\ipykernel_16388\4181980830.py:5: DeprecationWarning: ssl.PROTOCOL_TLSv1_2 is deprecated
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
応用例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# Download every image of the current item and save it under a folder named
# after the country name + 6-digit HS code (HS_jsonData).
#
# Reads:  driver, By, Image, context, HS_jsonData, link_id, image_amount
# Writes: item_image_urls - comma-separated image URLs, or "None"
#         img_name_all    - comma-separated saved file paths, or "None"
#         image_amount    - reset to 0 when nothing could be saved
#
# NOTE(review): the original indentation was lost; the trailing "else" of the
# original is reproduced here as "no images, or a save failed -> None".
# Confirm against the full script.

# The mixed "\\" and "/" separators are kept so the recorded paths are unchanged.
image_dir = "E:\\rulings_img/" + HS_jsonData
os.makedirs(image_dir, exist_ok=True)

# The image cell is looked up under div[4] first and under div[5] as a fallback.
image_xpath = '//*[@id="dtlLayer"]/div[{div}]/table/tbody/tr[9]/td/span[{span}]/a/img'

saved_urls = []
saved_names = []
save_failed = False

for g in range(image_amount):
    xpath = image_xpath.format(div=4, span=g + 1)
    try:
        driver.find_element(By.XPATH, xpath)
    except Exception as e:
        print("画像取得エラー発生divの値を5に変更:", e.args)
        # Use the div[5] location for the actual lookup below; the original
        # discarded this fallback and queried div[4] again.
        xpath = image_xpath.format(div=5, span=g + 1)
    else:
        time.sleep(1)
    item_image = driver.find_element(By.XPATH, xpath).get_attribute("src")

    # Fetch the image bytes, retrying once after a pause.
    try:
        with request.urlopen(item_image, context=context) as response:
            f = io.BytesIO(response.read())
    except Exception:
        print(">>>>>>>画像取得リトライ<<<<<<<<")
        time.sleep(10)
        with request.urlopen(item_image, context=context) as response:
            f = io.BytesIO(response.read())

    img_name = image_dir + "/" + link_id + "-" + str(g + 1) + ".jpg"

    # Give up on all images if this one cannot be saved within three attempts.
    for image_loop in range(3):
        try:
            img = Image.open(f)
            try:
                img.save(img_name)
            except Exception:
                # Retry the save after converting the image to RGB.
                print("imgで保存できない為rgbに変換")
                img.convert('RGB').save(img_name)
        except Exception:
            print("3秒停止")
            time.sleep(3)
        else:
            saved_urls.append(item_image)
            saved_names.append(img_name)
            break
    else:
        save_failed = True
        break

if saved_urls and not save_failed:
    # Two or more images are reported as comma-separated lists.
    item_image_urls = ",".join(saved_urls)
    img_name_all = ",".join(saved_names)
else:
    item_image_urls = "None"
    img_name_all = "None"
    image_amount = 0
コメントを残す