画像のスクレイピング(sslの場合)
import io
import ssl
from urllib import request

# Download one image over HTTPS and keep its bytes in an in-memory buffer.
#
# ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) is deprecated since Python 3.10 and
# emits a DeprecationWarning. It also leaves certificate and hostname
# verification switched off. ssl.create_default_context() returns a context
# with verification enabled; pinning minimum_version keeps the
# "TLS 1.2 or newer" requirement explicit.
context = ssl.create_default_context()
context.minimum_version = ssl.TLSVersion.TLSv1_2

item_image = "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/180px-Cat03.jpg"

# The with-block closes the HTTP response as soon as the body has been read.
with request.urlopen(item_image, context=context) as response:
    f = io.BytesIO(response.read())
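なお、ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) と書くと以下の警告(DeprecationWarning)が表示される。これはエラーではなく、Python 3.10 以降で ssl.PROTOCOL_TLSv1_2 が非推奨になったことによる警告である。代わりに ssl.create_default_context() を使えば警告は出ない。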
C:\Users\xxx\AppData\Local\Temp\ipykernel_16388\4181980830.py:5: DeprecationWarning: ssl.PROTOCOL_TLSv1_2 is deprecated
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
応用例
# Download every image of the current item and save it as a JPEG.
#
# Names read from the surrounding script: HS_jsonData, link_id, image_amount,
# driver, By, context, Image (plus the io / os / time / request modules).
# Names written for the surrounding script:
#   item_image_urls - comma-separated image URLs, or "None"
#   img_name_all    - comma-separated saved file paths, or "None"
#   image_amount    - reset to 0 when nothing could be saved

# Make sure the output folder (country name + 6-digit HS code) exists.
# exist_ok=True makes this a no-op when the folder is already there.
save_dir = "E:\\rulings_img/" + HS_jsonData
os.makedirs(save_dir, exist_ok=True)

saved_urls = []
saved_names = []

for g in range(image_amount):
    # The <img> normally sits under div[4]; fall back to div[5] otherwise.
    # Keep whichever element was found so the fallback is actually used.
    xpath_tail = "/table/tbody/tr[9]/td/span[" + str(g + 1) + "]/a/img"
    try:
        img_element = driver.find_element(By.XPATH, "//*[@id=\"dtlLayer\"]/div[4]" + xpath_tail)
        time.sleep(1)
    except Exception as e:
        print("画像取得エラー発生divの値を5に変更:", e.args)
        img_element = driver.find_element(By.XPATH, "//*[@id=\"dtlLayer\"]/div[5]" + xpath_tail)
    item_image = img_element.get_attribute("src")

    # Fetch the image bytes; on failure wait 10 seconds and retry once.
    # A second failure is left to propagate, as before.
    try:
        with request.urlopen(item_image, context=context) as response:
            f = io.BytesIO(response.read())
    except Exception:
        print(">>>>>>>画像取得リトライ<<<<<<<<")
        time.sleep(10)
        with request.urlopen(item_image, context=context) as response:
            f = io.BytesIO(response.read())

    # Try up to three times to decode and save the image.
    img_name = save_dir + "/" + link_id + "-" + str(g + 1) + ".jpg"
    for _attempt in range(3):
        try:
            f.seek(0)  # every attempt reads the buffer from the start
            img = Image.open(f)
            # Saving as .jpg can fail for some image modes; convert to RGB
            # and save again in that case.
            try:
                img.save(img_name)
            except Exception:
                print("imgで保存できない為rgbに変換")
                rgb_img = img.convert('RGB')
                rgb_img.save(img_name)
        except Exception:
            print("3秒停止")
            time.sleep(3)
        else:
            saved_urls.append(item_image)
            saved_names.append(img_name)
            break
    else:
        # All three attempts failed: discard everything collected so far
        # and stop processing the remaining images.
        saved_urls.clear()
        saved_names.clear()
        break

# Report "None" only when there were no images or one could not be saved.
# (A for-else here would also run after a fully successful loop and wipe
# the collected results.)
if saved_urls:
    item_image_urls = ",".join(saved_urls)
    img_name_all = ",".join(saved_names)
else:
    item_image_urls = "None"
    img_name_all = "None"
    image_amount = 0


