!pip install chromedriver_autoinstaller

import os
import time

import chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from urllib import parse
from urllib.request import Request, urlopen


def crawl_and_save_image(keyword, pages):
    image_urls = []
    # Iterate over the search result pages
    for i in range(1, pages + 1):
        url = f'https://pixabay.com/ko/images/search/{keyword}/?pagi={i}'
        driver.get(url)
        time.sleep(2)
        # Scroll down step by step so lazily loaded images are rendered
        for _ in range(20):
            driver.execute_script("window.scrollBy({ top: window.innerHeight, behavior: 'smooth' })")
            time.sleep(1)
        # Grab every <img> inside the search result area
        image_area_xpath = '/html/body/div[1]/div[1]/div/div[2]/div[3]'
        image_area = driver.find_element(By.XPATH, image_area_xpath)
        image_elements = image_area.find_elements(By.TAG_NAME, 'img')
        for image_element in image_elements:
            image_url = image_element.get_attribute('src')
            print(image_url)
            image_urls.append(image_url)
    # Create a directory named after the keyword if it does not exist yet
    if not os.path.exists(keyword):
        os.mkdir(keyword)
    for i in range(len(image_urls)):
        image_url = image_urls[i]
        url = parse.urlparse(image_url)
        filename = image_url.split('/')[-1]
        # Send a browser-like User-Agent so the image server does not reject the request
        image_byte = Request(image_url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'})
        f = open(f'{keyword}/{filename}', 'wb')
        f.write(urlopen(image_byte).read())
        f.close()

# Install a matching ChromeDriver, start the browser, and run the crawler
chromedriver_autoinstaller.install()
driver = webdriver.Chrome()
crawl_and_save_image('고양이', 2)
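
As a quick sanity check, you can list what was saved. This is a minimal sketch, assuming the crawl above finished and created the '고양이' directory with the downloaded files in it:

import os

# Assumes the '고양이' directory was created by crawl_and_save_image above
saved = os.listdir('고양이')
print(f'{len(saved)} images downloaded')
print(saved[:5])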