|
import hashlib
import os
from urllib.parse import urlsplit, quote_plus

import pandas as pd
import requests
| 5 | + |
def read_urls_from_csv(csv_file, column_name):
    """Return the non-empty URLs stored in one column of a CSV file.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
        column_name: Header of the column that holds the URLs.

    Returns:
        A list of URL strings with surrounding whitespace removed. Blank
        cells are skipped and every remaining value is converted to ``str``.
        When the file cannot be read or parsed, or the column does not
        exist, the error is printed and an empty list is returned.
    """
    try:
        df = pd.read_csv(csv_file)
        column = df[column_name]
    except (OSError, KeyError, ValueError) as e:
        # OSError: unreadable/missing file; KeyError: unknown column;
        # ValueError: pandas parser and empty-data errors derive from it.
        print(f"Error reading URLs from CSV: {e}")
        return []

    # Empty cells are loaded as NaN (a float); passing those on would make
    # the downloader request the literal URL "nan".
    cleaned = column.dropna().astype(str).str.strip()
    return [url for url in cleaned if url]
| 18 | + |
| 19 | + |
def _filename_for_url(url):
    """Build a filesystem-safe, collision-resistant filename for *url*."""
    basename = os.path.basename(urlsplit(url).path)
    stem, ext = os.path.splitext(basename)

    # quote_plus escapes characters that are unsafe in filenames; the stem is
    # capped so very long URLs cannot exceed filesystem name limits.
    stem = quote_plus(stem)[:25] or "image"

    # Keep the extension the URL advertises when it looks like one; otherwise
    # fall back to ".jpg".
    if not (1 < len(ext) <= 6 and ext[1:].isalnum()):
        ext = ".jpg"

    # A short digest of the full URL keeps names unique even when two URLs
    # share the same basename (or the same host prefix).
    digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:10]
    return f"{stem}_{digest}{ext}"


def download_image(url, folder, timeout=30):
    """Download the resource at *url* and save it inside *folder*.

    Args:
        url: Address of the image to fetch.
        folder: Existing directory that receives the file.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``. Without it a stalled server blocks forever.

    Returns:
        None. Success and failure are reported with ``print``; request and
        file-system errors are caught so that one bad URL does not abort a
        batch.
    """
    try:
        # The context manager releases the streamed connection even when
        # writing fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Treat HTTP 4xx/5xx as failures

            output_path = os.path.join(folder, _filename_for_url(url))

            with open(output_path, 'wb') as file:
                for chunk in response.iter_content(8192):
                    file.write(chunk)
    except requests.exceptions.RequestException as e:
        # Must come before OSError: RequestException is an OSError subclass.
        print(f"Failed to download {url}: {e}")
    except OSError as e:
        print(f"Failed to save {url}: {e}")
    else:
        print(f"Downloaded: {url} to {output_path}")
| 42 | + |
def download_images_from_list(url_list, folder):
    """Fetch every URL in *url_list* and store the results in *folder*.

    The folder (including missing parent directories) is created first; an
    already existing folder is reused. URLs are processed one at a time, in
    the order given, by ``download_image``.
    """
    # exist_ok=True makes repeated runs against the same folder safe.
    os.makedirs(folder, exist_ok=True)

    for image_url in url_list:
        download_image(image_url, folder)
| 49 | + |
| 50 | + |
if __name__ == "__main__":
    # Script configuration: edit these three values before running.
    source_csv = "your_csv"  # CSV file containing URLs
    url_column = "YOUR COLUMN LINK CONTAING URLS"  # Replace with the column name containing URLs
    destination = "downloaded_images"  # Folder to save downloaded images

    # Load the URL list from the CSV, then download each entry.
    download_images_from_list(
        read_urls_from_csv(source_csv, url_column),
        destination,
    )
0 commit comments