Skip to content

Commit c6b198f

Browse files
author
Narra_Venkata_Raghu_Charan
authored
Update bulkdownloadfromurls.py
1 parent 9047fde commit c6b198f

File tree

1 file changed

+23
-36
lines changed

1 file changed

+23
-36
lines changed

scripts/bulkdownloadfromurls.py

+23-36
Original file line number | Diff line number | Diff line change
@@ -1,67 +1,54 @@
11
import os
22
import requests
3-
from urllib.parse import urlsplit, quote_plus
3+
from urllib.parse import quote_plus
44
import pandas as pd
55

6-
76
def read_urls_from_csv(csv_file, column_name):
    """Return the values of one CSV column as a list of URLs.

    Args:
        csv_file: Path of the CSV file to read.
        column_name: Header of the column that holds the URLs.

    Returns:
        The column's values in file order, or an empty list when the file
        does not exist, cannot be processed, or has no column named
        ``column_name``. The problem is printed instead of being raised.
    """
    try:
        # Read CSV file into a DataFrame
        image_data = pd.read_csv(csv_file)

        # Extract URLs from specified column
        return image_data[column_name].tolist()
    except FileNotFoundError as e:
        print(f"File not found: {e}")
    except KeyError as e:
        # Selecting a column that is not in the file raises KeyError, which
        # is not a ValueError; handle it here so a wrong column name yields
        # an empty list like the other failure modes instead of a crash.
        print(f"Column not found in CSV: {e}")
    except ValueError as e:
        print(f"Error processing CSV: {e}")
    return []
19-
2021

2122
def download_image(url, folder):
    """Download ``url`` and save it in ``folder`` as a ``.jpg`` file.

    The file name is the first 25 characters of the URL-encoded address
    followed by a short SHA-256 digest of the complete URL. The digest keeps
    names distinct for URLs that share the same leading characters (for
    example, every image served from one host), which would otherwise
    overwrite each other.

    Args:
        url: Address of the image to fetch.
        folder: Existing directory the file is written to.

    Returns:
        None. Success or failure is reported with ``print``; request errors
        (``requests.exceptions.RequestException``) are caught, not raised.
    """
    import hashlib  # local import: only needed to build the file name

    try:
        # Send a GET request with a timeout. Using the response as a context
        # manager releases the streamed connection even if writing fails.
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()

            # Readable, length-limited prefix plus a digest of the full URL.
            prefix = quote_plus(url)[:25]
            digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:10]
            filename = f"{prefix}_{digest}.jpg"

            # Create the output path
            output_path = os.path.join(folder, filename)

            # Save the image to the specified folder
            with open(output_path, "wb") as file:
                for chunk in response.iter_content(8192):
                    file.write(chunk)

        print(f"Downloaded: {url} to {output_path}")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
4343

44-
4544
def download_images_from_list(url_list, folder):
    """Download every URL in ``url_list`` into ``folder``.

    Args:
        url_list: Iterable of image URLs; each one is passed to
            ``download_image``.
        folder: Output directory. It is created, including missing parent
            directories, if it does not exist yet.
    """
    # Create the output folder if it doesn't exist
    os.makedirs(folder, exist_ok=True)

    for url in url_list:
        download_image(url, folder)
5148

52-
5349
if __name__ == "__main__":
    # Read the image links from the "Imagelink" column of the dataset CSV,
    # then fetch each one into the local download directory.
    image_urls = read_urls_from_csv("face_dataset.csv", "Imagelink")
    download_images_from_list(image_urls, "downloaded_images")

0 commit comments

Comments
 (0)