
Commit 9047fde

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent e3e4849 commit 9047fde

File tree: 2 files changed, +62 -35 lines changed

scripts/bulkdownloadfromurls.py

+16 -12
@@ -3,14 +3,15 @@
 from urllib.parse import urlsplit, quote_plus
 import pandas as pd
 
+
 def read_urls_from_csv(csv_file, column_name):
     try:
         # Read CSV file into a DataFrame
         df = pd.read_csv(csv_file)
-
+
         # Extract URLs from specified column
         urls = df[column_name].tolist()
-
+
         return urls
     except Exception as e:
         print(f"Error reading URLs from CSV: {e}")
@@ -22,42 +23,45 @@ def download_image(url, folder):
         # Send a GET request to the URL
         response = requests.get(url, stream=True)
         response.raise_for_status()  # Check if the request was successful
-
+
         # Generate a unique filename using the URL
         filename = quote_plus(url)  # Encode URL to use as filename
         filename = filename[:25]  # Limit filename length (optional)
         filename = f"{filename}.jpg"  # Add file extension if needed
-
+
         # Create the output path
         output_path = os.path.join(folder, filename)
-
+
         # Save the image to the specified folder
-        with open(output_path, 'wb') as file:
+        with open(output_path, "wb") as file:
             for chunk in response.iter_content(8192):
                 file.write(chunk)
-
+
         print(f"Downloaded: {url} to {output_path}")
     except requests.exceptions.RequestException as e:
         print(f"Failed to download {url}: {e}")
 
+
 def download_images_from_list(url_list, folder):
     # Create the output folder if it doesn't exist
     os.makedirs(folder, exist_ok=True)
-
+
     for url in url_list:
         download_image(url, folder)
 
 
 if __name__ == "__main__":
     # CSV file containing URLs
     csv_file = "your_csv"
-    column_name = "YOUR COLUMN LINK CONTAING URLS"  # Replace with the column name containing URLs
-
+    column_name = (
+        "YOUR COLUMN LINK CONTAING URLS"  # Replace with the column name containing URLs
+    )
+
     # Read URLs from CSV
     image_urls = read_urls_from_csv(csv_file, column_name)
-
+
     # Folder to save downloaded images
     output_folder = "downloaded_images"
-
+
     # Download images
     download_images_from_list(image_urls, output_folder)
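
A quick usage sketch for the script above (not part of the commit): it assumes the scripts/ directory is on the import path so the file can be imported as a module, and that your CSV has a single column of direct image URLs. The file name "urls.csv" and column name "image_url" are placeholders, not names from the repository.

import pandas as pd

from bulkdownloadfromurls import download_images_from_list, read_urls_from_csv

# Build a tiny CSV with one URL column (placeholder data).
pd.DataFrame({"image_url": ["https://example.com/cat.jpg"]}).to_csv(
    "urls.csv", index=False
)

urls = read_urls_from_csv("urls.csv", "image_url")    # read the URL column into a list
download_images_from_list(urls, "downloaded_images")  # download each image to the folder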

scripts/simplescrapper.py

+46 -23
@@ -1,51 +1,69 @@
-
 import requests
 from bs4 import BeautifulSoup
 import pandas as pd
 
+
 def scrape_categories(url):
     try:
         # Send a GET request to the URL
         response = requests.get(url)
         response.raise_for_status()  # Check for request errors
 
         # Parse the HTML content using BeautifulSoup
-        soup = BeautifulSoup(response.content, 'html.parser')
+        soup = BeautifulSoup(response.content, "html.parser")
 
         # Find all category elements (adjust based on the website structure)
         categories = []
-        for category in soup.find_all('a', class_='category-link'):  # Adjust class or tag based on the website structure
+        for category in soup.find_all(
+            "a", class_="category-link"
+        ):  # Adjust class or tag based on the website structure
             category_name = category.text.strip()
-            category_url = category['href']
-            categories.append({'Category': category_name, 'URL': category_url})
+            category_url = category["href"]
+            categories.append({"Category": category_name, "URL": category_url})
 
         return categories
     except Exception as e:
         print(f"An error occurred while scraping categories: {e}")
         return []
 
+
 def scrape_products(category_url):
     try:
         # Send a GET request to the category URL
         response = requests.get(category_url)
         response.raise_for_status()  # Check for request errors
 
         # Parse the HTML content using BeautifulSoup
-        soup = BeautifulSoup(response.content, 'html.parser')
+        soup = BeautifulSoup(response.content, "html.parser")
 
         # Find all product elements (adjust based on the website structure)
         products = []
-        for product in soup.find_all('div', class_='product-item'):  # Adjust class or tag based on the website structure
-            product_name = product.find('h2', class_='product-name').text.strip()  # Adjust based on the website structure
-            product_price = product.find('span', class_='product-price').text.strip()  # Adjust based on the website structure
-            product_url = product.find('a', class_='product-link')['href']  # Adjust based on the website structure
-            products.append({'Product Name': product_name, 'Price': product_price, 'URL': product_url})
+        for product in soup.find_all(
+            "div", class_="product-item"
+        ):  # Adjust class or tag based on the website structure
+            product_name = product.find(
+                "h2", class_="product-name"
+            ).text.strip()  # Adjust based on the website structure
+            product_price = product.find(
+                "span", class_="product-price"
+            ).text.strip()  # Adjust based on the website structure
+            product_url = product.find("a", class_="product-link")[
+                "href"
+            ]  # Adjust based on the website structure
+            products.append(
+                {
+                    "Product Name": product_name,
+                    "Price": product_price,
+                    "URL": product_url,
+                }
+            )
 
         return products
     except Exception as e:
         print(f"An error occurred while scraping products: {e}")
         return []
 
+
 def save_to_excel(categories, output_file):
     try:
         # Create a DataFrame from the categories list
@@ -57,35 +75,40 @@ def save_to_excel(categories, output_file):
     except Exception as e:
         print(f"An error occurred while saving to Excel: {e}")
 
+
 if __name__ == "__main__":
     # Input: E-commerce website URL
     url = input("Enter the e-commerce website URL: ").strip()
 
     # Output: Excel file name
-    output_file = 'categories_and_products.xlsx'
+    output_file = "categories_and_products.xlsx"
 
     # Scrape categories
     categories = scrape_categories(url)
 
     # Scrape products for each category
     for category in categories:
-        category_url = category['URL']
+        category_url = category["URL"]
         products = scrape_products(category_url)
         for product in products:
-            product['Category'] = category['Category']  # Add category name to each product
-        category['Products'] = products
+            product["Category"] = category[
+                "Category"
+            ]  # Add category name to each product
+        category["Products"] = products
 
     # Flatten the data for saving to Excel
     all_data = []
     for category in categories:
-        for product in category.get('Products', []):
-            all_data.append({
-                'Category': category['Category'],
-                'Product Name': product['Product Name'],
-                'Price': product['Price'],
-                'Product URL': product['URL'],
-                'Category URL': category['URL']
-            })
+        for product in category.get("Products", []):
+            all_data.append(
+                {
+                    "Category": category["Category"],
+                    "Product Name": product["Product Name"],
+                    "Price": product["Price"],
+                    "Product URL": product["URL"],
+                    "Category URL": category["URL"],
+                }
+            )
 
     # Save to Excel
     save_to_excel(all_data, output_file)
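
A hypothetical illustration of the markup shape both scrapers expect (not part of the commit): the class names come straight from the script's selectors, but the HTML itself is invented, so on a real site the "adjust based on the website structure" comments should be taken literally.

from bs4 import BeautifulSoup

html = """
<a class="category-link" href="https://shop.example/books">Books</a>
<div class="product-item">
  <h2 class="product-name">Example Book</h2>
  <span class="product-price">$10.00</span>
  <a class="product-link" href="https://shop.example/books/1">View</a>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
category = soup.find("a", class_="category-link")
product = soup.find("div", class_="product-item")

print(category["href"])                                         # https://shop.example/books
print(product.find("h2", class_="product-name").text.strip())   # Example Book
print(product.find("span", class_="product-price").text)        # $10.00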
