Commit db4b025

Author: Narra_Venkata_Raghu_Charan

Create simplescrapper.py

1 parent e9e7c96 commit db4b025

File tree

1 file changed: +91 −0 lines changed


scripts/simplescrapper.py

@@ -0,0 +1,91 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin


def scrape_categories(url):
    try:
        # Send a GET request to the URL (with a timeout so a dead server can't hang the script)
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Check for request errors

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all category elements (adjust the tag/class to the website's structure)
        categories = []
        for category in soup.find_all('a', class_='category-link'):
            category_name = category.text.strip()
            # hrefs are often relative; urljoin resolves them against the page URL
            category_url = urljoin(url, category['href'])
            categories.append({'Category': category_name, 'URL': category_url})

        return categories
    except Exception as e:
        print(f"An error occurred while scraping categories: {e}")
        return []


def scrape_products(category_url):
    try:
        # Send a GET request to the category URL
        response = requests.get(category_url, timeout=10)
        response.raise_for_status()  # Check for request errors

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all product elements (adjust the tag/class to the website's structure)
        products = []
        for product in soup.find_all('div', class_='product-item'):
            name_tag = product.find('h2', class_='product-name')
            price_tag = product.find('span', class_='product-price')
            link_tag = product.find('a', class_='product-link')
            # Skip items that don't match the expected structure, rather than
            # letting one malformed entry abort the whole category
            if not (name_tag and price_tag and link_tag):
                continue
            products.append({
                'Product Name': name_tag.text.strip(),
                'Price': price_tag.text.strip(),
                'URL': urljoin(category_url, link_tag['href']),
            })

        return products
    except Exception as e:
        print(f"An error occurred while scraping products: {e}")
        return []


def save_to_excel(rows, output_file):
    try:
        # Create a DataFrame from the list of row dicts
        df = pd.DataFrame(rows)

        # Save the DataFrame to an Excel file
        df.to_excel(output_file, index=False)
        print(f"Categories and products saved to {output_file}")
    except Exception as e:
        print(f"An error occurred while saving to Excel: {e}")


if __name__ == "__main__":
    # Input: e-commerce website URL
    url = input("Enter the e-commerce website URL: ").strip()

    # Output: Excel file name
    output_file = 'categories_and_products.xlsx'

    # Scrape categories, then the products in each category
    categories = scrape_categories(url)
    for category in categories:
        products = scrape_products(category['URL'])
        for product in products:
            product['Category'] = category['Category']  # Tag each product with its category
        category['Products'] = products

    # Flatten the nested structure into one row per product for Excel
    all_data = []
    for category in categories:
        for product in category.get('Products', []):
            all_data.append({
                'Category': category['Category'],
                'Product Name': product['Product Name'],
                'Price': product['Price'],
                'Product URL': product['URL'],
                'Category URL': category['URL'],
            })

    # Save to Excel
    save_to_excel(all_data, output_file)
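
The selector class names in the script ('category-link', 'product-item', 'product-name', 'product-price', 'product-link') are placeholders, as the inline comments say; no real storefront is guaranteed to use them. Adapting the scraper means opening the target site in the browser's inspector and swapping in its actual tags and classes. As a sketch only, against imagined markup where categories sit in a sidebar nav (every selector and name below is an assumption, not part of the commit):

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def scrape_sidebar_categories(url):
    # soup.select() takes CSS selectors, which are often easier to copy
    # out of the browser inspector than find_all() tag/class arguments.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return [
        {'Category': a.get_text(strip=True), 'URL': urljoin(url, a['href'])}
        for a in soup.select('nav.sidebar ul.categories li a')  # hypothetical markup
    ]

The same substitution applies in scrape_products: the find_all and find calls are the only site-specific parts of the script.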
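
Two operational notes the commit leaves implicit. First, pandas' to_excel needs an Excel engine installed, so the dependencies come to requests, beautifulsoup4, pandas, and openpyxl (the engine pandas uses for .xlsx files). Second, many sites reject requests' default User-Agent and respond badly to rapid-fire requests; a minimal hardening sketch, where the header string and delay are illustrative choices rather than anything the script prescribes:

import time
import requests

# A shared Session reuses connections and carries common headers.
session = requests.Session()
session.headers.update({
    # Placeholder UA string; some sites block requests' default User-Agent.
    'User-Agent': 'Mozilla/5.0 (compatible; simplescrapper/0.1)',
})

def polite_get(url, delay_seconds=1.0):
    # Crude rate limiting: pause before every request.
    time.sleep(delay_seconds)
    response = session.get(url, timeout=10)
    response.raise_for_status()
    return response

Swapping requests.get(...) for polite_get(...) inside scrape_categories and scrape_products would be the natural integration point.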

0 commit comments