Review notes (text placed before the first "diff --git" header is not part of any hunk):
- app/services/order_data_insertion.py
  * insert_order_items_data used vendor_product[2] as the vendor id, but the same
    function reads that column as the commission rate. vendor_id is now taken from
    the vendor_products row through INSERT ... SELECT.
  * insert_order_items_data now rolls back on failure like the other inserters.
  * insert_revenue_data no longer rolls back inside the row loop, which discarded
    every row inserted so far while still committing and reporting success.
- app/services/shopper_data_insertion.py
  * session_date is always generated, because newdb.sql declares
    time_spent.session_date as NOT NULL.
  * imports are grouped at the top of the module.
- The "index" lines of these two files were dropped: the post-image blob hashes no
  longer match the edited content.
- The __pycache__/*.pyc entries are reproduced untouched; bytecode caches are
  normally kept out of version control (for example through .gitignore).

diff --git a/Dockerfile b/Dockerfile
index 009d5d3..22d6f8c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,3 @@
-# Use the official Python image from Docker Hub
 FROM python:3.9-slim AS base
 
 WORKDIR /app
diff --git a/app/app.py b/app/app.py
index 1b23b43..70d6abd 100644
--- a/app/app.py
+++ b/app/app.py
@@ -18,20 +18,6 @@
             time.sleep(5)
             print("table not found")
 
-        insert_fake_data(connection, "products", 1000)
-
-        # try:
-        #     insert_fake_data(connection, "products", 1000)
-        #     insert_fake_data(connection, "shoppers", 1000)
-        #     insert_fake_data(connection, "vendors", 1000)
-        #     # insert_order_items(cursor, num_order_items=5, num_orders=1000, num_products=100, num_vendors=50)
-        #     # insert_vendor_products(cursor, num_vendors=1000, num_products=1000)
-        #     # connection.commit()
-        #     print("Fake data inserted successfully")
-        # except Exception as e:
-        #     print(f"Error inserting data: {e}")
-        #     connection.rollback()
-
     except Error as e:
         print(f"Error: {e}")
 
diff --git a/app/models/__pycache__/db_connection.cpython-39.pyc b/app/models/__pycache__/db_connection.cpython-39.pyc
index 28a4d61..0129549 100644
Binary files a/app/models/__pycache__/db_connection.cpython-39.pyc and b/app/models/__pycache__/db_connection.cpython-39.pyc differ
diff --git a/app/models/__pycache__/db_create_tables.cpython-39.pyc b/app/models/__pycache__/db_create_tables.cpython-39.pyc
index e29c052..110c907 100644
Binary files a/app/models/__pycache__/db_create_tables.cpython-39.pyc and b/app/models/__pycache__/db_create_tables.cpython-39.pyc differ
diff --git a/app/models/db_connection.py b/app/models/db_connection.py
index 57c94d0..d0919e7 100644
--- a/app/models/db_connection.py
+++ b/app/models/db_connection.py
@@ -35,3 +35,4 @@ def check_tables_exist(connection, tables):
             print(f"Table {table} does not exist. Waiting for table to be created...")
             return False
     return True
+
diff --git a/app/models/db_create_tables.py b/app/models/db_create_tables.py
index c18d830..8333e93 100644
--- a/app/models/db_create_tables.py
+++ b/app/models/db_create_tables.py
@@ -1,7 +1,8 @@
 import mysql.connector
 from mysql.connector import Error
-from services.data_insertion import insert_fake_data , insert_order_items, insert_vendor_products
-from services.order_data_insertion import insert_orders_data, inset_order_items_data
+from services.data_insertion import insert_fake_data, insert_vendor_products
+from services.order_data_insertion import insert_orders_data, insert_order_items_data, insert_revenue_data, insert_cost_data
+from services.shopper_data_insertion import insert_reviews_data, insert_time_spent_data
 
 
 def create_tables_from_sql(connection, sql_file_path):
@@ -29,12 +30,12 @@ def create_tables_from_sql(connection, sql_file_path):
         insert_fake_data(cursor, connection, "shoppers", 1000)
         insert_fake_data(cursor, connection, "vendors", 1000)
         insert_vendor_products(cursor, connection)
-        insert_orders_data(cursor, connection)
-        inset_order_items_data(cursor, connection, max_items_per_order=5)
-        # insert_reviews_data(cursor, connection)
-        # insert_time_spent_data(cursor, connection)
-        # insert_costs_data(cursor, connection)
-        # insert_revenue_data(cursor, connection)
+        insert_orders_data(cursor, connection, num_orders=1000)
+        insert_order_items_data(cursor, connection, max_items_per_order=5)
+        insert_revenue_data(cursor, connection)
+        insert_reviews_data(cursor, connection, num_reviews=1000)
+        insert_time_spent_data(cursor, connection, num_records=1000)
+        insert_cost_data(cursor, connection)
         connection.commit()
         print("All tables created successfully.")
         return True
diff --git a/app/services/__pycache__/data_insertion.cpython-39.pyc b/app/services/__pycache__/data_insertion.cpython-39.pyc
index 94e4200..749824c 100644
Binary files a/app/services/__pycache__/data_insertion.cpython-39.pyc and b/app/services/__pycache__/data_insertion.cpython-39.pyc differ
diff --git a/app/services/__pycache__/order_data_insertion.cpython-39.pyc b/app/services/__pycache__/order_data_insertion.cpython-39.pyc
index 4c7616a..04921c4 100644
Binary files a/app/services/__pycache__/order_data_insertion.cpython-39.pyc and b/app/services/__pycache__/order_data_insertion.cpython-39.pyc differ
diff --git a/app/services/__pycache__/shopper_data_insertion.cpython-39.pyc b/app/services/__pycache__/shopper_data_insertion.cpython-39.pyc
new file mode 100644
index 0000000..e3f99fb
Binary files /dev/null and b/app/services/__pycache__/shopper_data_insertion.cpython-39.pyc differ
diff --git a/app/services/data_insertion.py b/app/services/data_insertion.py
index 1f58c77..10300d4 100644
--- a/app/services/data_insertion.py
+++ b/app/services/data_insertion.py
@@ -1,16 +1,17 @@
 from faker import Faker
+from faker_commerce import Provider as CommerceProvider
 import random
 
 fake = Faker()
-
+fake.add_provider(CommerceProvider)
 def insert_fake_data(cursor, connection, table_name, num_rows=1000):
     print(f"Inserting data into {table_name}...")
     try:
         for _ in range(num_rows):
             if table_name == "products":
-                name = fake.word()
+                name = fake.ecommerce_name()
                 cost = round(fake.random_number(digits=2), 2)
-                description = fake.text(max_nb_chars=200)
+                description = f"{name}: {fake.sentence()}"
                 cursor.execute(
                     f"INSERT INTO {table_name} (name, cost, description) VALUES (%s, %s, %s)",
                     (name, cost, description)
@@ -38,17 +39,15 @@ def insert_fake_data(cursor, connection, table_name, num_rows=1000):
                     f"INSERT INTO {table_name} (contact_name, company_name, contact_email, contact_phone, address, tax_number, commission_rate) VALUES (%s, %s, %s, %s, %s, %s, %s)",
                     (contact_name, company_name, contact_email, contact_phone, address, tax_number, commission_rate)
                 )
-        # Commit the transaction
         connection.commit()
         print(f"{num_rows} rows inserted into {table_name}")
     except Exception as e:
-        # Rollback in case of any error
         connection.rollback()
         print(f"Error inserting data into {table_name}: {e}")
 
 
 def insert_orders_for_shoppers(cursor, num_orders):
-    # Fetch all shopper IDs
+    # fetch all shopper IDs
     cursor.execute("SELECT id FROM shoppers")
     shoppers = cursor.fetchall()
 
@@ -101,7 +100,6 @@ def insert_vendor_products(cursor, connection):
             print("No products found.")
             return
 
-        # vendor-product associations
         for vendor in vendors:
             vendor_id = vendor[0]
             commission_rate = vendor[1]
@@ -110,7 +108,7 @@ def insert_vendor_products(cursor, connection):
                 product_id = product[0]
                 product_cost = product[1]
 
-                # calcualte vendor product based on commission
+                # calculate vendor product based on commission
                 price = round(product_cost * (1 + commission_rate / 100), 2)
 
                 try:
@@ -125,6 +123,5 @@ def insert_vendor_products(cursor, connection):
         connection.commit()
         print("Vendor-product associations inserted successfully.")
     except Exception as e:
-        # rollback when issue
         connection.rollback()
         print(f"Error inserting vendor-products: {e}")
diff --git a/app/services/order_data_insertion.py b/app/services/order_data_insertion.py
--- a/app/services/order_data_insertion.py
+++ b/app/services/order_data_insertion.py
@@ -3,8 +3,7 @@
 
 fake = Faker()
 
-def insert_orders_data(cursor, connection, num_orders):
-    num_orders = 1000
+def insert_orders_data(cursor, connection, num_orders=10000):
     try:
         cursor.execute("SELECT id FROM shoppers")
         shoppers = cursor.fetchall()
@@ -15,13 +14,11 @@ def insert_orders_data(cursor, connection, num_orders):
 
         statuses = ['complete', 'incomplete']
 
-        # Insert random orders
         for _ in range(num_orders):
             shopper_id = random.choice(shoppers)[0]
             status = random.choice(statuses)
 
             try:
-                # Insert the order with total_amount set to 0
                 cursor.execute("""
                     INSERT INTO orders (shopper_id, status, total_amount)
                     VALUES (%s, %s, %s)
@@ -30,21 +27,15 @@ def insert_orders_data(cursor, connection, num_orders):
                 print(f"Error inserting order for shopper_id {shopper_id}: {e}")
                 connection.rollback()
 
-        # Commit the changes
         connection.commit()
         print(f"{num_orders} orders inserted successfully.")
     except Exception as e:
-        # Rollback if any critical error occurs
         connection.rollback()
         print(f"Error inserting orders: {e}")
 
-def inset_order_items_data(cursor, connection, max_items_per_order=5):
+def insert_order_items_data(cursor, connection, max_items_per_order=3):
+    """Add 1..max_items_per_order random items to every order and store each order's total."""
     try:
-        # Fetch all orders
-        cursor.execute("SELECT id, total_amount FROM orders")
-        orders = cursor.fetchall()
-
-        # Fetch vendor products with vendor commission rates
         cursor.execute("""
             SELECT
                 vendor_products.id AS vendor_product_id,
@@ -55,42 +46,121 @@ def inset_order_items_data(cursor, connection, max_items_per_order=5):
         """)
         vendor_products = cursor.fetchall()
 
+        # Fetch all orders
+        cursor.execute("SELECT id, total_amount FROM orders")
+        orders = cursor.fetchall()
+
         if not orders or not vendor_products:
             print("No orders or vendor products available.")
             return
 
-        # Iterate through each order to create order items
         for order in orders:
             order_id, current_total = order
             total_amount = current_total or 0
             num_items = random.randint(1, max_items_per_order)
 
-            # Add items to this order
             for _ in range(num_items):
                 vendor_product = random.choice(vendor_products)
                 vendor_product_id = vendor_product[0]
                 base_price = vendor_product[1]
                 commission_rate = vendor_product[2]
 
-                # Calculate item price with commission
                 item_price = round(base_price * (1 + commission_rate / 100), 2)
-                quantity = random.randint(1, 5)  # Random quantity
+                quantity = random.randint(1, 5)
 
-                # Insert the order item
+                # vendor_product[2] is the commission rate, not a vendor id, so
+                # vendor_id is read from the vendor_products row itself.
                 cursor.execute("""
-                    INSERT INTO order_items (order_id, product_id, vendor_id, quantity, price)
-                    SELECT %s, vendor_products.product_id, vendor_products.vendor_id, %s, %s
+                    INSERT INTO order_items (order_id, vendor_product_id, vendor_id, quantity, price)
+                    SELECT %s, vendor_products.id, vendor_products.vendor_id, %s, %s
                     FROM vendor_products
                     WHERE vendor_products.id = %s
                 """, (order_id, quantity, item_price, vendor_product_id))
-                # Update the order's total amount
                 total_amount += item_price * quantity
 
-            # Update the total amount in the orders table
             cursor.execute("UPDATE orders SET total_amount = %s WHERE id = %s", (total_amount, order_id))
 
-        # Commit all changes
         connection.commit()
         print(f"Order items created successfully and totals updated.")
     except Exception as e:
+        connection.rollback()
         print(f"Error creating order items: {e}")
+
+def insert_revenue_data(cursor, connection):
+    """Insert one revenue row per order item holding the commission on the item price."""
+    try:
+        cursor.execute("""
+            SELECT
+                oi.order_id,
+                oi.vendor_id,
+                oi.price,
+                v.commission_rate
+            FROM order_items oi
+            JOIN vendors v ON oi.vendor_id = v.id
+        """)
+        order_items = cursor.fetchall()
+
+        if not order_items:
+            print("No order items found.")
+            return
+
+        for item in order_items:
+            order_id, vendor_id, item_price, commission_rate = item
+            commission_amount = round(item_price * (commission_rate / 100), 2)
+
+            # Let a failed insert reach the outer handler: rolling back per row
+            # would discard every revenue row inserted so far in this transaction.
+            cursor.execute("""
+                INSERT INTO revenue (order_id, vendor_id, commission_amount)
+                VALUES (%s, %s, %s)
+            """, (order_id, vendor_id, commission_amount))
+
+        connection.commit()
+        print("Revenue data inserted successfully.")
+
+    except Exception as e:
+        connection.rollback()
+        print(f"Error inserting revenue data: {e}")
+
+def insert_cost_data(cursor, connection):
+    """Insert a delivery_failure, re_attempt and return_fraud cost row for every order item."""
+    try:
+        cursor.execute("""
+            SELECT
+                oi.order_id,
+                oi.vendor_id,
+                oi.price,
+                v.commission_rate
+            FROM order_items oi
+            JOIN vendors v ON oi.vendor_id = v.id
+        """)
+        order_items = cursor.fetchall()
+
+        if not order_items:
+            print("No order items found.")
+            return
+
+        cost_types = ['delivery_failure', 're_attempt', 'return_fraud']
+
+        for item in order_items:
+            order_id, vendor_id, item_price, commission_rate = item
+            for cost_type in cost_types:
+                # possible costs
+                if cost_type == 'delivery_failure':
+                    cost_amount = round(float(item_price) * 0.10, 2)  # 10%
+                elif cost_type == 're_attempt':
+                    cost_amount = round(float(item_price) * 0.05, 2)  # 5%
+                elif cost_type == 'return_fraud':
+                    cost_amount = round(float(item_price) * 0.15, 2)  # 15%
+
+                cursor.execute("""
+                    INSERT INTO costs (order_id, type, cost_amount)
+                    VALUES (%s, %s, %s)
+                """, (order_id, cost_type, cost_amount))
+
+        connection.commit()
+        print("Cost data inserted successfully.")
+
+    except Exception as e:
+        connection.rollback()
+        print(f"Error inserting cost data: {e}")
diff --git a/app/services/shopper_data_insertion.py b/app/services/shopper_data_insertion.py
new file mode 100644
--- /dev/null
+++ b/app/services/shopper_data_insertion.py
@@ -0,0 +1,70 @@
+from datetime import datetime, timedelta
+import random
+
+from faker import Faker
+
+fake = Faker()
+
+def insert_reviews_data(cursor, connection, num_reviews=1000):
+    """Insert num_reviews random reviews; roughly 30% are stored without a comment."""
+    try:
+        cursor.execute("SELECT id FROM vendor_products")
+        vendor_products = cursor.fetchall()
+
+        cursor.execute("SELECT id FROM shoppers")
+        shoppers = cursor.fetchall()
+
+        if not vendor_products or not shoppers:
+            print("No vendor products or shoppers found.")
+            return
+
+        for _ in range(num_reviews):
+            vendor_product_id = random.choice(vendor_products)[0]
+            shopper_id = random.choice(shoppers)[0]
+
+            rating = random.randint(1, 5)
+
+            comment = None if random.random() < 0.3 else fake.text(max_nb_chars=200)
+
+            cursor.execute("""
+                INSERT INTO reviews (vendor_product_id, shopper_id, rating, comment, created_at)
+                VALUES (%s, %s, %s, %s, CURRENT_TIMESTAMP)
+            """, (vendor_product_id, shopper_id, rating, comment))
+
+        connection.commit()
+        print(f"{num_reviews} reviews inserted successfully.")
+
+    except Exception as e:
+        connection.rollback()
+        print(f"Error inserting reviews: {e}")
+
+def insert_time_spent_data(cursor, connection, num_records=1000):
+    """Insert num_records random time-on-site sessions for existing shoppers."""
+    try:
+        cursor.execute("SELECT id FROM shoppers")
+        shoppers = cursor.fetchall()
+
+        if not shoppers:
+            print("No shoppers found.")
+            return
+
+        for _ in range(num_records):
+            shopper_id = random.choice(shoppers)[0]
+
+            duration_minutes = random.randint(1, 120)
+
+            # time_spent.session_date is declared NOT NULL, so always supply a date.
+            session_date = fake.date_this_year()
+
+            cursor.execute("""
+                INSERT INTO time_spent (shopper_id, duration_minutes, session_date)
+                VALUES (%s, %s, %s)
+            """, (shopper_id, duration_minutes, session_date))
+
+        connection.commit()
+        print(f"{num_records} time spent records inserted successfully.")
+
+    except Exception as e:
+        connection.rollback()
+        print(f"Error inserting time spent data: {e}")
+
diff --git a/app/sql/newdb.sql b/app/sql/newdb.sql
new file mode 100644
index 0000000..0a3698f
--- /dev/null
+++ b/app/sql/newdb.sql
@@ -0,0 +1,103 @@
+-- Vendors Table
+CREATE TABLE vendors (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    contact_name VARCHAR(255) NOT NULL,
+    contact_email VARCHAR(255) NOT NULL UNIQUE,
+    contact_phone VARCHAR(20),
+    company_name VARCHAR(255) NOT NULL UNIQUE,
+    tax_number VARCHAR(20) UNIQUE,
+    address VARCHAR(255),
+    commission_rate DECIMAL(5, 2) NOT NULL CHECK (commission_rate >= 0 AND commission_rate <= 100)
+);
+
+-- Shoppers Table
+CREATE TABLE shoppers (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    first_name VARCHAR(255),
+    last_name VARCHAR(255),
+    email VARCHAR(255) NOT NULL UNIQUE,
+    phone_number VARCHAR(20),
+    address VARCHAR(255),
+    is_member BOOLEAN DEFAULT FALSE
+);
+
+-- Products Table
+CREATE TABLE products (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    name VARCHAR(255) NOT NULL UNIQUE,
+    description TEXT,
+    cost DECIMAL(10, 2) NOT NULL CHECK (cost > 0)
+);
+
+-- Vendor Products (many-to-many between vendors and products)
+CREATE TABLE vendor_products (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    vendor_id INT NOT NULL,
+    product_id INT NOT NULL,
+    price DECIMAL(10, 2) NOT NULL CHECK (price > 0),
+    FOREIGN KEY (vendor_id) REFERENCES vendors(id) ON DELETE CASCADE,
+    FOREIGN KEY (product_id) REFERENCES products(id) ON DELETE CASCADE
+);
+
+-- Orders Table
+CREATE TABLE orders (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    shopper_id INT NOT NULL,
+    status ENUM('complete', 'incomplete') NOT NULL,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    total_amount DECIMAL(10, 2) DEFAULT 0 CHECK (total_amount >= 0),
+    FOREIGN KEY (shopper_id) REFERENCES shoppers(id) ON DELETE CASCADE
+);
+
+-- Order Items Table (Products in Orders)
+CREATE TABLE order_items (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    order_id INT NOT NULL,
+    vendor_product_id INT NOT NULL,
+    vendor_id INT NOT NULL,
+    quantity INT NOT NULL CHECK (quantity > 0),
+    price DECIMAL(10, 2) NOT NULL CHECK (price > 0),
+    FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE,
+    FOREIGN KEY (vendor_product_id) REFERENCES vendor_products(id) ON DELETE CASCADE,
+    FOREIGN KEY (vendor_id) REFERENCES vendors(id) ON DELETE CASCADE
+);
+
+-- Reviews Table (Shopper Experience)
+CREATE TABLE reviews (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    vendor_product_id INT NOT NULL,
+    shopper_id INT NOT NULL,
+    rating INT NOT NULL CHECK (rating BETWEEN 1 AND 5),
+    comment TEXT,
+    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (vendor_product_id) REFERENCES vendor_products(id) ON DELETE CASCADE,
+    FOREIGN KEY (shopper_id) REFERENCES shoppers(id) ON DELETE CASCADE
+);
+
+-- Time Spent on Site (Shopper Experience)
+CREATE TABLE time_spent (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    shopper_id INT NOT NULL,
+    duration_minutes INT NOT NULL CHECK (duration_minutes > 0),
+    session_date DATE NOT NULL,
+    FOREIGN KEY (shopper_id) REFERENCES shoppers(id) ON DELETE CASCADE
+);
+
+-- Costs Table
+CREATE TABLE costs (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    order_id INT NOT NULL,
+    type ENUM('delivery_failure', 're_attempt', 'return_fraud') NOT NULL,
+    cost_amount DECIMAL(10, 2) NOT NULL CHECK (cost_amount >= 0),
+    FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE
+);
+
+-- Revenue Table
+CREATE TABLE revenue (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    order_id INT NOT NULL,
+    vendor_id INT NOT NULL,
+    commission_amount DECIMAL(10, 2) NOT NULL CHECK (commission_amount >= 0),
+    FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE,
+    FOREIGN KEY (vendor_id) REFERENCES vendors(id) ON DELETE CASCADE
+);
diff --git a/requirements.txt b/requirements.txt
index d48b10f..ad43942 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 mysql-connector-python
 python-dotenv
 Faker
+faker-commerce