Skip to content

Commit ae35706

Browse files
committed
adding in loading data completed code.
1 parent b0f8184 commit ae35706

File tree

10 files changed

+274
-0
lines changed

10 files changed

+274
-0
lines changed

Labs/lab_2_load_data.ipynb

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Load data into Cosmos DB using the MongoDB API\n",
8+
"\n",
9+
"This notebook demonstrates how to load data from the Cosmic Works JSON files into Cosmos DB using the MongoDB API."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import os\n",
19+
"import pymongo\n",
20+
"import requests\n",
21+
"from pymongo import UpdateOne, DeleteMany\n",
22+
"from models import Product, ProductList, Customer, CustomerList, SalesOrder, SalesOrderList\n",
23+
"from dotenv import load_dotenv"
24+
]
25+
},
26+
{
27+
"cell_type": "markdown",
28+
"metadata": {},
29+
"source": [
30+
"## Establish a connection to the database"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": null,
36+
"metadata": {},
37+
"outputs": [],
38+
"source": [
39+
"load_dotenv()\n",
40+
"CONNECTION_STRING = os.environ.get(\"DB_CONNECTION_STRING\")\n",
41+
"client = pymongo.MongoClient(CONNECTION_STRING)\n",
42+
"# Create database to hold cosmic works data\n",
43+
"# MongoDB will create the database if it does not exist\n",
44+
"db = client.cosmic_works"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"# empty the collections\n",
54+
"db.products.bulk_write([DeleteMany({})])\n",
55+
"db.customers.bulk_write([DeleteMany({})])\n",
56+
"db.sales.bulk_write([DeleteMany({})])"
57+
]
58+
},
59+
{
60+
"cell_type": "markdown",
61+
"metadata": {},
62+
"source": [
63+
"## Load products"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": null,
69+
"metadata": {},
70+
"outputs": [],
71+
"source": [
72+
"# Add product data to the database using bulk_write and UpdateOne with upsert\n",
73+
"# Get Cosmic Works product data from Azure Blob Storage\n",
74+
"product_raw_data = \"https://cosmosdbcosmicworks.blob.core.windows.net/cosmic-works-small/product.json\"\n",
75+
"product_data = ProductList(items=[Product(**data) for data in requests.get(product_raw_data).json()])\n",
76+
"db.products.bulk_write([ UpdateOne({\"_id\": prod.id}, {\"$set\": prod.model_dump(by_alias=True)}, upsert=True) for prod in product_data.items])"
77+
]
78+
},
79+
{
80+
"cell_type": "markdown",
81+
"metadata": {},
82+
"source": [
83+
"## Load customers and sales raw data"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": [
92+
"customer_sales_raw_data = \"https://cosmosdbcosmicworks.blob.core.windows.net/cosmic-works-small/customer.json\"\n",
93+
"response = requests.get(customer_sales_raw_data)\n",
94+
"# Decode as utf-8-sig so a leading UTF-8 byte order mark (BOM), if present, is stripped before JSON parsing\n",
95+
"response.encoding = 'utf-8-sig'\n",
96+
"response_json = response.json()\n",
97+
"# filter where type is customer\n",
98+
"customers = [cust for cust in response_json if cust[\"type\"] == \"customer\"]\n",
99+
"# filter where type is salesOrder\n",
100+
"sales_orders = [sales for sales in response_json if sales[\"type\"] == \"salesOrder\"]"
101+
]
102+
},
103+
{
104+
"cell_type": "markdown",
105+
"metadata": {},
106+
"source": [
107+
"## Load customers"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"customer_data = CustomerList(items=[Customer(**data) for data in customers])\n",
117+
"db.customers.bulk_write([ UpdateOne({\"_id\": cust.id}, {\"$set\": cust.model_dump(by_alias=True)}, upsert=True) for cust in customer_data.items])"
118+
]
119+
},
120+
{
121+
"cell_type": "markdown",
122+
"metadata": {},
123+
"source": [
124+
"## Load sales orders"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": null,
130+
"metadata": {},
131+
"outputs": [],
132+
"source": [
133+
"sales_data = SalesOrderList(items=[SalesOrder(**data) for data in sales_orders])\n",
134+
"db.sales.bulk_write([ UpdateOne({\"_id\": sale.id}, {\"$set\": sale.model_dump(by_alias=True)}, upsert=True) for sale in sales_data.items])"
135+
]
136+
},
137+
{
138+
"cell_type": "markdown",
139+
"metadata": {},
140+
"source": [
141+
"## Clean up"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": null,
147+
"metadata": {},
148+
"outputs": [],
149+
"source": [
150+
"client.close()"
151+
]
152+
}
153+
],
154+
"metadata": {
155+
"kernelspec": {
156+
"display_name": ".venv",
157+
"language": "python",
158+
"name": "python3"
159+
},
160+
"language_info": {
161+
"codemirror_mode": {
162+
"name": "ipython",
163+
"version": 3
164+
},
165+
"file_extension": ".py",
166+
"mimetype": "text/x-python",
167+
"name": "python",
168+
"nbconvert_exporter": "python",
169+
"pygments_lexer": "ipython3",
170+
"version": "3.11.5"
171+
}
172+
},
173+
"nbformat": 4,
174+
"nbformat_minor": 2
175+
}

Labs/models/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .tag import Tag
2+
from .product import Product, ProductList
3+
from .address import Address
4+
from .password import Password
5+
from .customer import Customer, CustomerList
6+
from .sales_order_detail import SalesOrderDetail
7+
from .sales_order import SalesOrder, SalesOrderList

Labs/models/address.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from pydantic import BaseModel, Field
2+
3+
class Address(BaseModel):
    """Postal address with snake_case fields aliased to camelCase JSON keys.

    The aliases (``addressLine1``, ``addressLine2``, ``zipCode``) are the
    keys used when validating input and when dumping with ``by_alias=True``.
    """

    # Accept the Python field names as well as the aliases, so the model can
    # be built either from raw JSON or from keyword arguments in code.
    model_config = {"populate_by_name": True}

    address_line_1: str = Field(alias="addressLine1")
    address_line_2: str = Field(alias="addressLine2")
    city: str
    state: str
    country: str
    zip_code: str = Field(alias="zipCode")

Labs/models/customer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from datetime import datetime
2+
from pydantic import BaseModel, Field
3+
from typing import List, Optional
4+
from .address import Address
5+
from .password import Password
6+
7+
class Customer(BaseModel):
    """Customer record with snake_case fields aliased to camelCase JSON keys.

    ``id`` is aliased to ``_id``; the remaining aliased fields map to their
    camelCase spelling (``customerId``, ``firstName``, ...). Dumping with
    ``by_alias=True`` emits the aliased keys.
    """

    # Accept either the alias or the Python field name on input. Declared
    # through ``model_config`` because class-based ``Config`` is deprecated
    # in Pydantic v2 and emits a deprecation warning.
    model_config = {"populate_by_name": True}

    id: str = Field(alias="_id")
    customer_id: str = Field(alias="customerId")
    # Pydantic v2 treats a bare Optional[...] annotation as
    # required-but-nullable; the explicit default makes a missing title valid.
    title: Optional[str] = None
    first_name: str = Field(alias="firstName")
    last_name: str = Field(alias="lastName")
    email_address: str = Field(alias="emailAddress")
    phone_number: str = Field(alias="phoneNumber")
    creation_date: datetime = Field(alias="creationDate")
    addresses: List[Address]
    password: Password
    sales_order_count: int = Field(alias="salesOrderCount")
22+
23+
class CustomerList(BaseModel):
    """Wrapper model holding a list of ``Customer`` entries under ``items``."""

    items: List[Customer]

Labs/models/password.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from pydantic import BaseModel
2+
3+
class Password(BaseModel):
    """Password entry made of two string fields: ``hash`` and ``salt``."""

    hash: str
    salt: str

Labs/models/product.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from pydantic import BaseModel, Field
2+
from typing import List
3+
from .tag import Tag
4+
5+
class Product(BaseModel):
    """Product record; ``id`` is aliased to the ``_id`` key.

    ``categoryId`` and ``categoryName`` keep their camelCase spelling as
    field names, so they need no alias.
    """

    # Accept either "_id" or "id" on input. Declared through ``model_config``
    # because class-based ``Config`` is deprecated in Pydantic v2 and emits a
    # deprecation warning.
    model_config = {"populate_by_name": True}

    id: str = Field(alias="_id")
    categoryId: str
    categoryName: str
    sku: str
    name: str
    description: str
    price: float
    tags: List[Tag]
17+
18+
class ProductList(BaseModel):
    """Wrapper model holding a list of ``Product`` entries under ``items``."""

    items: List[Product]

Labs/models/sales_order.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from pydantic import BaseModel, Field
2+
from typing import List
3+
from datetime import datetime
4+
from .sales_order_detail import SalesOrderDetail
5+
6+
class SalesOrder(BaseModel):
    """Sales order record with snake_case fields aliased to camelCase keys.

    ``id`` is aliased to ``_id``; ``customer_id``, ``order_date`` and
    ``ship_date`` map to ``customerId``, ``orderDate`` and ``shipDate``.
    ``details`` holds the order's ``SalesOrderDetail`` entries.
    """

    # Accept either the alias or the Python field name on input. Declared
    # through ``model_config`` because class-based ``Config`` is deprecated
    # in Pydantic v2 and emits a deprecation warning.
    model_config = {"populate_by_name": True}

    id: str = Field(alias="_id")
    customer_id: str = Field(alias="customerId")
    order_date: datetime = Field(alias="orderDate")
    ship_date: datetime = Field(alias="shipDate")
    details: List[SalesOrderDetail]
15+
16+
class SalesOrderList(BaseModel):
    """Wrapper model holding a list of ``SalesOrder`` entries under ``items``."""

    items: List[SalesOrder]

Labs/models/sales_order_detail.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from pydantic import BaseModel
2+
3+
class SalesOrderDetail(BaseModel):
    """Single sales order entry: ``sku``, ``name``, ``price`` and ``quantity``."""

    sku: str
    name: str
    price: float
    quantity: int

Labs/models/tag.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from pydantic import BaseModel, Field
2+
class Tag(BaseModel):
    """Tag with a string ``id`` (aliased to ``_id``) and a ``name``."""

    # Accept either "_id" or "id" on input. Declared through ``model_config``
    # because class-based ``Config`` is deprecated in Pydantic v2 and emits a
    # deprecation warning.
    model_config = {"populate_by_name": True}

    id: str = Field(alias="_id")
    name: str

Labs/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pymongo==4.6.1
2+
python-dotenv==1.0.0
3+
requests==2.31.0
4+
pydantic==2.5.2

0 commit comments

Comments
 (0)