Skip to content

Commit b41d6c9

Browse files
committed
forgot to save the original (updating)
1 parent 5873156 commit b41d6c9

File tree

1 file changed

+195
-8
lines changed

1 file changed

+195
-8
lines changed

Labs/lab_4_langchain.ipynb

Lines changed: 195 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
"import pymongo\n",
1919
"from typing import List\n",
2020
"from dotenv import load_dotenv\n",
21-
"from pymongo import MongoClient\n",
2221
"from langchain.chat_models import AzureChatOpenAI\n",
2322
"from langchain.embeddings import AzureOpenAIEmbeddings\n",
2423
"from langchain.vectorstores import AzureCosmosDBVectorSearch\n",
24+
"from langchain_core.vectorstores import VectorStoreRetriever\n",
2525
"from langchain.schema.document import Document\n",
2626
"from langchain.prompts import PromptTemplate\n",
2727
"from langchain.schema import StrOutputParser\n",
28-
"from langchain.schema.runnable import RunnablePassthrough"
28+
"from langchain.schema.runnable import RunnablePassthrough\n",
29+
"from langchain.agents import Tool\n",
30+
"from langchain.agents.agent_toolkits import create_conversational_retrieval_agent\n",
31+
"from langchain_core.messages import SystemMessage"
2932
]
3033
},
3134
{
@@ -41,7 +44,7 @@
4144
"COMPLETIONS_DEPLOYMENT_NAME = \"completions\"\n",
4245
"AOAI_ENDPOINT = os.environ.get(\"AOAI_ENDPOINT\")\n",
4346
"AOAI_KEY = os.environ.get(\"AOAI_KEY\")\n",
44-
"AOAI_API_VERSION = \"2023-05-15\""
47+
"AOAI_API_VERSION = \"2023-09-01-preview\""
4548
]
4649
},
4750
{
@@ -166,7 +169,7 @@
166169
" doc_dict.update(doc.metadata)\n",
167170
" if \"contentVector\" in doc_dict: \n",
168171
" del doc_dict[\"contentVector\"]\n",
169-
" str_docs.append(json.dumps(doc_dict)) \n",
172+
" str_docs.append(json.dumps(doc_dict, default=str)) \n",
170173
" # Return a single string containing each product JSON representation\n",
171174
" # separated by two newlines\n",
172175
" return \"\\n\\n\".join(str_docs)"
@@ -215,14 +218,197 @@
215218
"## LangChain Agent"
216219
]
217220
},
221+
{
222+
"cell_type": "markdown",
223+
"metadata": {},
224+
"source": [
225+
"### Create retrievers\n",
226+
"\n",
227+
"A separate retriever is required for each vector index. The following cell creates a VectorStoreRetriever for the products, customers, and sales collections and associated vector index."
228+
]
229+
},
230+
{
231+
"cell_type": "code",
232+
"execution_count": null,
233+
"metadata": {},
234+
"outputs": [],
235+
"source": [
236+
"def create_cosmic_works_vector_store_retriever(collection_name: str, top_k: int = 3):\n",
237+
" vector_store = AzureCosmosDBVectorSearch.from_connection_string(\n",
238+
" connection_string = CONNECTION_STRING,\n",
239+
" namespace = f\"cosmic_works.{collection_name}\",\n",
240+
" embedding = embedding_model,\n",
241+
" index_name = \"VectorSearchIndex\", \n",
242+
" embedding_key = \"contentVector\",\n",
243+
" text_key = \"_id\"\n",
244+
" )\n",
245+
" return vector_store.as_retriever(search_kwargs={\"k\": top_k})\n",
246+
"\n",
247+
"\n",
248+
"products_retriever = create_cosmic_works_vector_store_retriever(\"products\")\n",
249+
"customers_retriever = create_cosmic_works_vector_store_retriever(\"customers\")\n",
250+
"sales_retriever = create_cosmic_works_vector_store_retriever(\"sales\")"
251+
]
252+
},
253+
{
254+
"cell_type": "markdown",
255+
"metadata": {},
256+
"source": [
257+
" ### Create Agent Tools\n",
258+
" \n",
259+
" LangChain does have a built-in [`create_retriever_tool`](https://python.langchain.com/docs/use_cases/question_answering/conversational_retrieval_agents#retriever-tool) that wraps a vector store retriever, however, because we are storing the embeddings in the `contentVector` field of the document, we must do some pre-processing of the retrieved documents to remove this field so that we don't needlessly expend the model's token quota. \n",
260+
" \n",
261+
" Instead, we'll create a RAG chain as our tool implementation that does the pre-processing through the `format_docs` function we defined above to return each document in its JSON representation."
262+
]
263+
},
264+
{
265+
"cell_type": "code",
266+
"execution_count": null,
267+
"metadata": {},
268+
"outputs": [],
269+
"source": [
270+
"# Create tools that will use vector search in the Azure Cosmos DB for MongoDB vCore collections\n",
271+
"\n",
272+
"# create a chain on the retriever to format the documents as JSON\n",
273+
"products_retriever_chain = products_retriever | format_docs\n",
274+
"customers_retriever_chain = customers_retriever | format_docs\n",
275+
"sales_retriever_chain = sales_retriever | format_docs\n",
276+
"\n",
277+
"tools = [\n",
278+
" Tool(\n",
279+
" name = \"vector_search_products\", \n",
280+
" func = products_retriever_chain.invoke,\n",
281+
" description = \"Searches Cosmic Works product information for similar products based on the question. Returns the product information in JSON format.\"\n",
282+
" ),\n",
283+
" Tool(\n",
284+
" name = \"vector_search_customers\", \n",
285+
" func = customers_retriever_chain.invoke,\n",
286+
" description = \"Searches Cosmic Works customer information and retrieves similar customers based on the question. Returns the customer information in JSON format.\"\n",
287+
" ),\n",
288+
" Tool(\n",
289+
" name = \"vector_search_sales\", \n",
290+
" func = sales_retriever_chain.invoke,\n",
291+
" description = \"Searches Cosmic Works customer sales information and retrieves sales order details based on the question. Returns the sales order information in JSON format.\"\n",
292+
" )\n",
293+
"]"
294+
]
295+
},
296+
{
297+
"cell_type": "markdown",
298+
"metadata": {},
299+
"source": [
300+
"### Tools part 2\n",
301+
"\n",
302+
"Certain properties do not have semantic meaning (such as the GUID _id fields) and attempting to use vector search on these fields will not yield meaningful results. We need a tool to retrieve specific documents based on popular searches criteria.\n",
303+
"\n",
304+
"The following tool definitions is not an exhaustive list of what may be needed but serves as an example to provide concrete lookups of a document in the Cosmic Works database."
305+
]
306+
},
307+
{
308+
"cell_type": "code",
309+
"execution_count": null,
310+
"metadata": {},
311+
"outputs": [],
312+
"source": [
313+
"db = pymongo.MongoClient(CONNECTION_STRING).cosmic_works\n",
314+
"\n",
315+
"def get_product_by_id(product_id: str) -> str:\n",
316+
" \"\"\"\n",
317+
" Retrieves a product by its ID. \n",
318+
" \"\"\"\n",
319+
" doc = db.products.find_one({\"_id\": product_id}) \n",
320+
" if \"contentVector\" in doc:\n",
321+
" del doc[\"contentVector\"]\n",
322+
" return json.dumps(doc)\n",
323+
"\n",
324+
"def get_product_by_sku(sku: str) -> str:\n",
325+
" \"\"\"\n",
326+
" Retrieves a product by its sku.\n",
327+
" \"\"\"\n",
328+
" doc = db.products.find_one({\"sku\": sku})\n",
329+
" if \"contentVector\" in doc:\n",
330+
" del doc[\"contentVector\"]\n",
331+
" return json.dumps(doc, default=str)\n",
332+
"\n",
333+
"def get_sales_by_id(sales_id: str) -> str:\n",
334+
" \"\"\"\n",
335+
" Retrieves a sales order by its ID.\n",
336+
" \"\"\"\n",
337+
" doc = db.sales.find_one({\"_id\": sales_id})\n",
338+
" if \"contentVector\" in doc:\n",
339+
" del doc[\"contentVector\"]\n",
340+
" return json.dumps(doc, default=str) \n",
341+
"\n",
342+
"from langchain.tools import StructuredTool\n",
343+
"\n",
344+
"tools.extend([\n",
345+
" StructuredTool.from_function(get_product_by_id),\n",
346+
" StructuredTool.from_function(get_product_by_sku),\n",
347+
" StructuredTool.from_function(get_sales_by_id)\n",
348+
"])"
349+
]
350+
},
351+
{
352+
"cell_type": "markdown",
353+
"metadata": {},
354+
"source": [
355+
"### Create the agent\n",
356+
"\n",
357+
"The [`create_conversational_retrieval_agent`](https://python.langchain.com/docs/use_cases/question_answering/conversational_retrieval_agents#agent-constructor) is a built-in agent that includes conversational history as well uses the [OpenAIFunctionsAgent](https://python.langchain.com/docs/modules/agents/agent_types/openai_functions_agent#using-openaifunctionsagent) as its underlying implementation."
358+
]
359+
},
360+
{
361+
"cell_type": "code",
362+
"execution_count": null,
363+
"metadata": {},
364+
"outputs": [],
365+
"source": [
366+
"system_message = SystemMessage(\n",
367+
" content = \"\"\"\n",
368+
" You are a helpful, fun and friendly sales assistant for Cosmic Works, a bicycle and bicycle accessories store.\n",
369+
"\n",
370+
" Your name is Cosmo.\n",
371+
"\n",
372+
" You are designed to answer questions about the products that Cosmic Works sells, the customers that buy them, and the sales orders that are placed by customers.\n",
373+
"\n",
374+
" If you don't know the answer to a question, respond with \"I don't know.\"\n",
375+
" \"\"\"\n",
376+
" \n",
377+
")\n",
378+
"agent_executor = create_conversational_retrieval_agent(llm, tools, system_message = system_message, verbose=True)"
379+
]
380+
},
381+
{
382+
"cell_type": "code",
383+
"execution_count": null,
384+
"metadata": {},
385+
"outputs": [],
386+
"source": [
387+
"result = agent_executor({\"input\": \"What products do you have that are yellow?\"})\n",
388+
"print(\"***********************************************************\")\n",
389+
"print(result['output'])"
390+
]
391+
},
392+
{
393+
"cell_type": "code",
394+
"execution_count": null,
395+
"metadata": {},
396+
"outputs": [],
397+
"source": [
398+
"result = agent_executor({\"input\": \"What products were purchased for sales order '06FE91D2-B350-471A-AD29-906BF4EB97C4' ?\"})\n",
399+
"print(\"***********************************************************\")\n",
400+
"print(result['output'])"
401+
]
402+
},
218403
{
219404
"cell_type": "code",
220405
"execution_count": null,
221406
"metadata": {},
222407
"outputs": [],
223408
"source": [
224-
"client = pymongo.MongoClient(CONNECTION_STRING)\n",
225-
"db = client.cosmic_works"
409+
"result = agent_executor({\"input\": \"What was the sales order total for sales order '93436616-4C8A-407D-9FDA-908707EFA2C5' ?\"})\n",
410+
"print(\"***********************************************************\")\n",
411+
"print(result['output'])"
226412
]
227413
},
228414
{
@@ -231,8 +417,9 @@
231417
"metadata": {},
232418
"outputs": [],
233419
"source": [
234-
"# Create custom code lookup tools that retrieves documents from the customers, products, and salesorders collections by ID.\n",
235-
"def"
420+
"result = agent_executor({\"input\": \"What was the price of the product with sku `FR-R92B-58` ?\"})\n",
421+
"print(\"***********************************************************\")\n",
422+
"print(result['output'])"
236423
]
237424
}
238425
],

0 commit comments

Comments
 (0)