diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server index 4ac52700a8..119b0ec3f9 100755 --- a/EdgeCraftRAG/Dockerfile.server +++ b/EdgeCraftRAG/Dockerfile.server @@ -1,30 +1,17 @@ FROM python:3.11-slim SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libjemalloc-dev \ - libmagic1 \ - libglib2.0-0 \ - poppler-utils \ - tesseract-ocr - -RUN apt-get update && apt-get install -y gnupg wget git -RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ - tee /etc/apt/sources.list.d/intel-gpu-jammy.list -RUN apt-get update && apt-get install -y \ - intel-opencl-icd intel-level-zero-gpu \ - intel-level-zero-gpu-raytracing \ - intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ - libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ - libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ - mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo +RUN apt-get update && apt-get install -y gnupg2 wget git +RUN apt-get remove -y libze-intel-gpu1 libigc1 libigdfcl1 libze-dev || true; \ + apt-get update; \ + apt-get install -y curl +RUN curl -sL 'https://keyserver.ubuntu.com/pks/lookup?fingerprint=on&op=get&search=0x0C0E6AF955CE463C03FC51574D098D70AFBE5E1F' | tee /etc/apt/trusted.gpg.d/driver.asc +RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: plucky\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources +RUN apt-get update && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-dev intel-ocloc libze-intel-gpu-raytracing RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ - chown -R user /home/user/ + chown -R user /home/user/ RUN mkdir /templates && \ chown -R user /templates @@ -33,17 +20,19 @@ RUN chown -R user /templates/default_prompt.txt COPY ./edgecraftrag /home/user/edgecraftrag -RUN mkdir -p /home/user/ui_cache +RUN mkdir -p /home/user/ui_cache ENV UI_UPLOAD_PATH=/home/user/ui_cache USER user WORKDIR /home/user/edgecraftrag -RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt +RUN pip3 install --no-cache-dir --upgrade setuptools==70.0.0 --break-system-packages && \ + pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt --break-system-packages + +RUN pip3 install --no-cache-dir docarray==0.40.0 --break-system-packages WORKDIR /home/user/ RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench" -ENTRYPOINT ["python", "-m", "edgecraftrag.server"] \ No newline at end of file +ENTRYPOINT ["python3", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md index 314feb3480..ba30b7bf4c 100755 --- a/EdgeCraftRAG/README.md +++ b/EdgeCraftRAG/README.md @@ -5,6 +5,12 @@ Retrieval-Augmented Generation system for 
edge solutions. It is designed to curate the RAG pipeline to meet hardware requirements at edge with guaranteed quality and performance.
 
+## What's New
+
+1. Support Intel Arc B60 for model inference
+2. Support KBadmin for knowledge base management
+3. Support the Experience Injection module in the UI
+
 ## Table of contents
 
 1. [Architecture](#architecture)
diff --git a/EdgeCraftRAG/assets/img/kbadmin_index.png b/EdgeCraftRAG/assets/img/kbadmin_index.png
new file mode 100644
index 0000000000..7383a01c79
Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_index.png differ
diff --git a/EdgeCraftRAG/assets/img/kbadmin_kb.png b/EdgeCraftRAG/assets/img/kbadmin_kb.png
new file mode 100644
index 0000000000..40f6909a9b
Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_kb.png differ
diff --git a/EdgeCraftRAG/assets/img/kbadmin_type.png b/EdgeCraftRAG/assets/img/kbadmin_type.png
new file mode 100644
index 0000000000..012ebb0bd8
Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_type.png differ
diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md
index d4d283d8f1..94d920bbc4 100755
--- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md
+++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md
@@ -10,25 +10,27 @@ This document outlines the deployment process for Edge Craft Retrieval-Augmented
 
 This section describes how to quickly deploy and test the EdgeCraftRAG service manually on Intel® Arc® platform. The basic steps are:
 
-1. [Prerequisites](#prerequisites)
-2. [Access the Code](#access-the-code)
-3. [Prepare models](#prepare-models)
-4. [Prepare env variables and configurations](#prepare-env-variables-and-configurations)
-5. [Configure the Deployment Environment](#configure-the-deployment-environment)
-6. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose)
-7. [Access UI](#access-ui)
-8. [Cleanup the Deployment](#cleanup-the-deployment)
+1. [Prerequisites](#1-prerequisites)
+2. [Access the Code](#2-access-the-code)
+3. [Prepare models](#3-prepare-models)
+4. [Prepare env variables and configurations](#4-prepare-env-variables-and-configurations)
+5. [Deploy the Service on Intel GPU Using Docker Compose](#5-deploy-the-service-on-intel-gpu-using-docker-compose)
+6. [Access UI](#6-access-ui)
+7. [Cleanup the Deployment](#7-cleanup-the-deployment)
 
-### Prerequisites
+### 1. Prerequisites
 
 EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU. Prerequisites are shown as below:
 Hardware: Intel Arc A770
 OS: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel)
 Driver & libraries: please to [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver & libraries setup
+Hardware: Intel Arc B60
+Please refer to [Install Native Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-native-environment) for detailed setup.
+
 Below steps are based on **vLLM** as inference engine, if you want to choose **OpenVINO**, please refer to [OpenVINO Local Inference](../../../../docs/Advanced_Setup.md#openvino-local-inference)
 
-### Access the Code
+### 2. Access the Code
 
 Clone the GenAIExample repository and access the EdgeCraftRAG Intel® Arc® platform Docker Compose files and supporting scripts:
@@ -43,7 +45,7 @@ Checkout a released version, such as v1.3:
 git checkout v1.3
 ```
 
-### Prepare models
+### 3.
Prepare models ```bash # Prepare models for embedding, reranking: @@ -62,7 +64,7 @@ modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" # huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" ``` -### Prepare env variables and configurations +### 4. Prepare env variables and configurations Below steps are for single Intel Arc GPU inference, if you want to setup multi Intel Arc GPUs inference, please refer to [Multi-ARC Setup](../../../../docs/Advanced_Setup.md#multi-arc-setup) @@ -77,32 +79,23 @@ export HOST_IP=$ip_address # Your host ip export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) -# If you have a proxy configured, uncomment below line -# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a proxy configured, execute below line +export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server +export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server # If you have a HF mirror configured, it will be imported to the container # export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" # Make sure all 3 folders have 1000:1000 permission, otherwise -# chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD ,so here we give permission to ${PWD} +chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD ,so here we give permission to ${PWD} # In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -# chown 1000:1000 -R $HOME/.cache +chown 1000:1000 -R $HOME/.cache ``` For more advanced env variables and configurations, please refer to [Prepare env variables for vLLM deployment](../../../../docs/Advanced_Setup.md#prepare-env-variables-for-vllm-deployment) -#### Generate nginx config file - -```bash -export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service -# Generate your nginx config file -# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath -bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf -# set NGINX_CONFIG_PATH -export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf" -``` +### 5. Deploy the Service on Intel GPU Using Docker Compose -### Deploy the Service Using Docker Compose +set Milvus DB and chat history round for inference: ```bash # EC-RAG support Milvus as persistent database, by default milvus is disabled, you can choose to set MILVUS_ENABLED=1 to enable it @@ -112,12 +105,45 @@ export MILVUS_ENABLED=0 # EC-RAG support chat history round setting, by default chat history is disabled, you can set CHAT_HISTORY_ROUND to control it # export CHAT_HISTORY_ROUND= # change to your preference +``` + +#### option a. Deploy the Service on Arc A770 Using Docker Compose + +```bash +export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service +# Generate your nginx config file +# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath +bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf +# set NGINX_CONFIG_PATH +export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf" # Launch EC-RAG service with compose docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml up -d ``` -### Access UI +#### option b. 
Deploy the Service on Arc B60 Using Docker Compose + +```bash +# Besides MILVUS_ENABLED and CHAT_HISTORY_ROUND, below environments are exposed for vLLM config, you can change them to your preference: +# export VLLM_SERVICE_PORT_B60=8086 +# export DTYPE=float16 +# export TP=1 # for multi GPU, you can change TP value +# export DP=1 +# export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... +# export ENFORCE_EAGER=1 +# export TRUST_REMOTE_CODE=1 +# export DISABLE_SLIDING_WINDOW=1 +# export GPU_MEMORY_UTIL=0.8 +# export NO_ENABLE_PREFIX_CACHING=1 +# export MAX_NUM_BATCHED_TOKENS=8192 +# export DISABLE_LOG_REQUESTS=1 +# export MAX_MODEL_LEN=49152 +# export BLOCK_SIZE=64 +# export QUANTIZATION=fp8 +docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml up -d +``` + +### 6. Access UI Open your browser, access http://${HOST_IP}:8082 @@ -126,12 +152,13 @@ Open your browser, access http://${HOST_IP}:8082 Below is the UI front page, for detailed operations on UI and EC-RAG settings, please refer to [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG.md) ![front_page](../../../../assets/img/front_page.png) -### Cleanup the Deployment +### 7. Cleanup the Deployment To stop the containers associated with the deployment, execute the following command: ``` docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml down +# or docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml down ``` All the EdgeCraftRAG containers will be stopped and then removed on completion of the "down" command. diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml index 488bccdf7a..8c74ba2717 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -71,7 +71,6 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}} ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000} @@ -80,7 +79,6 @@ services: - ${MODEL_PATH:-${PWD}}:/home/user/models - ${DOC_PATH:-${PWD}}:/home/user/docs - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom restart: always ports: diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml index a1afa20eb9..c0e14dd939 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml @@ -72,17 +72,15 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}} LLM_MODEL: ${LLM_MODEL} ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} - MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240} CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0} volumes: - ${MODEL_PATH:-${PWD}}:/home/user/models - ${DOC_PATH:-${PWD}}:/home/user/docs - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom restart: always ports: @@ -157,7 +155,6 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} MODEL_PATH: "/llm/models" SERVED_MODEL_NAME: 
${LLM_MODEL} TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_b60.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_b60.yaml new file mode 100644 index 0000000000..f10b476fc1 --- /dev/null +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm_b60.yaml @@ -0,0 +1,186 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + deploy: + replicas: ${MILVUS_ENABLED:-0} + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2023-03-20T20-16-18Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "${MINIO_PORT1:-5044}:9001" + - "${MINIO_PORT2:-5043}:9000" + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + deploy: + replicas: ${MILVUS_ENABLED:-0} + milvus-standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.4.6 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - ./milvus.yaml:/milvus/configs/milvus.yaml + - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/milvus:/var/lib/milvus + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "${MILVUS_STANDALONE_PORT:-9091}:9091" + depends_on: + - "etcd" + - "minio" + deploy: + replicas: ${MILVUS_ENABLED:-0} + edgecraftrag-server: + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + container_name: edgecraftrag-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}} + LLM_MODEL: ${LLM_MODEL} + ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-49152} + CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0} + volumes: + - ${MODEL_PATH:-${PWD}}:/home/user/models + - ${DOC_PATH:-${PWD}}:/home/user/docs + - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache + - ${PROMPT_PATH:-${PWD}}:/templates/custom + restart: always + ports: + - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} + devices: + - /dev/dri:/dev/dri + group_add: + - ${VIDEOGROUPID:-44} + - ${RENDERGROUPID:-109} + ecrag: + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} + container_name: edgecraftrag + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + restart: always + ports: + - 
${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} + depends_on: + - edgecraftrag-server + edgecraftrag-ui: + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + container_name: edgecraftrag-ui + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} + UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} + volumes: + - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache + restart: always + ports: + - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} + depends_on: + - edgecraftrag-server + - ecrag + llm-serving-xpu: + container_name: ipex-serving-xpu-container + image: intel/llm-scaler-vllm:1.0 + privileged: true + restart: always + ports: + - ${VLLM_SERVICE_PORT_B60:-8086}:${VLLM_SERVICE_PORT_B60:-8086} + volumes: + - ${MODEL_PATH}:/workspace/vllm/models + devices: + - /dev/dri:/dev/dri + environment: + DTYPE: ${DTYPE:-float16} + VLLM_SERVICE_PORT_B60: ${VLLM_SERVICE_PORT_B60:-8086} + ZE_AFFINITY_MASK: ${ZE_AFFINITY_MASK:-0} + ENFORCE_EAGER: ${ENFORCE_EAGER:-1} + TRUST_REMOTE_CODE: ${TRUST_REMOTE_CODE:-1} + DISABLE_SLIDING_WINDOW: ${DISABLE_SLIDING_WINDOW:-1} + GPU_MEMORY_UTIL: ${GPU_MEMORY_UTIL:-0.8} + NO_ENABLE_PREFIX_CACHING: ${NO_ENABLE_PREFIX_CACHING:-1} + MAX_NUM_BATCHED_TOKENS: ${MAX_NUM_BATCHED_TOKENS:-8192} + DISABLE_LOG_REQUESTS: ${DISABLE_LOG_REQUESTS:-1} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-49152} + BLOCK_SIZE: ${BLOCK_SIZE:-64} + QUANTIZATION: ${QUANTIZATION:-fp8} + LLM_MODEL: ${LLM_MODEL} + TP: ${TP:-1} + DP: ${DP:-1} + entrypoint: + /bin/bash -c " + cd /workspace/vllm/models && + VLLM_OFFLOAD_WEIGHTS_BEFORE_QUANT=1 \ + TORCH_LLM_ALLREDUCE=1 \ + VLLM_USE_V1=1 \ + CCL_ZE_IPC_EXCHANGE=pidfd \ + VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ + VLLM_WORKER_MULTIPROC_METHOD=spawn \ + python3 -m vllm.entrypoints.openai.api_server \ + --model $${LLM_MODEL} \ + --dtype $${DTYPE} \ + --enforce-eager \ + --port $${VLLM_SERVICE_PORT_B60} \ + --trust-remote-code \ + --disable-sliding-window \ + --gpu-memory-util $${GPU_MEMORY_UTIL} \ + --no-enable-prefix-caching \ + --max-num-batched-tokens $${MAX_NUM_BATCHED_TOKENS} \ + --disable-log-requests \ + --max-model-len $${MAX_MODEL_LEN} \ + --block-size $${BLOCK_SIZE} \ + --quantization $${QUANTIZATION} \ + --distributed-executor-backend mp \ + -tp=$${TP} \ + -dp=$${DP}" +networks: + default: + driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh index 2a819eb552..42d21c0370 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh @@ -87,17 +87,15 @@ services: no_proxy: \${no_proxy} http_proxy: \${http_proxy} https_proxy: \${https_proxy} - HF_ENDPOINT: \${HF_ENDPOINT} vLLM_ENDPOINT: \${vLLM_ENDPOINT:-http://\${HOST_IP}:\${NGINX_PORT:-8086}} LLM_MODEL: \${LLM_MODEL} ENABLE_BENCHMARK: \${ENABLE_BENCHMARK:-false} - MAX_MODEL_LEN: \${MAX_MODEL_LEN:-5000} + MAX_MODEL_LEN: \${MAX_MODEL_LEN:-10240} CHAT_HISTORY_ROUND: \${CHAT_HISTORY_ROUND:-0} volumes: - \${MODEL_PATH:-\${PWD}}:/home/user/models - \${DOC_PATH:-\${PWD}}:/home/user/docs - \${TMPFILE_PATH:-\${PWD}}:/home/user/ui_cache - - \${HF_CACHE:-\${HOME}/.cache}:/home/user/.cache - 
\${PROMPT_PATH:-\${PWD}}:/templates/custom restart: always ports: @@ -176,7 +174,6 @@ for ((i = 0; i < PORT_NUM; i++)); do no_proxy: \${no_proxy} http_proxy: \${http_proxy} https_proxy: \${https_proxy} - HF_ENDPOINT: \${HF_ENDPOINT} MODEL_PATH: "/llm/models" SERVED_MODEL_NAME: \${LLM_MODEL} TENSOR_PARALLEL_SIZE: \${TENSOR_PARALLEL_SIZE:-1} diff --git a/EdgeCraftRAG/docs/API_Guide.md b/EdgeCraftRAG/docs/API_Guide.md index 804fa45397..c13753596c 100644 --- a/EdgeCraftRAG/docs/API_Guide.md +++ b/EdgeCraftRAG/docs/API_Guide.md @@ -205,3 +205,18 @@ curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset -H "Content-Type: a ```bash curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt-file -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt" ``` + +## ChatQnA + +### Retrieval API + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"#Please enter the question you need to retrieve here#", "top_n":5, "max_tokens":512}' | jq '.' + +``` + +### ChatQnA API + +```bash +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' +``` diff --git a/EdgeCraftRAG/docs/Advanced_Setup.md b/EdgeCraftRAG/docs/Advanced_Setup.md index ef864fc1f4..ed6a080310 100644 --- a/EdgeCraftRAG/docs/Advanced_Setup.md +++ b/EdgeCraftRAG/docs/Advanced_Setup.md @@ -55,9 +55,9 @@ EC-RAG support using local OpenVINO models to do inference, please follow below ```bash git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/EdgeCraftRAG -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag:latest -f Dockerfile . -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-server:latest -f Dockerfile.server . -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag:latest -f Dockerfile . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-server:latest -f Dockerfile.server . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . ``` ### 2. Prepare models @@ -196,3 +196,18 @@ bash docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh $DP_NUM docker_com ### 3. 
Start Edge Craft RAG Services with Docker Compose
 
 This section is the same as default vLLM inference section, please refer to [Start Edge Craft RAG Services with Docker Compose](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-using-docker-compose)
+
+## EC-RAG with Kbadmin
+
+EC-RAG supports KBadmin as a knowledge base manager.
+Please make sure all the KBadmin services have been launched.
+EC-RAG Docker image preparation is the same as in the local inference section, please refer to [Build Docker Images](#1-optional-build-docker-images-for-mega-service-server-and-ui-by-your-own)
+Model preparation is the same as in the vLLM inference section, please refer to [Prepare models](../docker_compose/intel/gpu/arc/README.md#2-prepare-models)
+
+### 1. Start Edge Craft RAG Services with Docker Compose
+
+This section is the same as the default vLLM inference section, please refer to [Prepare env variables and configurations](../docker_compose/intel/gpu/arc/README.md#prepare-env-variables-and-configurations) and [Start Edge Craft RAG Services with Docker Compose](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-on-arc-a770-using-docker-compose)
+
+### 2. Access Kbadmin UI
+
+Please refer to [ChatQnA with Kbadmin in UI](./Explore_Edge_Craft_RAG.md#chatqna-with-kbadmin-in-ui)
diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md
index a63b3a9f0e..0a5a91ba5e 100644
--- a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md
+++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md
@@ -33,3 +33,23 @@ After knowledge base creation, you can upload the documents for retrieval.
 
 Then, you can submit messages in the chat box in `Chat` page.
 ![chat_with_rag](../assets/img/chatqna.png)
+
+## ChatQnA with Kbadmin in UI
+
+### Kbadmin Pipeline
+
+On the `Node Parser` page, select 'kbadmin' as the node parser.
+![kbadmin_node_parser](../assets/img/kbadmin_type.png)
+
+On the `Indexer` page, enter the embedding and vector DB information. Please note that the embedding service port is 13020 and the vector DB port is 29530.
+![kbadmin_indexer](../assets/img/kbadmin_index.png)
+
+### Upload files & ChatQnA
+
+After the pipeline is created, go to the `Knowledge Base` page and click the `Create Knowledge Base` button to create your knowledge base.
+Please select 'kbadmin' in `Type` and select the knowledge base name from the knowledge bases you created on the KBadmin UI page.
+
+![upload_data](../assets/img/kbadmin_kb.png)
+
+Then, you can submit messages in the chat box on the `Chat` page.
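If you prefer to verify the resulting setup from the command line rather than the chat box, the Retrieval and ChatQnA endpoints documented in docs/API_Guide.md can serve as a quick smoke test. This is a minimal sketch, assuming the default ports from the compose files (16010 for the pipeline server, 16011 for the mega service) and a placeholder question of your own:

```bash
# Smoke-test retrieval against the pipeline server (default port 16010)
curl -X POST http://${HOST_IP}:16010/v1/retrieval \
  -H "Content-Type: application/json" \
  -d '{"messages":"<your question here>", "top_n":5, "max_tokens":512}' | jq '.'

# Smoke-test end-to-end ChatQnA against the mega service (default port 16011)
curl -X POST http://${HOST_IP}:16011/v1/chatqna \
  -H "Content-Type: application/json" \
  -d '{"messages":"<your question here>", "top_n":5, "max_tokens":512}' | jq '.'
```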
+![chat_with_rag](../assets/img/chatqna.png) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py index c3facef41b..7e0f3aa831 100755 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -17,6 +17,8 @@ @chatqna_app.post(path="/v1/retrieval") async def retrieval(request: ChatCompletionRequest): try: + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kb if active_kb else None contexts = ctx.get_pipeline_mgr().run_retrieve(chat_request=request) serialized_contexts = serialize_contexts(contexts) @@ -32,6 +34,11 @@ async def chatqna(request: ChatCompletionRequest): try: sessionid = request.user set_current_session(sessionid) + experience_kb = ctx.knowledgemgr.get_active_experience() + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kb if active_kb else None + if experience_kb: + request.tool_choice = "auto" if experience_kb.experience_active else "none" generator = ctx.get_pipeline_mgr().get_active_pipeline().generator if generator: request.model = generator.model_id @@ -49,6 +56,16 @@ async def chatqna(request: ChatCompletionRequest): @chatqna_app.post(path="/v1/ragqna") async def ragqna(request: ChatCompletionRequest): try: + sessionid = request.user + set_current_session(sessionid) + experience_kb = ctx.knowledgemgr.get_active_experience() + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kb if active_kb else None + if experience_kb: + request.tool_choice = "auto" if experience_kb.experience_active else "none" + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + request.model = generator.model_id res, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) if isinstance(res, GeneratedDoc): res = res.text diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py index a000e46e88..4b3fc1b11d 100755 --- a/EdgeCraftRAG/edgecraftrag/api/v1/data.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -6,24 +6,26 @@ from edgecraftrag.api_schema import DataIn, FilesIn from edgecraftrag.context import ctx from fastapi import FastAPI, File, HTTPException, UploadFile, status +from werkzeug.utils import secure_filename data_app = FastAPI() # Upload a text or files @data_app.post(path="/v1/data") -async def add_data(request: DataIn): +async def add_data(request: DataIn, docs_name: str = None): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() docs = [] if request.text is not None: docs.extend(ctx.get_file_mgr().add_text(text=request.text)) if request.local_path is not None: - docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path, docs_name=docs_name)) nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) - if nodelist is None or len(nodelist) == 0: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - pl = ctx.get_pipeline_mgr().get_active_pipeline() - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + if active_pl.indexer.comp_subtype != "kbadmin_indexer": + if nodelist is None or len(nodelist) == 0: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") + ctx.get_node_mgr().add_nodes(active_pl.node_parser.idx, nodelist) return "Done" @@ -31,16 +33,24 @@ async def add_data(request: DataIn): @data_app.post(path="/v1/data/reindex") async def redindex_data(): pl 
= ctx.get_pipeline_mgr().get_active_pipeline() - + kb = ctx.get_knowledge_mgr().get_active_knowledge_base() + if kb: + kb_name = kb.name + docs_name = kb_name + pl.name + str(pl.indexer.d) + else: + kb_name = None + docs_name = None ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - pl.indexer.reinitialize_indexer() + pl.indexer.reinitialize_indexer(kb_name) pl.update_indexer_to_retriever() - all_docs = ctx.get_file_mgr().get_all_docs() + all_docs = [] + docs_list = ctx.get_file_mgr().get_kb_files_by_name(docs_name) + for docs_file in docs_list: + all_docs.extend(docs_file.documents) nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) if nodelist is not None and len(nodelist) > 0: ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) - return "Done" @@ -48,13 +58,15 @@ async def redindex_data(): @data_app.post(path="/v1/data/files") async def add_files(request: FilesIn): docs = [] + pl = ctx.get_pipeline_mgr().get_active_pipeline() + kb = ctx.get_knowledge_mgr().get_active_knowledge_base() + docs_name = kb.name + pl.name + str(pl.indexer.d) if request.local_paths is not None: - docs.extend(ctx.get_file_mgr().add_files(docs=request.local_paths)) + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path, kb_name=docs_name)) nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) if nodelist is None or len(nodelist) == 0: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - pl = ctx.get_pipeline_mgr().get_active_pipeline() ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) return "Done" @@ -62,32 +74,47 @@ async def add_files(request: FilesIn): # GET files @data_app.get(path="/v1/data/files") async def get_files(): - return ctx.get_file_mgr().get_files() + return ctx.get_file_mgr().get_all_docs() # GET a file @data_app.get(path="/v1/data/files/{name}") -async def get_file_docs(name): - return ctx.get_file_mgr().get_file_by_name_or_id(name) +async def get_kb_files_by_name(name): + return ctx.get_file_mgr().get_kb_files_by_name(name) # DELETE a file @data_app.delete(path="/v1/data/files/{name}") -async def delete_file(name): - if ctx.get_file_mgr().del_file(name): - pl = ctx.get_pipeline_mgr().get_active_pipeline() - +async def delete_file(kb_name, file_path): + pl = ctx.get_pipeline_mgr().get_active_pipeline() + docs_name = kb_name + pl.name + str(pl.indexer.d) + if ctx.get_file_mgr().del_file(docs_name, file_path): # Current solution: reindexing all docs after deleting one file # TODO: delete the nodes related to the file ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - pl.indexer.reinitialize_indexer() + pl.indexer.reinitialize_indexer(kb_name) pl.update_indexer_to_retriever() - - all_docs = ctx.get_file_mgr().get_all_docs() + all_docs = ctx.get_file_mgr().get_file_by_name(docs_name) nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) if nodelist is not None and len(nodelist) > 0: ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "File is deleted" + else: + return "File not found" + + +# DELETE a file +@data_app.delete(path="/v1/data/all_files/{name}") +async def delete_all_file(name): + if ctx.get_file_mgr().del_kb_file(name): + pl = ctx.get_pipeline_mgr().get_active_pipeline() + + # Current solution: reindexing all docs after deleting one file + # TODO: delete the nodes related to the file + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + pl.indexer.reinitialize_indexer() + pl.update_indexer_to_retriever() return f"File {name} is deleted" else: 
return f"File {name} not found" @@ -110,8 +137,6 @@ async def upload_file(file_name: str, file: UploadFile = File(...)): ) os.makedirs(UPLOAD_DIRECTORY, exist_ok=True) safe_filename = file.filename - # Sanitize the uploaded file's name - safe_filename = file.filename file_path = os.path.normpath(os.path.join(UPLOAD_DIRECTORY, safe_filename)) # Ensure file_path is within UPLOAD_DIRECTORY if not file_path.startswith(os.path.abspath(UPLOAD_DIRECTORY)): diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py index 21e0c0621e..5ac3b23622 100755 --- a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py @@ -5,10 +5,13 @@ import json import os import re +from typing import Dict, List, Union from edgecraftrag.api.v1.data import add_data -from edgecraftrag.api_schema import DataIn, KnowledgeBaseCreateIn +from edgecraftrag.api_schema import DataIn, ExperienceIn, KnowledgeBaseCreateIn from edgecraftrag.base import IndexerType +from edgecraftrag.components.query_preprocess import query_search +from edgecraftrag.components.retriever import get_kbs_info from edgecraftrag.context import ctx from edgecraftrag.utils import compare_mappings from fastapi import FastAPI, HTTPException, status @@ -18,6 +21,7 @@ # Define the root directory for knowledge base files KNOWLEDGE_BASE_ROOT = "/home/user/ui_cache" +CONFIG_DIR = "/home/user/ui_cache/configs" # Get all knowledge bases @@ -41,15 +45,20 @@ async def get_knowledge_base(knowledge_name: str): async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if not active_pl: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline") if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", knowledge.name): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge base names must begin with a letter or underscore", ) - kb = ctx.knowledgemgr.create_knowledge_base(knowledge) - if kb.active: - active_pl.indexer.reinitialize_indexer(kb.name) + + if knowledge.active and knowledge.comp_type == "knowledge" and knowledge.comp_subtype == "origin_kb": + active_pl.indexer.reinitialize_indexer(knowledge.name) active_pl.update_indexer_to_retriever() + elif knowledge.active and knowledge.comp_subtype == "kbadmin_kb": + active_pl.retriever.config_kbadmin_milvus(knowledge.name) + kb = ctx.knowledgemgr.create_knowledge_base(knowledge) await save_knowledge_to_file() return "Create knowledge base successfully" except Exception as e: @@ -63,17 +72,28 @@ async def delete_knowledge_base(knowledge_name: str): rm_kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) active_kb = ctx.knowledgemgr.get_active_knowledge_base() active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Cannot delete a running knowledge base." 
- ) - kb_file_path = rm_kb.get_file_paths() - if kb_file_path: - if active_pl.indexer.comp_subtype == "milvus_vector": - await remove_file_handler([], knowledge_name) + if rm_kb.comp_type == "knowledge" and rm_kb.comp_subtype == "origin_kb": if active_kb: - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() + if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Cannot delete a running knowledge base.", + ) + kb_file_path = rm_kb.get_file_paths() + if kb_file_path: + if active_pl.indexer.comp_subtype == "milvus_vector": + await remove_file_handler([], knowledge_name) + if active_kb: + active_pl.indexer.reinitialize_indexer(active_kb.name) + active_pl.update_indexer_to_retriever() + if rm_kb.comp_type == "experience": + if rm_kb.experience_active: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Cannot delete a running experience knowledge base.", + ) + else: + rm_kb.clear_experiences() result = ctx.knowledgemgr.delete_knowledge_base(knowledge_name) await save_knowledge_to_file() return result @@ -87,26 +107,31 @@ async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge.name) active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_pl.indexer.comp_subtype != "milvus_vector": - if knowledge.active and knowledge.active != kb.active: - file_paths = kb.get_file_paths() - await update_knowledge_base_handler(file_paths, knowledge.name) - elif not knowledge.active and kb.description != knowledge.description: - pass - elif not knowledge.active: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Must have an active knowledge base" - ) - else: + if active_pl.indexer.comp_subtype == "kbadmin_indexer" and kb.comp_subtype != "kbadmin_kb": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The kbadmin pipeline must correspond to the kbadmin type kb.", + ) + if active_pl.indexer.comp_subtype != "kbadmin_indexer" and kb.comp_subtype == "kbadmin_kb": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail="Not kbadmin pipeline cannot active kbadmin type kb." 
+ ) + if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": + if active_pl.indexer.comp_subtype != "milvus_vector": + if knowledge.active and knowledge.active != kb.active: + file_paths = kb.get_file_paths() + await update_knowledge_base_handler(file_paths, knowledge.name) + elif not knowledge.active and kb.description != knowledge.description: + pass + else: + if knowledge.active and knowledge.active != kb.active: + active_pl.indexer.reinitialize_indexer(knowledge.name) + active_pl.update_indexer_to_retriever() + elif not knowledge.active and kb.description != knowledge.description: + pass + elif kb.comp_subtype == "kbadmin_kb": if knowledge.active and knowledge.active != kb.active: - active_pl.indexer.reinitialize_indexer(knowledge.name) - active_pl.update_indexer_to_retriever() - elif not knowledge.active and kb.description != knowledge.description: - pass - elif not knowledge.active: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Must have an active knowledge base" - ) + active_pl.retriever.config_kbadmin_milvus(kb.name) result = ctx.knowledgemgr.update_knowledge_base(knowledge) await save_knowledge_to_file() return result @@ -120,6 +145,16 @@ async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail="The experience type cannot perform file operations." + ) + if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Please proceed to the kbadmin interface to perform the operation.", + ) + # Validate and normalize the user-provided path user_path = file_path.local_path normalized_path = os.path.normpath(os.path.join(KNOWLEDGE_BASE_ROOT, user_path)) if not normalized_path.startswith(KNOWLEDGE_BASE_ROOT): @@ -170,6 +205,15 @@ async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail="The experience type cannot perform file operations." 
+ ) + if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Please proceed to the kbadmin interface to perform the operation.", + ) active_kb = ctx.knowledgemgr.get_active_knowledge_base() if file_path.local_path in kb.get_file_paths(): kb.remove_file_path(file_path.local_path) @@ -178,17 +222,9 @@ async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): kb_file_path = kb.get_file_paths() if active_pl.indexer.comp_subtype == "milvus_vector": - if active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - await remove_file_handler(kb_file_path, knowledge_name) - else: - await remove_file_handler(kb_file_path, knowledge_name) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - else: - await remove_file_handler(kb_file_path, knowledge_name) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() + docs_name = kb.name + active_pl.name + str(active_pl.indexer.d) + docs_list = ctx.get_file_mgr().del_file(docs_name, file_path.local_path) + active_pl.indexer.delete(docs_list) elif active_kb: if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: await update_knowledge_base_handler(kb_file_path, knowledge_name) @@ -198,14 +234,115 @@ async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) +@kb_app.post("/v1/experience") +def get_experience_by_question(req: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + result = kb.get_experience_by_question(req.question) + if not result: + raise HTTPException(404, detail="Experience not found") + return result + + +@kb_app.get("/v1/experiences") +def get_all_experience(): + kb = ctx.knowledgemgr.get_experience_kb() + if kb: + return kb.get_all_experience() + else: + return kb + + +@kb_app.patch("/v1/experiences") +def update_experience(experience: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + result = kb.update_experience(experience.question, experience.content) + if not result: + raise HTTPException(404, detail="Question not found") + return result + + +@kb_app.delete("/v1/experiences") +def delete_experience(req: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + success = kb.delete_experience(req.question) + if not success: + raise HTTPException(404, detail=f"Question {req.question} not found") + return {"message": "Question deleted"} + + +@kb_app.post("/v1/multiple_experiences/check") +def check_duplicate_multiple_experiences(experiences: List[Dict[str, Union[str, List[str]]]]): + kb = ctx.knowledgemgr.get_experience_kb() + if not kb: + raise HTTPException(404, detail="No active experience type knowledge base") + all_existing = kb.get_all_experience() + existing_questions = {item["question"] for item in all_existing} + new_questions = [exp["question"] for exp in experiences if "question" in exp] + duplicate_questions = [q for q in new_questions if q in existing_questions] + if duplicate_questions: + return {"code": 2001, "detail": "Duplicate experiences are appended OR overwritten!"} + else: + kb.add_multiple_experiences(experiences=experiences, flag=True) + return {"status": "success", "detail": "No duplicate experiences, added successfully"} + + +@kb_app.post("/v1/multiple_experiences/confirm") +def confirm_multiple_experiences(experiences: List[Dict[str, 
Union[str, List[str]]]], flag: bool): + kb = ctx.knowledgemgr.get_experience_kb() + try: + if not kb: + raise HTTPException(404, detail="No active experience type knowledge base") + kb.add_multiple_experiences(experiences=experiences, flag=flag) + return {"status": "success", "detail": "Experiences added successfully"} + except Exception as e: + raise HTTPException(status_code=500, detail=f"Add Failure:{str(e)}") + + +@kb_app.post("/v1/experiences/files") +def add_experiences_from_file(req: DataIn): + kb = ctx.knowledgemgr.get_experience_kb() + try: + kb.add_experiences_from_file(req.local_path) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@kb_app.post(path="/v1/view_sub_questions") +async def view_sub_questions(que: ExperienceIn): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + CONFIG_DIR + search_config_path = os.path.join(CONFIG_DIR, "search_config.yaml") + search_dir = os.path.join(CONFIG_DIR, "experience_dir/experience.json") + top1_issue, sub_questions_result = await query_search( + user_input=que.question, search_config_path=search_config_path, search_dir=search_dir, pl=active_pl + ) + return sub_questions_result + + +@kb_app.get("/v1/kbadmin/kbs_list") +def get_kbs_list(): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + try: + if not active_pl or active_pl.indexer.comp_subtype != "kbadmin_indexer": + return [] + CONNECTION_ARGS = {"uri": active_pl.indexer.vector_url} + kbs_list = get_kbs_info(CONNECTION_ARGS) + kb_names = [name for name in kbs_list.keys()] + return kb_names + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + # Update knowledge base data async def update_knowledge_base_handler(file_path=None, knowledge_name: str = "default_kb", add_file: bool = False): if ctx.get_pipeline_mgr().get_active_pipeline() is None: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline") pl = ctx.get_pipeline_mgr().get_active_pipeline() + docs_name = knowledge_name + pl.name + str(pl.indexer.d) if add_file and file_path: - return await add_data(file_path) + return await add_data(file_path, docs_name) else: try: ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) @@ -214,7 +351,7 @@ async def update_knowledge_base_handler(file_path=None, knowledge_name: str = "d if file_path: for file in file_path: request = DataIn(local_path=file) - await add_data(request) + await add_data(request, docs_name) except MilvusException as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) return "Done" @@ -233,16 +370,16 @@ async def remove_file_handler(file_path=None, knowledge_name: str = "default_kb" except MilvusException as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) pl.update_indexer_to_retriever() + docs_name = knowledge_name + pl.name + str(pl.indexer.d) if file_path: for file in file_path: request = DataIn(local_path=file) - await add_data(request) + await add_data(request, docs_name) return "Done" # Restore knowledge base configuration async def load_knowledge_from_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" KNOWLEDGEBASE_FILE = os.path.join(CONFIG_DIR, "knowledgebase.json") active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if os.path.exists(KNOWLEDGEBASE_FILE): @@ -253,28 +390,31 @@ async def load_knowledge_from_file(): for Knowledgebase_data in all_data: pipeline_req = KnowledgeBaseCreateIn(**Knowledgebase_data) kb = 
ctx.knowledgemgr.create_knowledge_base(pipeline_req) - if Knowledgebase_data["file_map"]: - if active_pl.indexer.comp_subtype != "milvus_vector" and Knowledgebase_data["active"]: - for file_path in Knowledgebase_data["file_map"].values(): - await update_knowledge_base_handler( - DataIn(local_path=file_path), Knowledgebase_data["name"], add_file=True - ) - kb.add_file_path(file_path) - elif Knowledgebase_data["active"]: - active_pl.indexer.reinitialize_indexer(Knowledgebase_data["name"]) - active_pl.update_indexer_to_retriever() - for file_path in Knowledgebase_data["file_map"].values(): - kb.add_file_path(file_path) - else: - for file_path in Knowledgebase_data["file_map"].values(): - kb.add_file_path(file_path) + if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": + if Knowledgebase_data["file_map"]: + if active_pl.indexer.comp_subtype != "milvus_vector" and Knowledgebase_data["active"]: + for file_path in Knowledgebase_data["file_map"].values(): + await update_knowledge_base_handler( + DataIn(local_path=file_path), Knowledgebase_data["name"], add_file=True + ) + kb.add_file_path(file_path) + elif Knowledgebase_data["active"]: + active_pl.indexer.reinitialize_indexer(Knowledgebase_data["name"]) + active_pl.update_indexer_to_retriever() + for file_path in Knowledgebase_data["file_map"].values(): + kb.add_file_path(file_path) + else: + for file_path in Knowledgebase_data["file_map"].values(): + kb.add_file_path(file_path) + elif kb.comp_subtype == "kbadmin_kb": + if Knowledgebase_data["active"]: + active_pl.retriever.config_kbadmin_milvus(kb.name) except Exception as e: print(f"Error load Knowledge base: {e}") # Configuration of knowledge base for persistence async def save_knowledge_to_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" KNOWLEDGEBASE_FILE = os.path.join(CONFIG_DIR, "knowledgebase.json") if not os.path.exists(CONFIG_DIR): os.makedirs(CONFIG_DIR, exist_ok=True) @@ -282,7 +422,15 @@ async def save_knowledge_to_file(): kb_base = ctx.knowledgemgr.get_all_knowledge_bases() knowledgebases_data = [] for kb in kb_base: - kb_json = {"name": kb.name, "description": kb.description, "active": kb.active, "file_map": kb.file_map} + kb_json = { + "name": kb.name, + "description": kb.description, + "active": kb.active, + "file_map": kb.file_map, + "comp_type": kb.comp_type, + "comp_subtype": kb.comp_subtype, + "experience_active": kb.experience_active, + } knowledgebases_data.append(kb_json) json_str = json.dumps(knowledgebases_data, indent=2, ensure_ascii=False) with open(KNOWLEDGEBASE_FILE, "w", encoding="utf-8") as f: @@ -291,7 +439,7 @@ async def save_knowledge_to_file(): print(f"Error saving Knowledge base: {e}") -all_pipeline_milvus_maps = {} +all_pipeline_milvus_maps = {"change_pl": []} current_pipeline_kb_map = {} @@ -299,29 +447,87 @@ async def refresh_milvus_map(milvus_name): current_pipeline_kb_map.clear() knowledge_bases_list = await get_all_knowledge_bases() for kb in knowledge_bases_list: + if kb.comp_type == "experience": + continue current_pipeline_kb_map[kb.name] = kb.file_map all_pipeline_milvus_maps[milvus_name] = copy.deepcopy(current_pipeline_kb_map) + milvus_maps_path = os.path.join(CONFIG_DIR, "milvus_maps.json") + with open(milvus_maps_path, "w", encoding="utf-8") as f: + json.dump(all_pipeline_milvus_maps, f, ensure_ascii=False, indent=2) -async def Synchronizing_vector_data(old_active_pl, new_active_pl): +def read_milvus_maps(): + milvus_maps_path = os.path.join(CONFIG_DIR, "milvus_maps.json") + global all_pipeline_milvus_maps try: + with 
open(milvus_maps_path, "r", encoding="utf-8") as f: + all_pipeline_milvus_maps = json.load(f) + except Exception as e: + all_pipeline_milvus_maps = {"change_pl": []} + return all_pipeline_milvus_maps + + +def save_change_pl(pl_name): + if pl_name not in all_pipeline_milvus_maps["change_pl"]: + return all_pipeline_milvus_maps["change_pl"].append(pl_name) + + +async def Synchronizing_vector_data(old_active_pl, new_active_pl, pl_change): + try: + if pl_change: + save_change_pl(new_active_pl.name) active_kb = ctx.knowledgemgr.get_active_knowledge_base() - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + # Determine whether it is kbadmin type + if old_active_pl: + if ( + old_active_pl.retriever.comp_subtype == "kbadmin_retriever" + and new_active_pl.retriever.comp_subtype == "kbadmin_retriever" + ): + if active_kb: + if active_kb.comp_subtype == "kbadmin_kb": + new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) + return True + elif old_active_pl.retriever.comp_subtype == "kbadmin_retriever": + return True + milvus_name = ( old_active_pl.name + str(old_active_pl.indexer.model_extra["d"]) if old_active_pl else "default_kb" ) - if not active_kb: - return True - if not active_pl: + if not new_active_pl.status.active: if old_active_pl: if old_active_pl.indexer.comp_subtype == "milvus_vector": await refresh_milvus_map(milvus_name) return True - + if not active_kb: + return True + if new_active_pl.retriever.comp_subtype == "kbadmin_retriever": + if active_kb: + if active_kb.comp_subtype == "kbadmin_kb": + new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) + return True + # Perform milvus data synchronization if new_active_pl.indexer.comp_subtype == "milvus_vector": + # Pipeline component state changed + if new_active_pl.name in all_pipeline_milvus_maps["change_pl"]: + kb_list = await get_all_knowledge_bases() + for kb in kb_list: + if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": + new_active_pl.indexer.clear_milvus_collection(kb.name) + new_active_pl.indexer.reinitialize_indexer(kb.name) + new_active_pl.update_indexer_to_retriever() + add_list = kb.get_file_paths() + docs_name = kb.name + new_active_pl.name + str(new_active_pl.indexer.d) + ctx.get_file_mgr().del_kb_file(docs_name) + for file in add_list: + await add_data(DataIn(local_path=file), docs_name) + all_pipeline_milvus_maps["change_pl"].remove(new_active_pl.name) + return True + # Pipeline component state not changed new_milvus_map = {} kb_list = await get_all_knowledge_bases() for kb in kb_list: + if kb.comp_type == "experience": + continue new_milvus_map[kb.name] = kb.file_map added_files, deleted_files = compare_mappings( new_milvus_map, @@ -330,21 +536,22 @@ async def Synchronizing_vector_data(old_active_pl, new_active_pl): # Synchronization of deleted files for kb_name, file_paths in deleted_files.items(): if file_paths: - new_active_pl.indexer.clear_milvus_collection(kb_name) if kb_name not in new_milvus_map.keys(): + new_active_pl.indexer.clear_milvus_collection(kb_name) continue kb = await get_knowledge_base(kb_name) new_active_pl.indexer.reinitialize_indexer(kb_name) - file_paths = kb.get_file_paths() - if file_paths: - for file in file_paths: - await add_data(DataIn(local_path=file)) + for file_path in file_paths.values(): + docs_name = kb.name + new_active_pl.name + str(new_active_pl.indexer.d) + docs_list = ctx.get_file_mgr().del_file(docs_name, file_path) + new_active_pl.indexer.delete(docs_list) # Synchronization of added files for kb_name, file_paths in added_files.items(): if 
file_paths: for file_path in file_paths.values(): new_active_pl.indexer.reinitialize_indexer(kb_name) - await add_data(DataIn(local_path=file_path)) + docs_name = kb_name + new_active_pl.name + str(new_active_pl.indexer.d) + await add_data(DataIn(local_path=file_path), docs_name) new_active_pl.indexer.reinitialize_indexer(active_kb.name) new_active_pl.update_indexer_to_retriever() @@ -354,7 +561,8 @@ async def Synchronizing_vector_data(old_active_pl, new_active_pl): new_active_pl.update_indexer_to_retriever() add_list = active_kb.get_file_paths() for file in add_list: - await add_data(DataIn(local_path=file)) + docs_name = active_kb.name + new_active_pl.name + str(new_active_pl.indexer.d) + await add_data(DataIn(local_path=file), docs_name) if old_active_pl: if old_active_pl.indexer.comp_subtype == "milvus_vector": await refresh_milvus_map(milvus_name) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py index bbc0d9806b..bce669280b 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/model.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -123,7 +123,10 @@ def get_available_weights(model_path): def get_available_models(model_type): avail_models = [] - if model_type == "LLM": + if model_type == "vLLM": + LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen3-8B") + avail_models.append(LLM_MODEL) + elif model_type == "LLM": items = os.listdir(CONTAINER_MODEL_PATH) for item in items: if item == "BAAI": @@ -134,6 +137,8 @@ def get_available_models(model_type): avail_models.append(item + "/" + sub_path) else: avail_models.append(item) + elif model_type == "kbadmin_embedding_model": + return ["BAAI/bge-large-zh-v1.5"] else: for item in os.listdir(CONTAINER_MODEL_PATH + "BAAI"): if (model_type == "reranker" and "rerank" in item) or (model_type == "embedding" and "rerank" not in item): diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py index e1cd5b8345..dfa2ec25e6 100755 --- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -4,6 +4,7 @@ import asyncio import json import os +import re import weakref from concurrent.futures import ThreadPoolExecutor @@ -12,15 +13,21 @@ from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType from edgecraftrag.components.benchmark import Benchmark from edgecraftrag.components.generator import QnAGenerator -from edgecraftrag.components.indexer import VectorIndexer +from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer from edgecraftrag.components.node_parser import ( HierarchyNodeParser, + KBADMINParser, SimpleNodeParser, SWindowNodeParser, UnstructedNodeParser, ) from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor -from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.components.retriever import ( + AutoMergeRetriever, + KBadminRetriever, + SimpleBM25Retriever, + VectorSimRetriever, +) from edgecraftrag.context import ctx from fastapi import FastAPI, File, HTTPException, UploadFile, status from pymilvus import connections @@ -51,16 +58,30 @@ async def get_pipeline_json(name): # GET Pipeline benchmark -@pipeline_app.get(path="/v1/settings/pipelines/{name}/benchmark") -async def get_pipeline_benchmark(name): - pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) +@pipeline_app.get(path="/v1/settings/pipeline/benchmark") +async def 
get_pipeline_benchmark(): + pl = ctx.get_pipeline_mgr().get_active_pipeline() if pl and pl.benchmark: return pl.benchmark +# GET Pipeline benchmark +@pipeline_app.get(path="/v1/settings/pipelines/{name}/benchmarks") +async def get_pipeline_benchmarks(name): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + if pl and pl.benchmark: + return pl.benchmark.benchmark_data_list + + # POST Pipeline @pipeline_app.post(path="/v1/settings/pipelines") async def add_pipeline(request: PipelineCreateIn): + pattern = re.compile(r"^[a-zA-Z0-9_]+$") + if not pattern.fullmatch(request.name): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Pipeline name must consist of letters, numbers, and underscores.", + ) return load_pipeline(request) @@ -126,9 +147,11 @@ def update_pipeline_handler(pl, req): active_kb = ctx.knowledgemgr.get_active_knowledge_base() active_pipeline = ctx.get_pipeline_mgr().get_active_pipeline() kb_name = active_kb.name if active_kb else "default_kb" + pl_change = False if req.node_parser is not None: np = req.node_parser + pl_change = ctx.get_node_parser_mgr().search_parser_change(pl, req) found_parser = ctx.get_node_parser_mgr().search_parser(np) if found_parser is not None: pl.node_parser = found_parser @@ -153,12 +176,10 @@ def update_pipeline_handler(pl, req): pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) case NodeParserType.UNSTRUCTURED: pl.node_parser = UnstructedNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.KBADMINPARSER: + pl.node_parser = KBADMINParser() ctx.get_node_parser_mgr().add(pl.node_parser) - all_docs = ctx.get_file_mgr().get_all_docs() - nodelist = pl.node_parser.run(docs=all_docs) - if nodelist is not None and len(nodelist) > 0: - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) pl._node_changed = True if req.indexer is not None: @@ -168,17 +189,24 @@ def update_pipeline_handler(pl, req): pl.indexer = found_indexer else: embed_model = None - if ind.embedding_model: - embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) - if embed_model is None: - ind.embedding_model.model_type = ModelType.EMBEDDING - embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) - ctx.get_model_mgr().add(embed_model) match ind.indexer_type: case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR | IndexerType.MILVUS_VECTOR: + if ind.embedding_model: + embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) + if embed_model is None: + ind.embedding_model.model_type = ModelType.EMBEDDING + embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) + ctx.get_model_mgr().add(embed_model) # TODO: **RISK** if considering 2 pipelines with different # nodes, but same indexer, what will happen? 
- pl.indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_uri, kb_name) + pl.indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_url, kb_name) + case IndexerType.KBADMIN_INDEXER: + kbadmin_embedding_url = ind.embedding_url + KBADMIN_VECTOR_URL = ind.vector_url + embed_model = ind.embedding_model.model_id + pl.indexer = KBADMINIndexer( + embed_model, ind.indexer_type, kbadmin_embedding_url, KBADMIN_VECTOR_URL + ) case _: pass ctx.get_indexer_mgr().add(pl.indexer) @@ -208,6 +236,8 @@ def update_pipeline_handler(pl, req): pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk) else: return Exception("No indexer") + case RetrieverType.KBADMIN_RETRIEVER: + pl.retriever = KBadminRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) case _: pass # Index is updated to retriever @@ -272,7 +302,7 @@ def run_async_task(): loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(Synchronizing_vector_data(active_pipeline, pl)) + loop.run_until_complete(Synchronizing_vector_data(active_pipeline, pl, pl_change)) except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Synchronization error: {e}") finally: @@ -292,8 +322,8 @@ def load_pipeline_from_file(): with open(PIPELINE_FILE, "r", encoding="utf-8") as f: all_pipelines = f.read() try: - all_da = json.loads(all_pipelines) - for pipeline_data in all_da: + all_data = json.loads(all_pipelines) + for pipeline_data in all_data: one_pipelinejson = json.loads(pipeline_data) pipeline_req = PipelineCreateIn(**one_pipelinejson) load_pipeline(pipeline_req) @@ -323,18 +353,18 @@ def save_pipeline_to_file(): # Detecting if milvus is connected @pipeline_app.post(path="/v1/check/milvus") async def check_milvus(request: MilvusConnectRequest): - vector_uri = request.vector_uri + vector_url = request.vector_url try: - if vector_uri.startswith("http://"): - host_port = vector_uri.replace("http://", "") - elif vector_uri.startswith("https://"): - host_port = vector_uri.replace("https://", "") + if vector_url.startswith("http://"): + host_port = vector_url.replace("http://", "") + elif vector_url.startswith("https://"): + host_port = vector_url.replace("https://", "") else: - host_port = vector_uri + host_port = vector_url host, port = host_port.split(":", 1) - connections.connect(alias="default", host=host, port=port) + connections.connect(alias="knowledge_default", host=host, port=port) - if connections.has_connection("default"): + if connections.has_connection("knowledge_default"): return {"status": "200", "message": "Milvus connection successful."} else: return {"status": "404", "message": "Milvus connection failed."} diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py index 86639a40a7..0de6a283a2 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py @@ -41,6 +41,8 @@ async def get_prompt(): try: generator = ctx.get_pipeline_mgr().get_active_pipeline().generator if generator: + if generator.prompt_content is not None: + return generator.prompt_content return generator.prompt except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py index d7ae1c8478..2bdf8dbd4e 100644 --- a/EdgeCraftRAG/edgecraftrag/api_schema.py +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -25,7 +25,8 @@ class 
NodeParserIn(BaseModel): class IndexerIn(BaseModel): indexer_type: str embedding_model: Optional[ModelIn] = None - vector_uri: Optional[str] = None + embedding_url: Optional[str] = None + vector_url: Optional[str] = None class RetrieverIn(BaseModel): @@ -80,7 +81,15 @@ class KnowledgeBaseCreateIn(BaseModel): name: str description: Optional[str] = None active: Optional[bool] = None + comp_type: Optional[str] = "knowledge" + comp_subtype: Optional[str] = "origin_kb" + experience_active: Optional[bool] = None + + +class ExperienceIn(BaseModel): + question: str + content: list[str] = None class MilvusConnectRequest(BaseModel): - vector_uri: str + vector_url: str diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py index db1dc414b8..3306afc2ed 100644 --- a/EdgeCraftRAG/edgecraftrag/base.py +++ b/EdgeCraftRAG/edgecraftrag/base.py @@ -19,7 +19,9 @@ class CompType(str, Enum): RETRIEVER = "retriever" POSTPROCESSOR = "postprocessor" GENERATOR = "generator" + QUERYSEARCH = "querysearch" FILE = "file" + CHUNK_NUM = "chunk_num" class ModelType(str, Enum): @@ -44,6 +46,7 @@ class NodeParserType(str, Enum): HIERARCHY = "hierarchical" SENTENCEWINDOW = "sentencewindow" UNSTRUCTURED = "unstructured" + KBADMINPARSER = "kbadmin_parser" class IndexerType(str, Enum): @@ -51,6 +54,7 @@ class IndexerType(str, Enum): FAISS_VECTOR = "faiss_vector" DEFAULT_VECTOR = "vector" MILVUS_VECTOR = "milvus_vector" + KBADMIN_INDEXER = "kbadmin_indexer" class RetrieverType(str, Enum): @@ -58,6 +62,7 @@ class RetrieverType(str, Enum): VECTORSIMILARITY = "vectorsimilarity" AUTOMERGE = "auto_merge" BM25 = "bm25" + KBADMIN_RETRIEVER = "kbadmin_retriever" class PostProcessorType(str, Enum): @@ -113,9 +118,19 @@ class BaseMgr: def __init__(self): self.components = {} - def add(self, comp: BaseComponent): + def add(self, comp: BaseComponent, name: str = None): + if name: + self.components[name] = comp + return True self.components[comp.idx] = comp + def append(self, comp: BaseComponent, name: str = None): + key = name if name else comp.idx + if key not in self.components: + self.components[key] = [] + self.components[key].append(comp) + return True + def get(self, idx: str) -> BaseComponent: if idx in self.components: return self.components[idx] diff --git a/EdgeCraftRAG/edgecraftrag/components/benchmark.py b/EdgeCraftRAG/edgecraftrag/components/benchmark.py index fc3801b5d3..3bf2a7e602 100644 --- a/EdgeCraftRAG/edgecraftrag/components/benchmark.py +++ b/EdgeCraftRAG/edgecraftrag/components/benchmark.py @@ -49,7 +49,14 @@ def cal_input_token_size(self, input_text_list): return input_token_size def init_benchmark_data(self): - pipeline_comp = [CompType.RETRIEVER, CompType.POSTPROCESSOR, CompType.GENERATOR] + pipeline_comp = [ + CompType.NODEPARSER, + CompType.CHUNK_NUM, + CompType.RETRIEVER, + CompType.POSTPROCESSOR, + CompType.QUERYSEARCH, + CompType.GENERATOR, + ] if self.is_enabled(): with self._idx_lock: self.last_idx += 1 @@ -58,6 +65,8 @@ def init_benchmark_data(self): data["idx"] = idx for comp in pipeline_comp: data[comp] = "" + data[CompType.NODEPARSER] = 0 + data[CompType.CHUNK_NUM] = 0 return idx, data def update_benchmark_data(self, idx, comp_type, start, end): diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py index cb170fcd10..0f746b89cb 100755 --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -84,10 +84,39 @@ def extract_unstructured_eles(retrieved_nodes=[], 
text_gen_context=""): return unstructured_str +def build_stream_response(status=None, content=None, error=None): + response = {"status": status, "contentType": "text"} + if content is not None: + response["content"] = content + if error is not None: + response["error"] = error + return response + + async def local_stream_generator(lock, llm, prompt_str, unstructured_str): async with lock: response = llm.stream_complete(prompt_str) collected_data = [] + try: + for r in response: + collected_data.append(r.delta) + yield r.delta + await asyncio.sleep(0) + if unstructured_str: + collected_data.append(unstructured_str) + yield unstructured_str + res = "".join(collected_data) + save_history(res) + except Exception as e: + start_idx = str(e).find("message") + len("message") + result_error = str(e)[start_idx:] + yield f"code:0000{result_error}" + + +async def stream_generator(llm, prompt_str, unstructured_str): + response = llm.stream_complete(prompt_str) + collected_data = [] + try: for r in response: collected_data.append(r.delta) yield r.delta @@ -97,20 +126,10 @@ async def local_stream_generator(lock, llm, prompt_str, unstructured_str): yield unstructured_str res = "".join(collected_data) save_history(res) - - -async def stream_generator(llm, prompt_str, unstructured_str): - response = llm.stream_complete(prompt_str) - collected_data = [] - for r in response: - collected_data.append(r.delta) - yield r.delta - await asyncio.sleep(0) - if unstructured_str: - collected_data.append(unstructured_str) - yield unstructured_str - res = "".join(collected_data) - save_history(res) + except Exception as e: + start_idx = str(e).find("message") + len("message") + result_error = str(e)[start_idx:] + yield f"code:0000{result_error}" class QnAGenerator(BaseComponent): @@ -130,13 +149,20 @@ def __init__(self, llm_model, prompt_template_file, inference_type, vllm_endpoin self.llm = llm_model if isinstance(llm_model, str): self.model_id = llm_model + self.model_path = llm_model else: - self.model_id = llm_model().model_id + llm_instance = llm_model() + if llm_instance.model_path is None or llm_instance.model_path == "": + self.model_id = llm_instance.model_id + self.model_path = os.path.join("/home/user/models/", os.getenv("LLM_MODEL", "Qwen/Qwen3-8B")) + else: + self.model_id = llm_instance.model_id + self.model_path = llm_instance.model_path if self.inference_type == InferenceType.LOCAL: self.lock = asyncio.Lock() self.prompt_content = prompt_content self.prompt_template_file = prompt_template_file - self.prompt = self.init_prompt(self.model_id, self.prompt_content, self.prompt_template_file) + self.prompt = self.init_prompt(self.model_path, self.prompt_content, self.prompt_template_file) self.llm = llm_model if isinstance(llm_model, str): @@ -151,20 +177,13 @@ def __init__(self, llm_model, prompt_template_file, inference_type, vllm_endpoin vllm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8086") self.vllm_endpoint = vllm_endpoint - def init_prompt(self, model_id, prompt_content=None, prompt_template_file=None, enable_think=False): - # using the prompt template enhancement strategy(only tested on Qwen2-7B-Instruction) if template_enhance_on is true - template_enhance_on = True if "Qwen2" in self.model_id else False + def init_prompt(self, model_path, prompt_content=None, prompt_template_file=None, enable_think=False): if prompt_content: - self.set_prompt(prompt_content) - return get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) + return get_prompt_template(model_path, 
prompt_content, prompt_template_file, enable_think) elif prompt_template_file is None: print("There is no template file, using the default template.") - prompt_template = get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) - return ( - DocumentedContextRagPromptTemplate.from_template(prompt_template) - if template_enhance_on - else prompt_template - ) + prompt_template = get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) + return prompt_template else: safe_root = "/templates" prompt_template_file = os.path.normpath(os.path.join(safe_root, prompt_template_file)) @@ -172,25 +191,19 @@ def init_prompt(self, model_id, prompt_content=None, prompt_template_file=None, raise ValueError("Invalid template path") if not os.path.exists(prompt_template_file): raise ValueError("Template file not exists") - if template_enhance_on: - return DocumentedContextRagPromptTemplate.from_file(prompt_template_file) - else: - return get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) + return get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) def set_prompt(self, prompt): if "{context}" not in prompt: prompt += "\n<|im_start|>{context}<|im_end|>" if "{chat_history}" not in prompt: prompt += "\n<|im_start|>{chat_history}" - self.prompt = prompt + self.prompt_content = prompt + self.prompt = self.init_prompt(self.model_path, self.prompt_content, self.prompt_template_file) def reset_prompt(self): - prompt_template = get_prompt_template(self.model_id) - self.prompt = ( - DocumentedContextRagPromptTemplate.from_template(prompt_template) - if self.template_enhance_on - else prompt_template - ) + self.prompt_content = None + self.prompt = self.init_prompt(self.model_path, self.prompt_content, self.prompt_template_file) def clean_string(self, string): ret = string @@ -206,20 +219,21 @@ def query_transform(self, chat_request, retrieved_nodes, sub_questions=None): :return: Generated text_gen_context and prompt_str.""" text_gen_context = "" for n in retrieved_nodes: - origin_text = n.node.get_text() + origin_text = n.node.text text_gen_context += self.clean_string(origin_text.strip()) query = chat_request.messages chat_history = concat_history(chat_request.messages) # Modify model think status if chat_request.chat_template_kwargs: - if self.enable_think != chat_request.chat_template_kwargs["enable_thinking"]: - self.prompt = self.init_prompt( - self.model_id, - self.prompt_content, - self.prompt_template_file, - chat_request.chat_template_kwargs["enable_thinking"], - ) - self.enable_think = chat_request.chat_template_kwargs["enable_thinking"] + if "enable_thinking" in chat_request.chat_template_kwargs: + if self.enable_think != chat_request.chat_template_kwargs["enable_thinking"]: + self.prompt = self.init_prompt( + self.model_path, + self.prompt_content, + self.prompt_template_file, + chat_request.chat_template_kwargs["enable_thinking"], + ) + self.enable_think = chat_request.chat_template_kwargs["enable_thinking"] if sub_questions: final_query = f"{query}\n\n### Sub-questions ###\nThe following list is how you should consider the answer, you MUST follow these steps when responding:\n\n{sub_questions}" else: diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py index 842122964f..bd79bb3042 100644 --- a/EdgeCraftRAG/edgecraftrag/components/indexer.py +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -13,8 +13,7 @@ class 
VectorIndexer(BaseComponent, VectorStoreIndex): - - def __init__(self, embed_model, vector_type, milvus_uri="http://localhost:19530", kb_name="default_kb"): + def __init__(self, embed_model, vector_type, vector_url="http://localhost:19530", kb_name="default_kb"): BaseComponent.__init__( self, comp_type=CompType.INDEXER, @@ -26,10 +25,10 @@ def __init__(self, embed_model, vector_type, milvus_uri="http://localhost:19530" from llama_index.core import Settings Settings.embed_model = None - self.milvus_uri = milvus_uri - self._initialize_indexer(embed_model, vector_type, milvus_uri, kb_name) + self.vector_url = vector_url + self._initialize_indexer(embed_model, vector_type, vector_url, kb_name) - def _initialize_indexer(self, embed_model, vector_type, milvus_uri, kb_name): + def _initialize_indexer(self, embed_model, vector_type, vector_url, kb_name): # get active name pl = ctx.get_pipeline_mgr().get_active_pipeline() plname = pl.name if pl else "" @@ -46,7 +45,7 @@ def _initialize_indexer(self, embed_model, vector_type, milvus_uri, kb_name): VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=faiss_store) case IndexerType.MILVUS_VECTOR: milvus_vector_store = MilvusVectorStore( - uri=milvus_uri, + uri=vector_url, dim=self.d, collection_name=kb_name + plname + str(self.d), overwrite=False, @@ -55,14 +54,14 @@ def _initialize_indexer(self, embed_model, vector_type, milvus_uri, kb_name): VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=milvus_store) def reinitialize_indexer(self, kb_name="default_kb"): - self._initialize_indexer(self.model, self.comp_subtype, self.milvus_uri, kb_name) + self._initialize_indexer(self.model, self.comp_subtype, self.vector_url, kb_name) def clear_milvus_collection(self, kb_name="default_kb"): # get active name pl = ctx.get_pipeline_mgr().get_active_pipeline() plname = pl.name if pl else "" milvus_vector_store = MilvusVectorStore( - uri=self.milvus_uri, + uri=self.vector_url, collection_name=kb_name + plname + str(self.d), overwrite=False, ) @@ -75,3 +74,42 @@ def run(self, **kwargs) -> Any: def ser_model(self): set = {"idx": self.idx, "indexer_type": self.comp_subtype, "model": self.model} return set + + +class KBADMINIndexer(BaseComponent): + # Handled in the kbadmin project + def __init__(self, embed_model, vector_type, kbadmin_embedding_url, vector_url="http://localhost:29530"): + BaseComponent.__init__( + self, + comp_type=CompType.INDEXER, + comp_subtype=IndexerType.KBADMIN_INDEXER, + ) + self.embed_model = embed_model + self.kbadmin_embedding_url = kbadmin_embedding_url + self.vector_url = vector_url + + def insert_nodes(self, nodes): + return None + + def _index_struct(self, nodes): + return None + + def run(self, **kwargs) -> Any: + return None + + def reinitialize_indexer(self, kb_name="default_kb"): + return None + + def clear_milvus_collection(self, **kwargs): + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "indexer_type": self.comp_subtype, + "model": {"model_id": self.embed_model}, + "kbadmin_embedding_url": self.kbadmin_embedding_url, + "vector_url": self.vector_url, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py index 259c4a463f..45ea309fad 100644 --- a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py @@ -1,8 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: 
Apache-2.0 +import json import os -from typing import Any, List, Optional +from typing import Any, Dict, List, Optional, Union from edgecraftrag.base import BaseComponent from pydantic import model_serializer @@ -12,8 +13,12 @@ class Knowledge(BaseComponent): file_paths: Optional[List[str]] = [] file_map: Optional[List[str]] = {} description: Optional[str] = "None" - comp_type: str = "knowledge" + comp_type: Optional[str] = "knowledge" + comp_subtype: Optional[str] = "origin_kb" + experience_active: Optional[bool] = False if comp_type == "knowledge" else True active: bool + if comp_type == "experience": + comp_subtype = None def _update_file_names(self) -> None: self.file_map = {os.path.basename(path): path for path in self.file_paths if path is not None} @@ -35,6 +40,105 @@ def remove_file_path(self, file_path: str) -> bool: def get_file_paths(self) -> List[str]: return self.file_paths + def ensure_file_exists(self): + dir_path = os.path.dirname(self.file_paths[0]) + os.makedirs(dir_path, exist_ok=True) + if not os.path.exists(self.file_paths[0]): + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump([], f, ensure_ascii=False, indent=4) + + def get_all_experience(self) -> List[Dict]: + experinence_file = "/home/user/ui_cache/configs/experience_dir/experience.json" + if experinence_file not in self.file_paths: + self.file_paths.append(experinence_file) + if not os.path.isfile(self.file_paths[0]): + self.ensure_file_exists() + with open(self.file_paths[0], "r", encoding="utf-8") as f: + return json.load(f) + + def get_experience_by_question(self, question: str) -> Optional[Dict]: + for item in self.get_all_experience(): + if item.get("question") == question: + return item + return None + + def add_multiple_experiences( + self, experiences: List[Dict[str, Union[str, List[str]]]], flag: bool = True + ) -> List[Dict]: + all_experiences = self.get_all_experience() + result = [] + for exp in experiences: + question = exp.get("question") + if not question: + raise ValueError("Must exist when uploading question") + content = exp.get("content", []) + found = False + for item in all_experiences: + if item["question"] == question: + if flag: + item["content"].extend([c for c in content if c not in item["content"]]) + else: + item["content"] = content + result.append(item) + found = True + break + if not found: + new_item = {"question": question, "content": content} + all_experiences.append(new_item) + result.append(new_item) + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump(all_experiences, f, ensure_ascii=False, indent=4) + return result + + def delete_experience(self, question: str) -> bool: + items = self.get_all_experience() + remaining_items = [item for item in items if item.get("question") != question] + if len(remaining_items) == len(items): + return False + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump(remaining_items, f, ensure_ascii=False, indent=4) + return True + + def clear_experiences(self) -> bool: + all_experiences = self.get_all_experience() + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump([], f, ensure_ascii=False, indent=4) + return True + + def update_experience(self, question: str, content: List[str]) -> Optional[Dict]: + items = self.get_all_experience() + for i, item in enumerate(items): + if item.get("question") == question: + updated_item = {"question": question, "content": content} + items[i] = updated_item + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump(items, 
f, ensure_ascii=False, indent=4) + return updated_item + return None + + def add_experiences_from_file(self, file_path: str, flag: bool = False) -> List[Dict]: + if not file_path.endswith(".json"): + raise ValueError("File upload type error") + try: + with open(file_path, "r", encoding="utf-8") as f: + experiences = json.load(f) + if not isinstance(experiences, list): + raise ValueError("The contents of the file must be a list") + return self.add_multiple_experiences(experiences=experiences, flag=flag) + except json.JSONDecodeError as e: + raise ValueError("File parsing failure") + except Exception as e: + raise RuntimeError("File Error") + + def calculate_totals(self): + if self.comp_type == "knowledge": + total = len(self.file_paths) + elif self.comp_type == "experience": + total = len(self.get_all_experience()) + else: + total = None + return total + def run(self, **kwargs) -> Any: pass @@ -44,8 +148,11 @@ def ser_model(self): "idx": self.idx, "name": self.name, "comp_type": self.comp_type, + "comp_subtype": self.comp_subtype, "file_map": self.file_map, "description": self.description, "active": self.active, + "experience_active": self.experience_active, + "total": self.calculate_totals(), } return set diff --git a/EdgeCraftRAG/edgecraftrag/components/node_parser.py b/EdgeCraftRAG/edgecraftrag/components/node_parser.py index 0f386bc61f..0bd49b91b4 100644 --- a/EdgeCraftRAG/edgecraftrag/components/node_parser.py +++ b/EdgeCraftRAG/edgecraftrag/components/node_parser.py @@ -168,3 +168,25 @@ def ser_model(self): "chunk_overlap": self.chunk_overlap, } return set + + +class KBADMINParser(BaseComponent): + # Handled in the kbadmin project + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.KBADMINPARSER + + def run(self, **kwargs) -> Any: + return None + + def insert_nodes(self): + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index 41780ef88f..29205a3819 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -44,7 +44,7 @@ def __init__( if self.name == "" or self.name is None: self.name = self.idx self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" - self.run_pipeline_cb = run_generator_ben if self.enable_benchmark else run_generator + self.run_pipeline_cb = run_generator self.run_retriever_cb = run_retrieve self.run_data_prepare_cb = run_simple_doc @@ -97,12 +97,10 @@ def check_active(self, nodelist, kb_name): # TODO: update doc changes # TODO: more operations needed, add, del, modify def update_nodes(self, nodes): - print(f"Updating {len(nodes)} nodes ...") if self.indexer is not None: self.indexer.insert_nodes(nodes) def update_indexer_to_retriever(self): - print("Updating indexer to retriever ...") if self.indexer is not None and self.retriever is not None: old_retriever = self.retriever retriever_type = old_retriever.comp_subtype @@ -122,7 +120,6 @@ def update_indexer_to_retriever(self): # Implement abstract run function # callback dispatcher def run(self, **kwargs) -> Any: - print(kwargs) if "cbtype" in kwargs: if kwargs["cbtype"] == CallbackType.DATAPREP: if "docs" in kwargs: @@ -183,9 +180,18 @@ def model_existed(self, model_id: str) -> bool: # Test callback to retrieve nodes from query def 
run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + benchmark_data = {} query = chat_request.messages + top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k contexts = {} - retri_res = pl.retriever.run(query=query) + start = 0 + if pl.enable_benchmark: + _, benchmark_data = pl.benchmark.init_benchmark_data() + start = time.perf_counter() + retri_res = pl.retriever.run(query=query, top_k=top_k) + if pl.enable_benchmark: + benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + pl.benchmark.insert_benchmark_data(benchmark_data) contexts[CompType.RETRIEVER] = retri_res query_bundle = QueryBundle(query) if pl.postprocessor: @@ -201,10 +207,18 @@ def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: + start = 0 + benchmark_data = {} + if pl.enable_benchmark: + _, benchmark_data = pl.benchmark.init_benchmark_data() + start = time.perf_counter() n = pl.node_parser.run(docs=docs) if pl.indexer is not None: pl.indexer.insert_nodes(n) - print(pl.indexer._index_struct) + if pl.enable_benchmark: + benchmark_data[CompType.NODEPARSER] += time.perf_counter() - start + benchmark_data[CompType.CHUNK_NUM] += len(n) + pl.benchmark.insert_benchmark_data(benchmark_data) return n @@ -225,114 +239,93 @@ async def timing_wrapper(): return ret -def run_generator_ben(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - benchmark_index, benchmark_data = pl.benchmark.init_benchmark_data() +def run_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + if pl.enable_benchmark: + benchmark_index, benchmark_data = pl.benchmark.init_benchmark_data() contexts = {} - start = time.perf_counter() + retri_res = [] + active_kb = chat_request.user if chat_request.user else None + enable_rag_retrieval = ( + chat_request.chat_template_kwargs.get("enable_rag_retrieval", True) + if chat_request.chat_template_kwargs + else True + ) + if not active_kb: + enable_rag_retrieval = False + elif pl.retriever.comp_subtype == "kbadmin_retriever" and active_kb.comp_subtype == "origin_kb": + enable_rag_retrieval = False + elif pl.retriever.comp_subtype != "kbadmin_retriever" and active_kb.comp_subtype == "kbadmin_kb": + enable_rag_retrieval = False query = chat_request.messages - if pl.generator.inference_type == InferenceType.VLLM: - UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") - search_config_path = os.path.join(UI_DIRECTORY, "configs/search_config.yaml") - search_dir = os.path.join(UI_DIRECTORY, "configs/search_dir") - - def run_async_query_search(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(query_search(query, search_config_path, search_dir, pl)) - finally: - loop.close() - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_query_search) - top1_issue, sub_questionss_result = future.result() - if sub_questionss_result: - query = query + sub_questionss_result - - retri_res = pl.retriever.run(query=query) - query_bundle = QueryBundle(query) - benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start - contexts[CompType.RETRIEVER] = retri_res - - start = time.perf_counter() - if pl.postprocessor: - for processor in pl.postprocessor: - if ( - isinstance(processor, RerankProcessor) - and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default - ): - processor.top_n = chat_request.top_n - retri_res = 
processor.run(retri_res=retri_res, query_bundle=query_bundle) - contexts[CompType.POSTPROCESSOR] = retri_res - benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start + sub_questionss_result = None + experience_status = True if chat_request.tool_choice == "auto" else False + if enable_rag_retrieval: + start = 0 + if pl.enable_benchmark: + start = time.perf_counter() + if pl.generator.inference_type == InferenceType.VLLM and experience_status: + UI_DIRECTORY = "/home/user/ui_cache" + search_config_path = os.path.join(UI_DIRECTORY, "configs/search_config.yaml") + search_dir = os.path.join(UI_DIRECTORY, "configs/experience_dir/experience.json") + + def run_async_query_search(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(query_search(query, search_config_path, search_dir, pl)) + finally: + loop.close() + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(run_async_query_search) + top1_issue, sub_questionss_result = future.result() + if sub_questionss_result: + query = query + sub_questionss_result + if pl.enable_benchmark: + benchmark_data[CompType.QUERYSEARCH] = time.perf_counter() - start + start = time.perf_counter() + top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k + retri_res = pl.retriever.run(query=query, top_k=top_k) + if pl.enable_benchmark: + benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + contexts[CompType.RETRIEVER] = retri_res + query_bundle = QueryBundle(query) + if pl.enable_benchmark: + start = time.perf_counter() + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + contexts[CompType.POSTPROCESSOR] = retri_res + if pl.enable_benchmark: + benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start if pl.generator is None: raise ValueError("No Generator Specified") - text_gen_context, prompt_str = pl.generator.query_transform(chat_request, retri_res) - input_token_size = pl.benchmark.cal_input_token_size(prompt_str) - - np_type = pl.node_parser.comp_subtype - start = time.perf_counter() - if pl.generator.inference_type == InferenceType.LOCAL: - ret = pl.generator.run(chat_request, retri_res, np_type) - elif pl.generator.inference_type == InferenceType.VLLM: - ret = pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) - else: - raise ValueError("LLM inference_type not supported") - end = time.perf_counter() + if pl.enable_benchmark: + _, prompt_str = pl.generator.query_transform(chat_request, retri_res) + input_token_size = pl.benchmark.cal_input_token_size(prompt_str) - if isinstance(ret, StreamingResponse): - ret = benchmark_response(ret, pl.benchmark, benchmark_index, benchmark_data, input_token_size, start) - else: - benchmark_data[CompType.GENERATOR] = end - start - pl.benchmark.insert_llm_data(benchmark_index, input_token_size) - pl.benchmark.insert_benchmark_data(benchmark_data) - return ret, contexts - - -def run_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - query = chat_request.messages - contexts = {} - if pl.generator.inference_type == InferenceType.VLLM: - UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") - search_config_path = os.path.join(UI_DIRECTORY, 
"configs/search_config.yaml") - search_dir = os.path.join(UI_DIRECTORY, "configs/search_dir") - - def run_async_query_search(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(query_search(query, search_config_path, search_dir, pl)) - finally: - loop.close() - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_query_search) - top1_issue, sub_questionss_result = future.result() - if sub_questionss_result: - query = query + sub_questionss_result - retri_res = pl.retriever.run(query=query) - contexts[CompType.RETRIEVER] = retri_res - query_bundle = QueryBundle(query) - - if pl.postprocessor: - for processor in pl.postprocessor: - if ( - isinstance(processor, RerankProcessor) - and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default - ): - processor.top_n = chat_request.top_n - retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) - contexts[CompType.POSTPROCESSOR] = retri_res - - if pl.generator is None: - raise ValueError("No Generator Specified") np_type = pl.node_parser.comp_subtype + if pl.enable_benchmark: + start = time.perf_counter() if pl.generator.inference_type == InferenceType.LOCAL: ret = pl.generator.run(chat_request, retri_res, np_type) elif pl.generator.inference_type == InferenceType.VLLM: ret = pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) else: raise ValueError("LLM inference_type not supported") + if pl.enable_benchmark: + end = time.perf_counter() + if isinstance(ret, StreamingResponse): + ret = benchmark_response(ret, pl.benchmark, benchmark_index, benchmark_data, input_token_size, start) + else: + benchmark_data[CompType.GENERATOR] = end - start + pl.benchmark.insert_llm_data(benchmark_index, input_token_size) + pl.benchmark.insert_benchmark_data(benchmark_data) return ret, contexts diff --git a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py index bb59cc3d21..cbd387f59e 100644 --- a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py +++ b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py @@ -60,5 +60,5 @@ def run(self, **kwargs) -> Any: @model_serializer def ser_model(self): - set = {"idx": self.idx, "processor_type": self.comp_subtype, "model": None, "top_n": None} + set = {"idx": self.idx, "processor_type": self.comp_subtype, "top_n": None} return set diff --git a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py index 124014a038..320f2c32aa 100644 --- a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py +++ b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py @@ -85,6 +85,7 @@ def __init__( output_template="", json_key="relevance", json_levels=["Low", "High"], + scores_weight=None, temperature=1.0, API_BASE=None, **kwargs, @@ -107,6 +108,19 @@ def __init__( self.json_levels = json_levels self.API_BASE = API_BASE + # dynamically set scores_weight, use default if not provided + if scores_weight is None: + # generate default weights based on json_levels count + if len(json_levels) == 2: + self.scores_weight = [0.0, 1.0] # Low, High + elif len(json_levels) == 3: + self.scores_weight = [0.0, 0.5, 1.0] # Low, Medium, High + else: + # for other counts, generate evenly spaced weights + self.scores_weight = [i / (len(json_levels) - 1) for i in range(len(json_levels))] + else: + self.scores_weight = scores_weight + async def invoke_vllm(self, 
input_texts):
         headers = {"Content-Type": "application/json"}
         payload = {
@@ -152,18 +166,22 @@ async def _calculate_logits_score(self, user_input, issue):
 
     def _calculate_token_score_vllm(self, outputs, output_index=1, transform="exp"):
         generated_scores = outputs[output_index]
-        three_scores = [
-            generated_scores.get("Low", -9999.0),
-            generated_scores.get("Medium", -9999.0),
-            generated_scores.get("High", -9999.0),
-        ]
-        level_scores = [score / self.temperature for score in three_scores]
+
+        # dynamically get scores for all levels
+        level_scores = []
+        for level in self.json_levels:
+            level_scores.append(generated_scores.get(level, -9999.0))
+
+        # apply temperature scaling
+        level_scores = [score / self.temperature for score in level_scores]
         level_scores_np = numpy.array(level_scores)
         level_scores_np = numpy.where(level_scores_np < -1000, -1000, level_scores_np)
         level_scores_np_exp = numpy.exp(level_scores_np - numpy.max(level_scores_np))
         scores_probs = level_scores_np_exp / level_scores_np_exp.sum()
-        scores_weight = numpy.array([0.0, 0.5, 1.0])  # Low=0, Medium=0.5, High=1
+
+        # using dynamic scores_weight
+        scores_weight = numpy.array(self.scores_weight)
         final_score = numpy.dot(scores_probs, scores_weight)
 
         return final_score
@@ -172,40 +190,43 @@ async def compute_score(self, input_pair):
         return await self._calculate_logits_score(*input_pair)
 
 
-def read_json_files(directory: str) -> dict:
+def read_json_files(file_path: str) -> dict:
     result = {}
-    for filename in os.listdir(directory):
-        if filename.endswith(".json"):
-            file_path = os.path.join(directory, filename)
-            if os.path.isfile(file_path):
-                try:
-                    with open(file_path, "r", encoding="utf-8") as file:
-                        data = json.load(file)
-                        result.update(data)
-                except Exception:
-                    continue
+    if os.path.isfile(file_path):
+        with open(file_path, "r", encoding="utf-8") as f:
+            result = json.load(f)
     return result
 
 
 async def query_search(user_input, search_config_path, search_dir, pl):
-    top1_issue = None
-    sub_questionss_result = None
-    if not os.path.exists(search_dir):
-        return top1_issue, sub_questionss_result
+    top1_issue = sub_questions_result = None
     model_id = pl.generator.model_id
     vllm_endpoint = pl.generator.vllm_endpoint
-    cfg = OmegaConf.load(search_config_path)
-    cfg.query_matcher.model_id = model_id
-    cfg.query_matcher.API_BASE = os.path.join(vllm_endpoint, "v1/completions")
-    query_matcher = LogitsEstimatorJSON(**cfg.query_matcher)
     maintenance_data = read_json_files(search_dir)
-    issues = list(maintenance_data.keys())
+    issues = []
+    for i in range(len(maintenance_data)):
+        issues.append(maintenance_data[i]["question"])
     if not issues:
-        return top1_issue, sub_questionss_result
-
+        return top1_issue, sub_questions_result
+
+    cfg = {}
+    if not os.path.exists(search_config_path):
+        cfg["query_matcher"] = {
+            "instructions": "You're a knowledgeable assistant. Your task is to judge if two queries ask for the same information about the same primary subject. Output only 'Yes' or 'No'. Yes = same subject entity AND same information need, with only wording or stylistic differences. No = different subject entity, different spec or numeric constraint, different attribute/metric, or scope changed by adding/removing a restricting condition. Entity changes MUST lead to No.",
+            "input_template": "Query 1: {}\nQuery 2: {}\n",
+            "output_template": "\nAre these queries equivalent? 
Answer 'Yes' or 'No':", + "json_key": "similarity", + "json_levels": ["No", "Yes"], + "temperature": 0.1, + } + else: + cfg = OmegaConf.load(search_config_path) + cfg["query_matcher"]["model_id"] = model_id + cfg["query_matcher"]["API_BASE"] = os.path.join(vllm_endpoint, "v1/completions") + query_matcher = LogitsEstimatorJSON(**cfg["query_matcher"]) semaphore = asyncio.Semaphore(200) async def limited_compute_score(query_matcher, user_input, issue): @@ -219,9 +240,9 @@ async def limited_compute_score(query_matcher, user_input, issue): # Maximum less than 0.6, we don't use query search. if match_scores[0][1] < 0.6: - return top1_issue, sub_questionss_result + return top1_issue, sub_questions_result top1_issue = match_scores[0][0] - for key, value in maintenance_data.items(): - if key == top1_issue: - sub_questionss_result = value - return top1_issue, sub_questionss_result + for i in range(len(maintenance_data)): + if maintenance_data[i]["question"] == top1_issue: + sub_questions_result = "\n".join(maintenance_data[i]["content"]) + return top1_issue, sub_questions_result diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py index fa8553346a..cdd3fe0bc2 100644 --- a/EdgeCraftRAG/edgecraftrag/components/retriever.py +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -1,14 +1,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Any, List, cast +import warnings +from typing import Any, List, Optional, cast +import requests from edgecraftrag.base import BaseComponent, CompType, RetrieverType +from langchain_milvus import Milvus +from langchain_openai import OpenAIEmbeddings from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever from llama_index.core.retrievers import AutoMergingRetriever -from llama_index.core.schema import BaseNode +from llama_index.core.schema import BaseNode, Document, NodeWithScore from llama_index.retrievers.bm25 import BM25Retriever from pydantic import model_serializer +from pymilvus import Collection, MilvusException, connections, utility class VectorSimRetriever(BaseComponent, VectorIndexRetriever): @@ -39,6 +44,8 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk + self.similarity_top_k = top_k return self.retrieve(v) return None @@ -75,8 +82,9 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk # vector_retriever needs to be updated - self._vector_retriever = self._index.as_retriever(similarity_top_k=self.topk) + self._vector_retriever = self._index.as_retriever(similarity_top_k=top_k) return self.retrieve(v) return None @@ -108,8 +116,9 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk nodes = cast(List[BaseNode], list(self._docstore.docs.values())) - similarity_top_k = min(len(nodes), self.topk) + similarity_top_k = min(len(nodes), top_k) bm25_retr = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=similarity_top_k) return bm25_retr.retrieve(v) @@ -123,3 +132,139 @@ def ser_model(self): "retrieve_topk": self.topk, } return set + + +class KBadminRetriever(BaseComponent): + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + 
self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.KBADMIN_RETRIEVER, + ) + self.vector_db = None + self.collection_name = None + self.topk = kwargs.get("similarity_top_k", 30) + self.KBADMIN_MILVUS_URL = indexer.vector_url + self.CONNECTION_ARGS = {"uri": indexer.vector_url} + self.vector_field = "q_1024_vec" + self.text_field = "content_with_weight" + self.embedding_model_name = indexer.embed_model + self.embedding_url = indexer.kbadmin_embedding_url + "/v3" + self.embedding = OpenAIEmbeddings( + model=self.embedding_model_name, + api_key="unused", + base_url=self.embedding_url, + tiktoken_enabled=False, + embedding_ctx_length=510, + ) + + def config_kbadmin_milvus(self, knowledge_name): + collection_name = knowledge_name + if not kbs_rev_maps: + get_kbs_info(self.CONNECTION_ARGS) + collection_name = kbs_rev_maps[collection_name] + self.vector_db = Milvus( + self.embedding, + connection_args=self.CONNECTION_ARGS, + collection_name=collection_name, + vector_field=self.vector_field, + text_field=self.text_field, + enable_dynamic_field=True, + index_params={"index_type": "FLAT", "metric_type": "IP", "params": {}}, + ) + + def similarity_search_with_embedding(self, query: str, k) -> list[tuple[Document, float]]: + url = self.embedding_url + "/embeddings" + embedding_info = {"model": self.embedding_model_name, "input": query} + # Get embedding result from embedding service + response = requests.post(url, headers={"Content-Type": "application/json"}, json=embedding_info) + embedding_json = response.json() + embedding = embedding_json["data"][0]["embedding"] + docs_and_scores = self.vector_db.similarity_search_with_score_by_vector(embedding=embedding, k=k) + relevance_score_fn = self.vector_db._select_relevance_score_fn() + return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores] + + def run(self, **kwargs) -> Any: + query = kwargs["query"] + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk + # langchain retrieval + docs_and_similarities = self.similarity_search_with_embedding(query=query, k=top_k) + node_with_scores: List[NodeWithScore] = [] + for doc, similarity in docs_and_similarities: + score: Optional[float] = None + if similarity is not None: + score = similarity + # convert langchain store format into llamaindex + node = Document.from_langchain_format(doc) + node_with_scores.append(NodeWithScore(node=node, score=score)) + return node_with_scores + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "retriever_type": self.comp_subtype, "CONNECTION_ARGS": self.CONNECTION_ARGS} + return set + + +# global kbs maps. 
+global kbs_rev_maps +kbs_rev_maps = {} + + +def get_kbs_info(CONNECTION_ARGS): + alias = "default" + try: + connections.connect("default", **CONNECTION_ARGS) + collections = utility.list_collections() + all_kb_infos = {} + new_infos = {} + for kb in collections: + collection = Collection(kb) + collection.load() + try: + if any(field.name == "kb_id" for field in collection.schema.fields): + docs = collection.query( + expr="pk != 0", + output_fields=["kb_name", "kb_id", "docnm_kwd"], + timeout=10, + ) + else: + docs = collection.query( + expr="pk != 0", + output_fields=["filename"], + timeout=10, + ) + collection.release() + except MilvusException as e: + continue + this_kbinfo = {} + for doc in docs: + try: + if "kb_name" in doc: + if not this_kbinfo: + this_kbinfo["name"] = doc["kb_name"] + this_kbinfo["uuid"] = doc["kb_id"] + this_kbinfo["files"] = set([doc["docnm_kwd"]]) + else: + this_kbinfo["files"].add(doc["docnm_kwd"]) + else: + if not this_kbinfo: + this_kbinfo["name"] = kb + this_kbinfo["uuid"] = "" + this_kbinfo["files"] = set([doc["filename"]]) + else: + this_kbinfo["files"].add(doc["filename"]) + except KeyError: + this_kbinfo = None + break + if this_kbinfo: + unique_files = list(this_kbinfo["files"]) + this_kbinfo["files"] = unique_files + new_infos[kb] = this_kbinfo + all_kb_infos.update(new_infos) + kbs_rev_maps.clear() + for kb_id in all_kb_infos: + kbs_rev_maps[all_kb_infos[kb_id]["name"]] = kb_id + return kbs_rev_maps + finally: + if connections.has_connection(alias): + connections.disconnect(alias) diff --git a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py index b8dd82ab7b..c956ee316d 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py @@ -26,6 +26,34 @@ def search_parser(self, npin: NodeParserIn) -> BaseComponent: return v return None + def search_parser_change(self, pl, req): + pl_change = False + try: + if pl.node_parser.comp_subtype != req.node_parser.parser_type: + return True + if pl.node_parser.comp_subtype == req.node_parser.parser_type: + if pl.node_parser.comp_subtype == NodeParserType.SIMPLE: + if ( + pl.node_parser.chunk_size != req.node_parser.chunk_size + or pl.node_parser.chunk_overlap != req.node_parser.chunk_overlap + ): + pl_change = True + elif pl.node_parser.comp_subtype == NodeParserType.SENTENCEWINDOW: + if pl.node_parser.window_size != req.node_parser.window_size: + pl_change = True + elif pl.node_parser.comp_subtype == NodeParserType.HIERARCHY: + if pl.node_parser.chunk_sizes != req.node_parser.chunk_sizes: + pl_change = True + elif pl.node_parser.comp_subtype == NodeParserType.UNSTRUCTURED: + if ( + pl.node_parser.chunk_size != req.node_parser.chunk_size + or pl.node_parser.chunk_overlap != req.node_parser.chunk_overlap + ): + pl_change = True + except: + return False + return pl_change + class IndexerMgr(BaseMgr): @@ -43,6 +71,7 @@ def search_indexer(self, indin: IndexerIn) -> BaseComponent: (v.model.model_id_or_path == indin.embedding_model.model_id) or (v.model.model_id_or_path == indin.embedding_model.model_path) ) + and v.model.device == indin.embedding_model.device ): return v return None diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py index 0278f1f6ac..6f29e931e4 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py @@ -20,7 +20,7 @@ def add_text(self, text: str): self.add(file) return 
file.documents - def add_files(self, docs: Any): + def add_files(self, docs: Any, docs_name: str = "default"): if not isinstance(docs, list): docs = [docs] @@ -41,38 +41,31 @@ def add_files(self, docs: Any): for file_path in files: file = File(file_path=file_path) - self.add(file) + self.append(file, docs_name) input_docs.extend(file.documents) - return input_docs - def get_file_by_name_or_id(self, name: str): - for _, file in self.components.items(): - if file.name == name or file.idx == name: - return file + def get_file_by_name(self, docs_name: str = "default", file_path: str = None): + for name, files in self.components.items(): + if docs_name == name: + for file in files: + if file_path == file.documents[0].metadata["file_path"]: + return file.documents return None - def get_files(self): - return [file for _, file in self.components.items()] + def get_kb_files_by_name(self, docs_name: str = "default"): + file_docs = [] + for name, files in self.components.items(): + if name == docs_name: + return files + return file_docs def get_all_docs(self) -> List[Document]: - all_docs = [] - for _, file in self.components.items(): - all_docs.extend(file.documents) + all_docs = {} + for doc_name, files in self.components.items(): + all_docs[doc_name] = files return all_docs - def get_docs_by_file(self, name) -> List[Document]: - file = self.get_file_by_name_or_id(name) - return file.documents if file else [] - - def del_file(self, name): - file = self.get_file_by_name_or_id(name) - if file: - self.remove(file.idx) - return True - else: - return False - def update_file(self, name): file = self.get_file_by_name_or_id(name) if file: @@ -81,3 +74,20 @@ def update_file(self, name): return True else: return False + + def del_kb_file(self, docs_name: str = "default"): + files = self.get_kb_files_by_name(docs_name) + if files: + self.remove(docs_name) + + def del_file(self, docs_name: str = "default", file_path: str = None): + files = self.get_file_by_name(docs_name, file_path) + docs_list = [] + for docs_file in files: + docs_list.append(docs_file.id_) + files = self.get_kb_files_by_name(docs_name) + for docs_file in files: + if file_path == docs_file.documents[0].metadata["file_path"]: + self.components[docs_name].remove(docs_file) + return docs_list + return None diff --git a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py index dc69943eb2..d6dbba3ead 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py @@ -13,6 +13,7 @@ class KnowledgeManager(BaseMgr): def __init__(self): super().__init__() self.active_knowledge_idx: Optional[str] = None + self.active_experience_idx: Optional[str] = None def get_knowledge_base_by_name_or_id(self, name: str): for _, kb in self.components.items(): @@ -26,8 +27,17 @@ def get_active_knowledge_base(self) -> Optional[Knowledge]: else: return None + def get_active_experience(self): + if self.active_experience_idx: + return self.get_knowledge_base_by_name_or_id(self.active_experience_idx) + else: + return None + def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if kb.comp_type != "knowledge": + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Experience type cannot be active") + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) self.active_knowledge_idx = kb.idx if knowledge.active else None for idx, comp in 
self.components.items(): @@ -35,16 +45,44 @@ def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): comp.active = idx == self.active_knowledge_idx return kb + def active_experience(self, knowledge: KnowledgeBaseCreateIn): + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if kb.comp_type != "experience": + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Knowledge type cannot be active") + self.active_experience_idx = kb.idx if knowledge.experience_active else None + if kb.experience_active != knowledge.experience_active: + for idx, comp in self.components.items(): + if isinstance(comp, Knowledge): + comp.experience_active = idx == self.active_experience_idx + return kb + def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: for _, kb in self.components.items(): if kb.name == knowledge.name: raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="The knowledge base already exists.") + if knowledge.comp_type == "experience": + for idx, kb in self.components.items(): + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, detail="Only one experience class can be created." + ) + if knowledge.comp_type == "experience": + knowledge.active = False if knowledge.active is None: knowledge.active = False - kb = Knowledge(name=knowledge.name, description=knowledge.description, active=knowledge.active) + kb = Knowledge( + name=knowledge.name, + description=knowledge.description, + active=knowledge.active, + comp_type=knowledge.comp_type, + comp_subtype=knowledge.comp_subtype, + experience_active=knowledge.experience_active, + ) self.add(kb) if knowledge.active: self.active_knowledge(knowledge) + if knowledge.experience_active: + self.active_experience(knowledge) return kb def delete_knowledge_base(self, name: str): @@ -54,12 +92,16 @@ def delete_knowledge_base(self, name: str): def update_knowledge_base(self, knowledge) -> Knowledge: kb = self.get_knowledge_base_by_name_or_id(knowledge.name) - - if knowledge.description is not None: - kb.description = knowledge.description - - if knowledge.active is not None and kb.active != knowledge.active: - kb = self.active_knowledge(knowledge) + if kb.comp_type == "knowledge": + if knowledge.description is not None: + kb.description = knowledge.description + if knowledge.active is not None and kb.active != knowledge.active: + kb = self.active_knowledge(knowledge) + if kb.comp_type == "experience": + if knowledge.description is not None: + kb.description = knowledge.description + if knowledge.experience_active is not None and kb.experience_active != knowledge.experience_active: + kb = self.active_experience(knowledge) return "Knowledge base update successfully" def get_all_knowledge_bases(self) -> List[Dict[str, Any]]: @@ -67,3 +109,8 @@ def get_all_knowledge_bases(self) -> List[Dict[str, Any]]: for idx, kb in self.components.items(): kb_list.append(kb) return kb_list + + def get_experience_kb(self): + for idx, kb in self.components.items(): + if kb.comp_type == "experience": + return kb diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py index 81524a3754..b22f0c66df 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -66,8 +66,8 @@ def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None) return nodelist = None - if pl.node_changed: - nodelist = nm.get_nodes(pl.node_parser.idx) + # if 
pl.node_changed: + # nodelist = nm.get_nodes(pl.node_parser.idx) pl.check_active(nodelist, kb_name) prevactive = self._active_pipeline if prevactive: diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt old mode 100644 new mode 100755 index 8dc53e6c83..289ba3ef4d --- a/EdgeCraftRAG/edgecraftrag/requirements.txt +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -3,11 +3,13 @@ EbookLib>=0.18 faiss-cpu>=1.8.0.post1 html2text>=2025.4.15 langchain-core==0.3.60 -llama-index==0.12.41 -llama-index-core==0.12.41 +langchain-milvus +langchain-openai +llama-index==0.12.36 +llama-index-core==0.12.37 llama-index-embeddings-openvino==0.5.2 -llama-index-llms-openai==0.4.0 -llama-index-llms-openai-like==0.4.0 +llama-index-llms-openai==0.3.44 +llama-index-llms-openai-like==0.3.4 llama-index-llms-openvino==0.4.0 llama-index-postprocessor-openvino-rerank==0.4.1 llama-index-readers-file==0.4.7 @@ -20,6 +22,6 @@ pillow>=10.4.0 py-cpuinfo>=9.0.0 pymilvus==2.5.10 python-docx==1.1.2 -unstructured==0.16.11 +unstructured unstructured[pdf] werkzeug==3.1.3 diff --git a/EdgeCraftRAG/edgecraftrag/utils.py b/EdgeCraftRAG/edgecraftrag/utils.py index 18a43e5879..1eef20f8c2 100755 --- a/EdgeCraftRAG/edgecraftrag/utils.py +++ b/EdgeCraftRAG/edgecraftrag/utils.py @@ -44,15 +44,14 @@ def iter_elements(cls, paragraph: Paragraph, opts: DocxPartitionerOptions) -> It yield Image(text="IMAGE", metadata=element_metadata) -def get_prompt_template(model_id, prompt_content=None, template_path=None, enable_think=False): +def get_prompt_template(model_path, prompt_content=None, template_path=None, enable_think=False): if prompt_content is not None: template = prompt_content elif template_path is not None: template = Path(template_path).read_text(encoding=None) else: template = DEFAULT_TEMPLATE - tokenizer = AutoTokenizer.from_pretrained(model_id) - model_id = model_id.split("/")[-1] + tokenizer = AutoTokenizer.from_pretrained(model_path) messages = [{"role": "system", "content": template}, {"role": "user", "content": "\n{input}\n"}] prompt_template = tokenizer.apply_chat_template( messages, @@ -90,10 +89,6 @@ def compare_mappings(new_dict, old_dict): deleted = {name: old_files[name] for name in set(old_files) - set(new_files)} if deleted: deleted_files[key] = deleted - - for key in list(added_files.keys()): - if key in deleted_files: - del added_files[key] return added_files, deleted_files @@ -126,7 +121,7 @@ def concat_history(message: str) -> str: max_token = 6000 active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if active_pl.generator.inference_type == InferenceType.VLLM: - vllm_max_len = int(os.getenv("MAX_MODEL_LEN", "5000")) + vllm_max_len = int(os.getenv("MAX_MODEL_LEN", "10240")) if vllm_max_len > 5000: max_token = vllm_max_len - 1024 diff --git a/EdgeCraftRAG/nginx/nginx-conf-generator.sh b/EdgeCraftRAG/nginx/nginx-conf-generator.sh index bd8e5b194c..f12799f583 100644 --- a/EdgeCraftRAG/nginx/nginx-conf-generator.sh +++ b/EdgeCraftRAG/nginx/nginx-conf-generator.sh @@ -25,7 +25,7 @@ EOL # Generate the server lines for ((i=0; i> $2 + echo " server ${HOST_IP}:${!PORT_VAR:-8$((i+1))00};" >> $2 done # Close the upstream block and the http block diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh index 192e18b67c..700dd92990 100755 --- a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh @@ -35,7 +35,7 @@ TENSOR_PARALLEL_SIZE=1 SELECTED_XPU_0=0 
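A minimal sketch (not the patched code itself) of what the reworked `get_prompt_template` in `edgecraftrag/utils.py` now does: the tokenizer is resolved from a local model path rather than a Hugging Face model id, and the system/user messages are rendered through the model's chat template. The `apply_chat_template` keyword arguments and the placeholder system prompt are assumptions; only the message list is visible in the hunk above.

```python
# Illustrative sketch, assuming transformers is installed and MODEL_PATH points
# at a locally downloaded model; kwargs to apply_chat_template are assumed.
from transformers import AutoTokenizer

DEFAULT_TEMPLATE = "Use the given context to answer the question."  # placeholder

def build_prompt_template(model_path: str, prompt_content: str | None = None) -> str:
    template = prompt_content or DEFAULT_TEMPLATE
    tokenizer = AutoTokenizer.from_pretrained(model_path)  # local path, no HF download
    messages = [
        {"role": "system", "content": template},
        {"role": "user", "content": "\n{input}\n"},
    ]
    # Render as plain text so the "{input}" slot can be filled per query later.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# e.g. build_prompt_template(f"{MODEL_PATH}/Qwen/Qwen3-8B")
```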
vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" LLM_MODEL="Qwen/Qwen3-8B" -LLM_MODEL_PATH="${HOME}/qwen/" +LLM_MODEL_PATH="${MODEL_PATH}/${LLM_MODEL}" NGINX_CONFIG_PATH="$WORKPATH/nginx/nginx.conf" VLLM_IMAGE_TAG="0.8.3-b20" DP_NUM=1 diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh new file mode 100755 index 0000000000..df97274a89 --- /dev/null +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +source ./common.sh + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" + +ip_address=$(hostname -I | awk '{print $1}') +HOST_IP=$ip_address + +COMPOSE_FILE="compose_vllm_b60.yaml" +EC_RAG_SERVICE_PORT=16010 + +MODEL_PATH="${HOME}/models" +# MODEL_PATH="$WORKPATH/models" +DOC_PATH="$WORKPATH/tests" +UI_UPLOAD_PATH="$WORKPATH/tests" + +HF_ENDPOINT=https://hf-mirror.com +VLLM_SERVICE_PORT_B60=8086 +TP=1 +vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_B60}" +LLM_MODEL="Qwen/Qwen3-8B" +VLLM_IMAGE_TAG="1.0" +DP=1 + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Pull intel/llm-scaler-vllm image" + docker pull intel/llm-scaler-vllm:${VLLM_IMAGE_TAG} + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/gpu/arc + source set_env.sh + # Start Docker Containers + docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log + echo "ipex-serving-xpu is booting, please wait." + sleep 30s + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-serving-xpu-container > ${LOG_PATH}/ipex-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ${LOG_PATH}/ipex-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..." + local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do + sleep 1 + done + local HTTP_STATUS="${RESPONSE: -3}" + local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log) + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_rag() { + cd $WORKPATH/tests + + # setup pipeline + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \ + "active" \ + "pipeline" \ + "edgecraftrag-server" \ + '@configs/test_pipeline_ipex_vllm.json' + + # add data + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \ + "Done" \ + "data" \ + "edgecraftrag-server" \ + '@configs/test_data.json' + + # query + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \ + "1234567890" \ + "query" \ + "ipex-serving-xpu-container" \ + '{"messages":"What is the test id?","max_tokens":5}' +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${HOST_IP}:16011/v1/chatqna" \ + "1234567890" \ + "query" \ + "ipex-serving-xpu-container" \ + '{"messages":"What is the test id?","max_tokens":5}' +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/gpu/arc + export MODEL_PATH="${HOME}/models" + docker compose -f $COMPOSE_FILE down +} + + +function main() { + mkdir -p $LOG_PATH + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_rag" + validate_rag + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo y | docker system prune + echo "::endgroup::" + +} + +main diff --git a/EdgeCraftRAG/tools/quick_start.sh b/EdgeCraftRAG/tools/quick_start.sh index fdefa797f0..27525d1f97 100755 --- a/EdgeCraftRAG/tools/quick_start.sh +++ b/EdgeCraftRAG/tools/quick_start.sh @@ -5,6 +5,8 @@ set -e WORKPATH=$(dirname "$(pwd)") +ip_address=$(hostname -I | awk '{print $1}') +HOST_IP=$ip_address get_user_input() { local var_name=$1 @@ -32,7 +34,7 @@ function start_vllm_services() { MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") - MODEL_PATH=$(get_user_input "your model path" "${HOME}/models") + MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input user_input=${user_input:-"yes"} @@ -63,14 +65,20 @@ function start_vllm_services() { # vllm ENV export NGINX_PORT=8086 export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" - TENSOR_PARALLEL_SIZE=$(get_user_input "your tp size" 1) - read -p "selected GPU [$(seq -s, 0 $((TENSOR_PARALLEL_SIZE - 1)))] " SELECTED_XPU_0; SELECTED_XPU_0=${SELECTED_XPU_0:-$(seq -s, 0 $((TENSOR_PARALLEL_SIZE - 1)))} - DP_NUM=$(get_user_input "DP number(how many containers to run vLLM)" 1) - for (( x=0; x ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function quick_start_ov_services() { + COMPOSE_FILE="compose.yaml" + echo "stop former service..." 
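A rough Python equivalent of the `validate_services` helper added in `tests/test_compose_vllm_on_arc_b60.sh`: POST a JSON payload, require HTTP 200, then look for an expected marker in the body. The substring check mirrors the `EXPECTED_RESULT` argument and is an assumption about the part of the function not shown above; the port and payload in the usage comment come from the test script.

```python
# Sketch only; the real test drives curl from bash.
import json
import urllib.error
import urllib.request

def validate_service(url: str, expected: str, payload: dict) -> str:
    """POST a JSON payload and require HTTP 200 plus an expected substring."""
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            body = resp.read().decode(errors="replace")
    except urllib.error.HTTPError as err:  # any non-2xx status
        raise RuntimeError(f"unexpected HTTP status {err.code}") from err
    if expected not in body:
        raise RuntimeError(f"expected {expected!r} not found in response")
    return body

# e.g. validate_service(f"http://{HOST_IP}:16010/v1/chatqna", "1234567890",
#                       {"messages": "What is the test id?", "max_tokens": 5})
```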
+ docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + ip_address=$(hostname -I | awk '{print $1}') + export HOST_IP=${HOST_IP:-"${ip_address}"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MILVUS_ENABLED=${MILVUS_ENABLED:-1} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-"0"} + export LLM_MODEL=${LLM_MODEL:-"Qwen/Qwen3-8B"} + export MODEL_PATH=${MODEL_PATH:-"${HOME}/models"} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + + check_baai_folder + export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"} + if [ ! -d "${HF_CACHE}" ]; then + mkdir -p "${HF_CACHE}" + echo "Created directory: ${HF_CACHE}" + fi + + sudo chown 1000:1000 "${MODEL_PATH}" "${DOC_PATH}" "${TMPFILE_PATH}" + sudo chown -R 1000:1000 "${HF_CACHE}" + export HF_ENDPOINT=${HF_ENDPOINT:-"https://hf-mirror.com"} + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + export CCL_DG2_USM=${CCL_DG2_USM:-0} + + echo "Starting service..." + docker compose -f "$WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE" up -d +} + + +function start_vLLM_B60_services() { + COMPOSE_FILE="compose_vllm_b60.yaml" + echo "stop former service..." + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + ip_address=$(hostname -I | awk '{print $1}') + HOST_IP=$(get_user_input "host ip" "${ip_address}") + DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/tests") + TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/tests") + MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") + CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") + LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-72B") + MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") + read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input + user_input=${user_input:-"yes"} + + if [ "$user_input" == "yes" ]; then + # 模型文件路径请参考以下形式存放, llm为huggingface + # Indexer: ${MODEL_PATH}/BAAI/bge-small-en-v1.5 + # Reranker: ${MODEL_PATH}/BAAI/bge-reranker-large + # llm :${MODEL_PATH}/${LLM_MODEL} (从huggingface或modelscope下载的原始模型,而不是经过OpenVINO转换的模型!) + echo "you skipped model downloading, please make sure you have prepared all models under ${MODEL_PATH}" + else + echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." + mkdir -p $MODEL_PATH + pip install --upgrade --upgrade-strategy eager "optimum[openvino]" + optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity + optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification + pip install -U huggingface_hub + huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" + fi + echo "give permission to related path..." + sudo chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} + # vllm ENV + export VLLM_SERVICE_PORT_B60=8086 + export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_B60}" + read -p "DP number(how many containers to run B60_vLLM) [4] , press Enter to confirm, or type a new value:" DP; DP=${DP:-4} + read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TP; TP=${TP:-1} + DTYPE=$(get_user_input "DTYPE (vLLM data type, e.g. 
float16/bfloat16)" "float16") + ZE_AFFINITY_MASK=$(get_user_input "ZE_AFFINITY_MASK (GPU affinity mask, multi-GPU use 0,1,2...)" "0,1,2,3") + ENFORCE_EAGER=$(get_user_input "ENFORCE_EAGER (enable eager execution, 1=enable/0=disable)" "1") + TRUST_REMOTE_CODE=$(get_user_input "TRUST_REMOTE_CODE (trust remote code for custom models, 1=enable/0=disable)" "1") + DISABLE_SLIDING_WINDOW=$(get_user_input "DISABLE_SLIDING_WINDOW (disable sliding window attention, 1=disable/0=enable)" "1") + GPU_MEMORY_UTIL=$(get_user_input "GPU_MEMORY_UTIL (GPU memory utilization, range 0.1-1.0)" "0.8") + NO_ENABLE_PREFIX_CACHING=$(get_user_input "NO_ENABLE_PREFIX_CACHING (disable prefix caching, 1=disable/0=enable)" "1") + MAX_NUM_BATCHED_TOKENS=$(get_user_input "MAX_NUM_BATCHED_TOKENS (max number of batched tokens)" "8192") + DISABLE_LOG_REQUESTS=$(get_user_input "DISABLE_LOG_REQUESTS (disable request logs, 1=disable/0=enable)" "1") + MAX_MODEL_LEN=$(get_user_input "MAX_MODEL_LEN (max model context length, e.g. 49152/10240)" "49152") + BLOCK_SIZE=$(get_user_input "BLOCK_SIZE (vLLM block size)" "64") + QUANTIZATION=$(get_user_input "QUANTIZATION (model quantization method, e.g. fp8/int4)" "fp8") + # export ENV + export MODEL_PATH=${MODEL_PATH} + export DOC_PATH=${DOC_PATH} + export TMPFILE_PATH=${TMPFILE_PATH} + export LLM_MODEL=${LLM_MODEL} + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + export MILVUS_ENABLED=${MILVUS_ENABLED} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND} + export SELECTED_XPU_0=${SELECTED_XPU_0} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + # export vllm ENV + export DP=${DP} + export TP=${TP} + export DTYPE=${DTYPE} + export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK} + export ENFORCE_EAGER=${ENFORCE_EAGER} + export TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE} + export DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW} + export GPU_MEMORY_UTIL=${GPU_MEMORY_UTIL} + export NO_ENABLE_PREFIX_CACHING=${NO_ENABLE_PREFIX_CACHING} + export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS} + export DISABLE_LOG_REQUESTS=${DISABLE_LOG_REQUESTS} + export MAX_MODEL_LEN=${MAX_MODEL_LEN} + export BLOCK_SIZE=${BLOCK_SIZE} + export QUANTIZATION=${QUANTIZATION} + + # Start Docker Containers + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d + echo "ipex-llm-serving-xpu is booting, please wait..." 
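The deployment helpers above and below all wait for vLLM readiness the same way: poll the serving container's logs until the API-server banner appears, up to 100 attempts with a 6-second sleep. Below is a small Python sketch of that loop, assuming the Docker CLI is available; the container name and marker string are taken from the script.

```python
# Sketch of the readiness wait implemented in the shell scripts with
# `docker logs ... | grep`; retry budget mirrors the scripts (100 x 6s).
import subprocess
import time

def wait_for_vllm(container: str = "ipex-llm-serving-xpu-container-0",
                  marker: str = "Starting vLLM API server on http://0.0.0.0:",
                  retries: int = 100, interval_s: float = 6.0) -> bool:
    for _ in range(retries):
        result = subprocess.run(["docker", "logs", container],
                                capture_output=True, text=True)
        if marker in result.stdout or marker in result.stderr:
            return True
        time.sleep(interval_s)
    return False
```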
+ n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-llm-serving-xpu-container-0 > ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function quick_start_vllm_B60_services() { + WORKPATH=$(dirname "$PWD") + COMPOSE_FILE="compose_vllm_b60.yaml" + EC_RAG_SERVICE_PORT=16010 + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + export HOST_IP=${HOST_IP:-"${ip_address}"} + export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MILVUS_ENABLED=${MILVUS_ENABLED:-1} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-2} + export LLM_MODEL=${LLM_MODEL:-Qwen/Qwen3-72B} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + # export vllm ENV + export DP=${DP:-4} + export TP=${TP:-1} + export DTYPE=${DTYPE:-float16} + export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-0,1,2,3} + export ENFORCE_EAGER=${ENFORCE_EAGER:-1} + export TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE:-1} + export DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW:-1} + export GPU_MEMORY_UTIL=${GPU_MEMORY_UTIL:-0.8} + export NO_ENABLE_PREFIX_CACHING=${NO_ENABLE_PREFIX_CACHING:-1} + export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-8192} + export DISABLE_LOG_REQUESTS=${disable_LOG_REQUESTS:-1} + export MAX_MODEL_LEN=${MAX_MODEL_LEN:-49152} + export BLOCK_SIZE=${BLOCK_SIZE:-64} + export QUANTIZATION=${QUANTIZATION:-fp8} + + + check_baai_folder + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + sudo chown -R 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d + echo "ipex-llm-serving-xpu is booting, please wait..." + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-llm-serving-xpu-container-0 > ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function main { + if [[ $- == *i* ]]; then + read -p "Do you want to start vLLM or local OpenVINO services? 
(vLLM_A770/vLLM_B60/ov) [vLLM_A770]: " user_input + user_input=${user_input:-"vLLM_A770"} + if [[ "$user_input" == "vLLM_A770" ]]; then + start_vllm_services + elif [[ "$user_input" == "vLLM_B60" ]]; then + start_vLLM_B60_services + else + start_services + fi else - start_services + export SERVICE_TYPE=${SERVICE_TYPE:-"vLLM_A770"} + if [[ "$SERVICE_TYPE" == "vLLM_A770" || "$SERVICE_TYPE" == "vLLM" ]]; then + quick_start_vllm_services + elif [[ "$SERVICE_TYPE" == "vLLM_B60" || "$SERVICE_TYPE" == "vLLM_b60" ]]; then + quick_start_vllm_B60_services + else + quick_start_ov_services + fi fi } diff --git a/EdgeCraftRAG/ui/vue/.env.development b/EdgeCraftRAG/ui/vue/.env.development index d7ef344a8a..ea6834f8a0 100644 --- a/EdgeCraftRAG/ui/vue/.env.development +++ b/EdgeCraftRAG/ui/vue/.env.development @@ -2,5 +2,5 @@ ENV = development # Local Api -VITE_API_URL = http://10.67.106.238:16010/ -VITE_CHATBOT_URL = http://10.67.106.238:16011/ +VITE_API_URL = / +VITE_CHATBOT_URL = / diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index 7959bda79a..599bc31cb8 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -45,6 +45,7 @@ declare module 'vue' { ASelectOption: typeof import('ant-design-vue/es')['SelectOption'] ASlider: typeof import('ant-design-vue/es')['Slider'] ASpace: typeof import('ant-design-vue/es')['Space'] + ASpin: typeof import('ant-design-vue/es')['Spin'] ASteps: typeof import('ant-design-vue/es')['Steps'] ATable: typeof import('ant-design-vue/es')['Table'] ATag: typeof import('ant-design-vue/es')['Tag'] @@ -52,6 +53,7 @@ declare module 'vue' { ATooltip: typeof import('ant-design-vue/es')['Tooltip'] AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] FormTooltip: typeof import('./src/components/FormTooltip.vue')['default'] + PartialLoading: typeof import('./src/components/PartialLoading.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] RouterView: typeof import('vue-router')['RouterView'] SvgIcon: typeof import('./src/components/SvgIcon.vue')['default'] diff --git a/EdgeCraftRAG/ui/vue/index.html b/EdgeCraftRAG/ui/vue/index.html index df137679ef..c871332d3c 100644 --- a/EdgeCraftRAG/ui/vue/index.html +++ b/EdgeCraftRAG/ui/vue/index.html @@ -9,6 +9,7 @@ + Edge Craft RAG based Q&A Chatbot diff --git a/EdgeCraftRAG/ui/vue/nginx.conf b/EdgeCraftRAG/ui/vue/nginx.conf index 6d9a233bf8..8b6701e78a 100644 --- a/EdgeCraftRAG/ui/vue/nginx.conf +++ b/EdgeCraftRAG/ui/vue/nginx.conf @@ -28,7 +28,7 @@ http { proxy_pass http://edgecraftrag-server:16010; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_http_version 1.1; - proxy_read_timeout 180s; + proxy_read_timeout 600s; proxy_set_header Connection ""; } diff --git a/EdgeCraftRAG/ui/vue/package.json b/EdgeCraftRAG/ui/vue/package.json index 516e870406..d56e123754 100644 --- a/EdgeCraftRAG/ui/vue/package.json +++ b/EdgeCraftRAG/ui/vue/package.json @@ -9,7 +9,6 @@ "preview": "vite preview" }, "dependencies": { - "@vueuse/i18n": "^4.0.0-beta.12", "ant-design-vue": "^4.0.0-rc.6", "axios": "^1.7.9", "clipboard": "^2.0.11", diff --git a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts index f7946ad72d..e8981e0f9a 100644 --- a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts @@ -21,9 +21,16 @@ export const requestChatbotConfig = (data: Object) => { }); }; -export const getBenchmark = (name: String) => { +export const getBenchmark = () => { 
return request({ - url: `/v1/settings/pipelines/${name}/benchmark`, + url: `/v1/settings/pipeline/benchmark`, method: "get", }); }; + +export const requestStopChat = () => { + return request({ + url: `/v1/chatqna/stop`, + method: "post", + }); +}; diff --git a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts index bb7bc9a494..fe300d6b33 100644 --- a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts @@ -10,7 +10,7 @@ export const getKnowledgeBaseList = () => { }); }; -export const getKnowledgeBaseDetialByName = (kbName: String) => { +export const getKnowledgeBaseDetailByName = (kbName: String) => { return request({ url: `/v1/knowledge/${kbName}`, method: "get", @@ -54,15 +54,14 @@ export const requestKnowledgeBaseRelation = (kbName: String, data: Object) => { url: `/v1/knowledge/${kbName}/files`, method: "post", data, - showLoading: true, showSuccessMsg: true, successMsg: "request.knowledge.uploadSucc", }); }; -export const requestFileDelete = (kbName: String, data: Object) => { +export const requestFileDelete = (name: String, data: Object) => { return request({ - url: `/v1/knowledge/${kbName}/files`, + url: `/v1/knowledge/${name}/files`, method: "delete", data, showLoading: true, @@ -71,4 +70,86 @@ export const requestFileDelete = (kbName: String, data: Object) => { }); }; +export const getExperienceList = () => { + return request({ + url: "/v1/experiences", + method: "get", + }); +}; + +export const requestExperienceCreate = (data: EmptyArrayType) => { + return request({ + url: "/v1/multiple_experiences/check", + method: "post", + data, + showLoading: true, + }); +}; +export const requestExperienceConfirm = (flag: Boolean, data: EmptyArrayType) => { + return request({ + url: `/v1/multiple_experiences/confirm?flag=${flag}`, + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.createSucc", + }); +}; +export const getExperienceDetailByName = (data: Object) => { + return request({ + url: `/v1/experience`, + method: "post", + data, + }); +}; + +export const requestExperienceUpdate = (data: Object) => { + return request({ + url: `/v1/experiences`, + method: "patch", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.updateSucc", + }); +}; + +export const requestExperienceDelete = (data: Object) => { + return request({ + url: `/v1/experiences`, + method: "delete", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.deleteSucc", + }); +}; + +export const requestExperienceRelation = (data: Object) => { + return request({ + url: "/v1/experiences/files", + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "experience.importSuccTip", + }); +}; + +export const getkbadminList = () => { + return request({ + url: "/v1/kbadmin/kbs_list", + method: "get", + }); +}; + +export const requestUploadFileUrl = (kbName: String, data: Object) => { + return request({ + url: `v1/data/file/${kbName}`, + method: "post", + data, + type: "files", + }); +}; + export const uploadFileUrl = `${import.meta.env.VITE_API_URL}v1/data/file/`; diff --git a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts index fd06d1d3d8..335908b6c9 100644 --- a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts @@ -14,7 +14,6 @@ export const getPipelineList = () => { return request({ url: 
"/v1/settings/pipelines", method: "get", - showLoading: true, }); }; diff --git a/EdgeCraftRAG/ui/vue/src/api/request.ts b/EdgeCraftRAG/ui/vue/src/api/request.ts index 91805dbab5..44f6cf2051 100644 --- a/EdgeCraftRAG/ui/vue/src/api/request.ts +++ b/EdgeCraftRAG/ui/vue/src/api/request.ts @@ -7,8 +7,6 @@ import axios, { AxiosInstance } from "axios"; import qs from "qs"; import i18n from "@/i18n"; -const antNotification = serviceManager.getService("antNotification"); - const service: AxiosInstance = axios.create({ baseURL: import.meta.env.VITE_API_URL, timeout: 600000, @@ -39,6 +37,8 @@ service.interceptors.response.use( if (NextLoading) NextLoading.done(); const res = response.data; if (config.showSuccessMsg) { + const antNotification = serviceManager.getService("antNotification"); + if (antNotification) antNotification("success", i18n.global.t("common.success"), i18n.global.t(config.successMsg)); } @@ -55,6 +55,7 @@ service.interceptors.response.use( } else { errorMessage = error.message; } + const antNotification = serviceManager.getService("antNotification"); if (antNotification) antNotification("error", i18n.global.t("common.error"), errorMessage); return Promise.reject(error); diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css index dedd20b357..5163bc195e 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css @@ -1,9 +1,9 @@ @font-face { font-family: "iconfont"; /* Project id 4784207 */ src: - url("iconfont.woff2?t=1754038546130") format("woff2"), - url("iconfont.woff?t=1754038546130") format("woff"), - url("iconfont.ttf?t=1754038546130") format("truetype"); + url("iconfont.woff2?t=1757469597873") format("woff2"), + url("iconfont.woff?t=1757469597873") format("woff"), + url("iconfont.ttf?t=1757469597873") format("truetype"); } .iconfont { @@ -14,6 +14,14 @@ -moz-osx-font-smoothing: grayscale; } +.icon-kb:before { + content: "\e639"; +} + +.icon-experience:before { + content: "\e68e"; +} + .icon-deep-think:before { content: "\e772"; } diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js index f6731b5c1a..5e96151e2e 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 (window._iconfont_svg_string_4784207 = - ''), - ((h) => { + ''), + ((c) => { var l = (a = (a = document.getElementsByTagName("script"))[a.length - 1]).getAttribute("data-injectcss"), a = a.getAttribute("data-disable-injectsvg"); if (!a) { - var c, + var h, t, i, o, @@ -15,8 +15,8 @@ m = function (l, a) { a.parentNode.insertBefore(l, a); }; - if (l && !h.__iconfont__svg__cssinject__) { - h.__iconfont__svg__cssinject__ = !0; + if (l && !c.__iconfont__svg__cssinject__) { + c.__iconfont__svg__cssinject__ = !0; try { document.write( "", @@ -25,10 +25,10 @@ console && console.log(l); } } - (c = function () { + (h = function () { var l, a = document.createElement("div"); - (a.innerHTML = h._iconfont_svg_string_4784207), + (a.innerHTML = c._iconfont_svg_string_4784207), (a = a.getElementsByTagName("svg")[0]) && (a.setAttribute("aria-hidden", "true"), (a.style.position = "absolute"), @@ -40,14 +40,14 @@ }), document.addEventListener ? ~["complete", "loaded", "interactive"].indexOf(document.readyState) - ? setTimeout(c, 0) + ? 
setTimeout(h, 0) : ((t = function () { - document.removeEventListener("DOMContentLoaded", t, !1), c(); + document.removeEventListener("DOMContentLoaded", t, !1), h(); }), document.addEventListener("DOMContentLoaded", t, !1)) : document.attachEvent && - ((i = c), - (o = h.document), + ((i = h), + (o = c.document), (v = !1), s(), (o.onreadystatechange = function () { diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json index a8fea13f43..db90f79659 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json @@ -5,6 +5,20 @@ "css_prefix_text": "icon-", "description": "", "glyphs": [ + { + "icon_id": "687788", + "name": "知识库", + "font_class": "kb", + "unicode": "e639", + "unicode_decimal": 58937 + }, + { + "icon_id": "5299955", + "name": "experience", + "font_class": "experience", + "unicode": "e68e", + "unicode_decimal": 59022 + }, { "icon_id": "44419262", "name": "deep-think", diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf index d49803ef9c..6f8585b8d5 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf differ diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff index e9b153a7f5..9e3764205e 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff differ diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 index 1f66b46dff..63a303beba 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 differ diff --git a/EdgeCraftRAG/ui/vue/src/components.d.ts b/EdgeCraftRAG/ui/vue/src/components.d.ts index f79d1e1d69..35e756d199 100644 --- a/EdgeCraftRAG/ui/vue/src/components.d.ts +++ b/EdgeCraftRAG/ui/vue/src/components.d.ts @@ -10,11 +10,51 @@ export {}; /* prettier-ignore */ declare module 'vue' { export interface GlobalComponents { + AAffix: typeof import('ant-design-vue/es')['Affix'] AButton: typeof import('ant-design-vue/es')['Button'] + ACheckbox: typeof import('ant-design-vue/es')['Checkbox'] + ACheckboxGroup: typeof import('ant-design-vue/es')['CheckboxGroup'] + ACol: typeof import('ant-design-vue/es')['Col'] + ACollapse: typeof import('ant-design-vue/es')['Collapse'] + ACollapsePanel: typeof import('ant-design-vue/es')['CollapsePanel'] + AConfigProvider: typeof import('ant-design-vue/es')['ConfigProvider'] + ADescriptions: typeof import('ant-design-vue/es')['Descriptions'] + ADescriptionsItem: typeof import('ant-design-vue/es')['DescriptionsItem'] + ADivider: typeof import('ant-design-vue/es')['Divider'] + ADrawer: typeof import('ant-design-vue/es')['Drawer'] + ADropdown: typeof import('ant-design-vue/es')['Dropdown'] + AEmpty: typeof import('ant-design-vue/es')['Empty'] + AForm: typeof import('ant-design-vue/es')['Form'] + AFormItem: typeof import('ant-design-vue/es')['FormItem'] + AImage: typeof import('ant-design-vue/es')['Image'] AInput: typeof import('ant-design-vue/es')['Input'] + AInputNumber: typeof import('ant-design-vue/es')['InputNumber'] ALayout: typeof import('ant-design-vue/es')['Layout'] ALayoutContent: typeof import('ant-design-vue/es')['LayoutContent'] + ALayoutHeader: typeof 
import('ant-design-vue/es')['LayoutHeader'] + ALayoutSider: typeof import('ant-design-vue/es')['LayoutSider'] + AMenu: typeof import('ant-design-vue/es')['Menu'] + AMenuItem: typeof import('ant-design-vue/es')['MenuItem'] + AModal: typeof import('ant-design-vue/es')['Modal'] + APagination: typeof import('ant-design-vue/es')['Pagination'] + APopover: typeof import('ant-design-vue/es')['Popover'] + ARadio: typeof import('ant-design-vue/es')['Radio'] + ARadioGroup: typeof import('ant-design-vue/es')['RadioGroup'] + ARow: typeof import('ant-design-vue/es')['Row'] + ASelect: typeof import('ant-design-vue/es')['Select'] + ASelectOption: typeof import('ant-design-vue/es')['SelectOption'] + ASlider: typeof import('ant-design-vue/es')['Slider'] + ASpace: typeof import('ant-design-vue/es')['Space'] + ASteps: typeof import('ant-design-vue/es')['Steps'] + ATable: typeof import('ant-design-vue/es')['Table'] + ATag: typeof import('ant-design-vue/es')['Tag'] + ATextarea: typeof import('ant-design-vue/es')['Textarea'] + ATooltip: typeof import('ant-design-vue/es')['Tooltip'] + AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] + FormTooltip: typeof import('./components/FormTooltip.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] RouterView: typeof import('vue-router')['RouterView'] + SvgIcon: typeof import('./components/SvgIcon.vue')['default'] + TableColumns: typeof import('./components/TableColumns.vue')['default'] } } diff --git a/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue b/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue new file mode 100644 index 0000000000..b744f43e11 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue @@ -0,0 +1,70 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts b/EdgeCraftRAG/ui/vue/src/i18n/en.ts index 39d3cf0fa8..4c6ba3e7f3 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -27,6 +27,14 @@ export default { all: "All", reset: "Reset", uploadTip: "Click or drag file to this area to upload", + loading: "Loading", + waitTip: "Please wait patiently and do not refresh the page during this period.", + copy: "Copy", + send: "Send", + regenerate: "Regenerate", + copySucc: "Copy successful !", + copyError: "Copy failed !", + emptyText: "The content is empty !", }, system: { title: "System Status", @@ -85,10 +93,12 @@ export default { indexer: "Indexer", indexerType: "Indexer Type", embedding: "Embedding Model", + embeddingUrl: "Embedding URL", embeddingDevice: "Embedding run device", retriever: "Retriever", retrieverType: "Retriever Type", topk: "Search top k", + topn: "Top n", postProcessor: "PostProcessor", postProcessorType: "PostProcessor Type", rerank: "Rerank Model", @@ -99,15 +109,17 @@ export default { language: "Large Language Model", llmDevice: "LLM run device", weights: "Weights", - local: "Local", - vllm: "Vllm", - vector_uri: "Vector Uri", + local: "Local (OpenVINO)", + vllm: "Remote (vLLM)", + vector_url: "Vector Database URL", modelName: "Model Name", - vllm_url: "Vllm Url", + vllm_url: "vLLM URL", + kbadmin: "kbadmin", }, valid: { nameValid1: "Please input name", nameValid2: "Name should be between 2 and 30 characters", + nameValid3: "The name only supports letters, numbers, and underscores.", nodeParserType: "Please select Node Parser Type", chunkSizeValid1: "Please select Chunk Size", chunkSizeValid2: "The value of Chunk Size cannot be less than Chunk Overlap", @@ -115,9 +127,11 @@ export default { chunkOverlapValid2: "The value of Chunk 
Overlap cannot be greater than Chunk Size", windowSize: "Please select Chunk Window Size", indexerType: "Please select Indexer Type", - embedding: "Please select Embedding Model", + embedding: "Please select embedding Model", + embeddingUrl: "IP : Port, (e.g. 192.168.1.1:13020)", embeddingDevice: "Please select Embedding run device", retrieverType: "Please select Retriever Type", + retrieverTypeFormat: "Retriever type can only select kbadmin", topk: "Please select Top k", postProcessorType: "Please select PostProcessor Type", rerank: "Please select Rerank Model", @@ -126,14 +140,28 @@ export default { language: "Please select Large Language Model", llmDevice: "Please select LLM run device", weights: "Please select Weights", - vector_uri: "IP : Port, (e.g. 192.168.1.1:19530)", + kb_vector_url: "IP : Port, (e.g. 192.168.1.1:29530)", + vector_url: "IP : Port, (e.g. 192.168.1.1:19530)", vllm_url: "IP : Port, (e.g. 192.168.1.1:8086)", - urlValid1: "Please enter url", + urlValid1: "Please enter vector url", urlValid2: "Please enter the correct url", urlValid3: "URL cannot be accessed", urlValid4: "Test passed !", urlValid5: "The URL has not passed verification yet", modelName: "Please enter model name", + vllmUrlValid1: "Please enter vLLM url", + vllmUrlValid2: "Please enter the correct url", + vllmUrlValid3: "URL cannot be accessed", + vllmUrlValid4: "Test passed !", + vllmUrlValid5: "The URL has not passed verification yet", + nodeParserTypeTip: "Both Indexer Type and Retriever Type will be set to kbadmin at the same time", + indexerTypeTip: "Both Node Parser Type and Retriever Type will be set to kbadmin at the same time", + retrieverTypeTip: "Both Node Parser Type and Indexer Type will be set to kbadmin at the same time", + retrieverChangeTip: "Please go to the Indexer stage to complete the data", + indexerTypeValid1: "Indexer type can only select kbadmin", + modelRequired: "Please enter embedding model url", + modelFormat: "Please enter the correct url", + retrieverValid: "Please return to the Indexer stage to supplement information.", }, desc: { name: "The name identifier of the pipeline", @@ -143,14 +171,15 @@ export default { windowSize: "The number of sentences on each side of a sentence to capture", indexerType: "The type of index structure responsible for building based on the parsed nodes", embedding: "Embed the text data to represent it and build a vector index", - embeddingDevice: "The device used by the Embedding Model", + embeddingUrl: "Connecting embedding model url", + embeddingDevice: "The device used by the embedding model", retrieverType: - "The retrieval type used when retrieving relevant nodes from the index according to the user's query", + "The retrieval type used when retrieving relevant nodes from the index according to the user's experience", topk: "The number of top k results to return", postProcessorType: "Select postprocessors for post-processing of the context", rerank: "Rerank Model", rerankDevice: "Rerank run device", - generatorType: "Local inference generator or vllm generator", + generatorType: "Local inference generator or vLLM generator", language: "The large model used for generating dialogues", llmDevice: "The device used by the LLM", weights: "Model weight", @@ -167,10 +196,11 @@ export default { "Sentence window node parser. Splits a document into Nodes, with each node being a sentence. 
Each node contains a window from the surrounding sentences in the metadata.", unstructured: "UnstructedNodeParser is a component that processes unstructured data.", milvusVector: "Embedding vectors stored in milvus", - vector_uri: "Connecting milvus uri", + vector_url: "Connecting milvus vector url", test: "Test", - modelName: "Vllm model name", - vllm_url: " Test if Vllm url is available ", + modelName: "vLLM model name", + vllm_url: " Test if vLLM url is available ", + kbadmin: "Third party knowledge base engine", }, }, generation: { @@ -225,11 +255,28 @@ export default { activated: "Activated", nameValid1: "Please input knowledge base name", nameValid2: "Name should be between 2 and 30 characters", - nameValid3: "The name cannot start with a number", + nameValid3: "Alphanumeric and underscore only, starting with a letter or underscore.", desValid: "Please input knowledge base description", activeValid: "Please select whether to activate", - uploadValid: "Single file size not exceeding 50M.", + uploadValid: "Single file size not exceeding 200M.", deleteFileTip: "Are you sure delete this file?", + selectTitle: "Create Type Select", + selectDes: "Please select the type you want to create", + experience: "Experience", + experienceDes: + "Experience refers to the knowledge and skills acquired through practical involvement, trial, and reflection, serving as a key foundation for solving real-world problems.", + kbDes: + "A Knowledge Base is a centralized repository for storing organized information such as documents, FAQs, and guides, enabling teams or users to quickly access and share knowledge.", + type: "Type", + original: "Original", + kbadmin: "kbadmin", + typeValid: "Please select knowledge base type", + nameRequired: "Please select kbadmin name", + waitTip: "Please be patient and wait for the file upload to complete.", + done: "Finished", + successfully: "Successfully ", + failed: "Failed", + totalTip: "files", }, request: { pipeline: { @@ -248,9 +295,51 @@ export default { updateSucc: "Knowledge Base update successfully !", deleteSucc: "Knowledge Base deleted successfully !", }, + experience: { + createSucc: "Experience created successfully!", + updateSucc: "Experience update successful!", + deleteSucc: "Experience deleted successfully!", + }, }, error: { notFoundTip: "Uh oh! 
It seems like you're lost", back: "Go Home", }, + experience: { + create: "Create Experience", + edit: "Edit Experience", + import: "Import Experience", + fileFormatTip: "Supports JSON format, with file size not exceeding 100M.", + importSuccTip: "Files upload successful!", + importErrTip: "Files upload failed!", + uploadValid: "Single file size not exceeding 100M.", + experience: "Experience", + detail: "Detail", + operation: "Operation", + deleteTip: "Are you sure delete this experience?", + addExperience: "Add Experience", + delExperience: "Delete Experience", + addContent: "Add Content", + delContent: "Delete Content", + total: "Total experience: ", + unique: "Unique", + selectTip: "Please choose an appropriate method for data update", + cover: "Cover", + increase: "Append", + deactivateTip: "Are you sure deactivate this experience?", + activeTip: "Are you sure activate this experience?", + label: { + experience: "Experience", + contents: "Experience Content", + content: "Content", + }, + placeholder: { + experience: "Please enter Experience", + content: "Please enter content", + }, + valid: { + experience: "Experience cannot be empty", + content: "Content cannot be empty", + }, + }, }; diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index 2a1a318851..24b810bdc4 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -27,6 +27,14 @@ export default { all: "全选", reset: "重置", uploadTip: "点击或将文件拖到此区域进行上传", + loading: "加载中", + waitTip: "请耐心等待,在此期间不要刷新页面。", + copy: "复制", + send: "发送", + regenerate: "重新生成", + copySucc: "复制成功!", + copyError: "复制失败!", + emptyText: "内容为空!", }, system: { title: "系统状态", @@ -84,10 +92,12 @@ export default { indexer: "索引器", indexerType: "索引器类型", embedding: "嵌入模型", + embeddingUrl: "嵌入模型地址", embeddingDevice: "模型运行设备", retriever: "检索器", retrieverType: "检索器类型", topk: "检索 top k", + topn: "Top n", postProcessor: "节点后处理器", postProcessorType: "节点后处理器类型", rerank: "重排模型", @@ -98,15 +108,17 @@ export default { language: "语言大模型", llmDevice: "运行设备", weights: "权重", - local: "本地", - vllm: "Vllm", - vector_uri: "Vector Uri", + local: "本地(OpenVINO)", + vllm: "远程(vLLM)", + vector_url: "向量数据库地址", modelName: "模型名称", - vllm_url: "Vllm 地址", + vllm_url: "vLLM 地址", + kbadmin: "kbadmin", }, valid: { nameValid1: "请输入名称", nameValid2: "请输入 2 到 30 个字符的名称", + nameValid3: "名称仅支持字母、数字和下划线", nodeParserType: "请选择节点解析器类型", chunkSizeValid1: "请选择分块大小", chunkSizeValid2: "分块大小的值不能小于分块重叠值", @@ -115,8 +127,10 @@ export default { windowSize: "请选择句子上下文窗口大小", indexerType: "请选择索引器类型", embedding: "请选择嵌入模型", + embeddingUrl: "IP : 端口,(例如 192.168.1.1:13020)", embeddingDevice: "请选择嵌入模型运行设备", retrieverType: "请选择检索器类型", + retrieverTypeFormat: "检索器类型只能选择kbadmin", topk: "请选择Top k", postProcessorType: "请选择后处理器类型", rerank: "请选择重排模型", @@ -125,14 +139,28 @@ export default { language: "请选择大语言模型", llmDevice: "请选择大语言模型运行设备", weights: "请选择模型权重", - vector_uri: "IP : 端口,(例如 192.168.1.1:19530)", + kb_vector_url: "IP : 端口,(例如 192.168.1.1:29530)", + vector_url: "IP : 端口,(例如 192.168.1.1:19530)", vllm_url: "IP : 端口,(例如 192.168.1.1:8086)", - urlValid1: "URL 不能为空", - urlValid2: "请输入正确的URL", - urlValid3: "URL无法访问", + urlValid1: "向量数据库地址不能为空", + urlValid2: "请输入正确的向量数据库地址", + urlValid3: "向量数据库地址无法访问", urlValid4: "测试通过!", - urlValid5: "URL还未通过校验", + urlValid5: "向量数据库地址还未通过校验", modelName: "请输入模型名称", + vllmUrlValid1: "vLLM地址不能为空", + vllmUrlValid2: "请输入正确的vLLM地址", + vllmUrlValid3: "vLLM地址无法访问", + vllmUrlValid4: "测试通过!", + vllmUrlValid5: "vLLM地址还未通过校验", + 
nodeParserTypeTip: "索引器类型和检索器类型将同时设置为kbadmin", + indexerTypeTip: "节点解析器类型和检索器类型将同时设置为kbadmin", + retrieverTypeTip: "索引器类型和节点解析器类型将同时设置为kbadmin", + retrieverChangeTip: "请前往索引器阶段补全数据", + indexerTypeValid1: "索引器类型只能选择kbadmin", + modelRequired: "请输入嵌入模型地址", + modelFormat: "请输入正确的模型地址", + retrieverValid: "请回到Indexer阶段补充信息", }, desc: { name: "Pipeline的名称标识,用于区分不同工作流", @@ -142,13 +170,14 @@ export default { windowSize: "每个节点捕获的上下文句子窗口大小,用于增强语义完整性", indexerType: "基于解析节点构建的索引结构类型", embedding: "将文本转换为向量表示的过程", + embeddingUrl: "嵌入模型地址", embeddingDevice: "执行嵌入模型推理的硬件设备(CPU/GPU)", retrieverType: "根据用户查询从索引中检索节点的算法类型", topk: "检索时返回的最相关结果数量", postProcessorType: "对检索结果进行后处理的组件类型", rerank: "对检索结果进行二次排序的模型,提升答案相关性", rerankDevice: "执行重排模型推理的硬件设备(CPU/GPU)", - generatorType: "回答生成方式的类型(本地部署模型或 vllm 高效推理框架)", + generatorType: "回答生成方式的类型(本地部署模型或 vLLM 高效推理框架)", language: "用于生成自然语言回答的大模型(如 LLaMA、ChatGLM)", llmDevice: "大语言模型推理的硬件设备(需匹配模型规模要求)", weights: "大模型的权重", @@ -157,17 +186,18 @@ export default { vectorsimilarity: "根据向量相似性进行检索", autoMerge: "该检索器会尝试将上下文合并到父级上下文中", bm25: "使用BM25算法检索节点的BM25检索器", - faissVector: "嵌入存储在Faiss索引中。", + faissVector: "矢量索引存储在Faiss中。", vector: "矢量存储索引", simple: "解析文本,优先选择完整的句子。", - hierarchical: "使用借点解析将文档分割成递归层次节点", + hierarchical: "使用NodeParser将文档拆分为递归层次结构的节点。", sentencewindow: "将文档分割成节点,每个节点代表一个句子。每个节点包含一个来自元数据中周围句子的窗口", unstructured: "一个处理非结构化数据的组件", - milvusVector: "嵌入存储在Milvus索引中", - vector_uri: "测试Milvus地址是否可用", + milvusVector: "矢量索引存储在Milvus中", + vector_url: "测试Milvus地址是否可用", test: "测 试", - modelName: "Vllm 模型名称", - vllm_url: "测试Vllm地址是否可用", + modelName: "vLLM 模型名称", + vllm_url: "测试vLLM地址是否可用", + kbadmin: "第三方知识库系统", }, }, generation: { @@ -221,11 +251,28 @@ export default { activated: "激活状态", nameValid1: "请输入知识库名称", nameValid2: "请输入 2 到 30 个字符的名称", - nameValid3: "名称不能以数字开头", + nameValid3: "仅支持字母、数字和下划线,必须以字母或下划线开头。", desValid: "请输入知识库描述", activeValid: "请选择是否启用该功能。", - uploadValid: "单个文件大小不得超过 50MB", + uploadValid: "单个文件大小不得超过 200MB", deleteFileTip: "您确定要删除此文档吗?此操作不可恢复。", + selectTitle: "创建类型选择", + selectDes: "请选择要创建的数据类型", + experience: "经验注入", + experienceDes: + "Experience是指个人或团队在实践过程中积累的知识和技能,通常通过实际操作、试错和反思获得,是解决实际问题的重要依据", + kbDes: + "知识库是系统化存储信息的集合,用于集中管理文档、常见问题、操作指南等知识内容,便于团队或用户快速查找和共享信息。", + type: "类型", + original: "原始的", + kbadmin: "kbadmin", + typeValid: "请选择知识库类型", + nameRequired: "请选择kbadmin名称", + waitTip: "请耐心等待所有文件上传完成!", + done: "已完成", + successfully: "成功", + failed: "失败", + totalTip: "个文件", }, request: { pipeline: { @@ -244,9 +291,51 @@ export default { updateSucc: "知识库更新成功!", deleteSucc: " 知识库删除成功!", }, + experience: { + createSucc: "经验创建成功!", + updateSucc: "经验更新成功!", + deleteSucc: " 经验删除成功!", + }, }, error: { notFoundTip: "Oops 好像走错地方啦~", back: "首页", }, + experience: { + create: "新建经验", + edit: "编辑经验", + import: "导入经验", + fileFormatTip: "仅支持JSON格式,文件大小不超过100M", + importSuccTip: "文件上传成功!", + importErrTip: "文件上传失败!", + uploadValid: "单个文件大小不得超过 200MB", + experience: "经验", + detail: "详情", + operation: "操作", + deleteTip: "确定要删除这个经验?", + addExperience: "新增经验", + delExperience: "删除经验", + addContent: "新增内容", + delContent: "删除内容", + total: "经验总数: ", + unique: "唯一", + selectTip: "请选择合适的方式进行数据更新", + cover: "覆盖", + increase: "追加", + deactivateTip: "您确定要停用该经验库吗?", + activeTip: "您确定要启用该经验库吗?", + label: { + experience: "经验", + contents: "经验内容", + content: "内容", + }, + placeholder: { + experience: "请输入经验", + content: "请输入内容", + }, + valid: { + experience: "经验不能为空", + content: "内容不能为空", + }, + }, }; diff --git a/EdgeCraftRAG/ui/vue/src/layout/Header.vue 
b/EdgeCraftRAG/ui/vue/src/layout/Header.vue index 33d0038d62..0de80ece3d 100644 --- a/EdgeCraftRAG/ui/vue/src/layout/Header.vue +++ b/EdgeCraftRAG/ui/vue/src/layout/Header.vue @@ -2,7 +2,7 @@
- +
- + +
+ + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue new file mode 100644 index 0000000000..24a1f937ec --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue @@ -0,0 +1,225 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue new file mode 100644 index 0000000000..b558bac195 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue @@ -0,0 +1,100 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue new file mode 100644 index 0000000000..e828d47356 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue @@ -0,0 +1,387 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts new file mode 100644 index 0000000000..c9d1df62ed --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts @@ -0,0 +1,7 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import UpdateDialog from "./UpdateDialog.vue"; +import ImportDialog from "./ImportDialog.vue"; + +export { UpdateDialog, ImportDialog }; diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue new file mode 100644 index 0000000000..64b13e320d --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue @@ -0,0 +1,336 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue deleted file mode 100644 index 290a85cd8e..0000000000 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue +++ /dev/null @@ -1,302 +0,0 @@ - - - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue new file mode 100644 index 0000000000..8b9ab7978e --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue @@ -0,0 +1,134 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue index f987ff5cb4..e95e7436f6 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue @@ -17,14 +17,45 @@ autocomplete="off" :label-col="{ style: { width: '100px' } }" > - + + + {{ $t("knowledge.original") }} + {{ $t("knowledge.kbadmin") }} + + + + + + {{ + item + }} + + - + {{ $t("pipeline.activated") }} {{ $t("pipeline.inactive") }} @@ -59,10 +90,11 @@ import { requestKnowledgeBaseCreate, 
requestKnowledgeBaseUpdate, + getkbadminList, } from "@/api/knowledgeBase"; import { isValidName } from "@/utils/validate"; import { FormInstance } from "ant-design-vue"; -import { computed, ref } from "vue"; +import { computed, ref, onMounted } from "vue"; import { useI18n } from "vue-i18n"; const props = defineProps({ @@ -74,11 +106,17 @@ const props = defineProps({ type: String, default: "create", }, + dialogFlag: { + type: String, + default: "knowledge", + }, }); interface FormType { - name: string; + name: string | undefined; description: string; + comp_type: string; active: boolean; + comp_subtype: string; } const validateName = async (rule: any, value: string) => { @@ -89,7 +127,6 @@ const validateName = async (rule: any, value: string) => { if (len < 2 || len > 30) { return Promise.reject(t("knowledge.nameValid2")); } - console.log(isValidName(value)); if (!isValidName(value)) { return Promise.reject(t("knowledge.nameValid3")); } @@ -98,9 +135,11 @@ const validateName = async (rule: any, value: string) => { const { t } = useI18n(); const emit = defineEmits(["close", "switch"]); +const { dialogFlag } = props; + const typeMap = { - create: t("knowledge.create"), - edit: t("knowledge.edit"), + create: t(`${dialogFlag}.create`), + edit: t(`${dialogFlag}.edit`), } as const; const dialogTitle = computed(() => { return typeMap[props.dialogType as keyof typeof typeMap]; @@ -108,20 +147,39 @@ const dialogTitle = computed(() => { const isEdit = computed(() => { return props.dialogType === "edit"; }); -const isActivated = computed(() => { - return props.dialogData?.active; +const isExperience = computed(() => { + return props.dialogFlag === "experience"; +}); + +const isOriginal = computed(() => { + return form.comp_subtype === "origin_kb"; }); const modelVisible = ref(true); const submitLoading = ref(false); const formRef = ref(); -const { name = "", description = "", active = false } = props.dialogData; +const { + comp_subtype = "origin_kb", + name = undefined, + description = "", + active = false, + experience_active = false, +} = props.dialogData; const form = reactive({ - name, + comp_subtype, + name: isExperience.value ? "Experience" : name, description, - active, + comp_type: dialogFlag, + active: isExperience.value ? experience_active : active, }); - -const rules = reactive({ +const kbList = ref([]); +const rules: FormRules = reactive({ + comp_subtype: [ + { + required: true, + message: t("knowledge.typeValid"), + trigger: "change", + }, + ], name: [ { required: true, @@ -129,6 +187,13 @@ const rules = reactive({ trigger: ["blur", "change"], }, ], + kbName: [ + { + required: true, + message: t("knowledge.nameRequired"), + trigger: "change", + }, + ], active: [ { required: true, @@ -137,17 +202,36 @@ const rules = reactive({ }, ], }); +const handleTypeChange = () => { + form.name = undefined; +}; +const queryKbadmin = async () => { + const data: any = await getkbadminList(); + kbList.value = [].concat(data); +}; +// Format parameter +const formatFormParam = () => { + const { name, description, comp_type, active, comp_subtype } = form; + return { + name, + description, + comp_type, + comp_subtype: !isExperience.value ? comp_subtype : undefined, + active: !isExperience.value ? active : undefined, + experience_active: isExperience.value ? active : undefined, + }; +}; // Submit const handleSubmit = () => { formRef.value?.validate().then(() => { submitLoading.value = true; const { name } = form; - const apiUrl = - props.dialogType === "edit" - ? 
requestKnowledgeBaseUpdate - : requestKnowledgeBaseCreate; - apiUrl(form) + const apiUrl = isEdit.value + ? requestKnowledgeBaseUpdate + : requestKnowledgeBaseCreate; + + apiUrl(formatFormParam()) .then(() => { emit("switch", name); handleCancel(); @@ -165,6 +249,9 @@ const handleSubmit = () => { const handleCancel = () => { emit("close"); }; +onMounted(() => { + queryKbadmin(); +});
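To tie the dialog changes back to the server side: `formatFormParam()` produces different bodies for knowledge bases and for the single experience entry, and `KnowledgeManager` (earlier in this patch) rejects a second experience, rejects activating an experience as a knowledge base, and tracks `experience_active` separately. The sketch below shows the two payload shapes as plain Python dicts; the create/update endpoint paths are not visible in this diff, and the `kbadmin` subtype value is an assumption based on the radio options.

```python
# Illustrative payloads only; field names follow formatFormParam() above and the
# KnowledgeBaseCreateIn fields used by KnowledgeManager earlier in this patch.
knowledge_payload = {
    "name": "my_kb",
    "description": "Product manuals",
    "comp_type": "knowledge",
    "comp_subtype": "origin_kb",   # "kbadmin" (assumed value) when backed by the kbadmin engine
    "active": True,                # activates this KB; only one KB is active at a time
    "experience_active": None,     # not used for knowledge bases
}

experience_payload = {
    "name": "Experience",          # the dialog fixes the name for the experience entry
    "description": "Field troubleshooting notes",
    "comp_type": "experience",
    "comp_subtype": None,
    "active": None,                # server returns 409 if an experience is activated as a KB
    "experience_active": True,     # toggled via active_experience() instead
}
```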