diff --git a/.github/workflows/rigging_pr_description.yml b/.github/workflows/rigging_pr_description.yml
index 4ef920d..5bb8b8f 100644
--- a/.github/workflows/rigging_pr_description.yml
+++ b/.github/workflows/rigging_pr_description.yml
@@ -19,12 +19,13 @@ jobs:
       # Get the diff first
       - name: Get Diff
         id: diff
+        # shellcheck disable=SC2102
         run: |
           git fetch origin "${{ github.base_ref }}"
           MERGE_BASE=$(git merge-base HEAD "origin/${{ github.base_ref }}")
-          # Encode the diff as base64 to preserve all characters
-          DIFF=$(git diff "$MERGE_BASE"..HEAD | base64 -w 0)
-          echo "diff=$DIFF" >> "$GITHUB_OUTPUT"
+          # Use separate diff arguments instead of range notation
+          DIFF=$(git diff "$MERGE_BASE" HEAD | base64 --wrap=0)
+          echo "diff=${DIFF}" >> "$GITHUB_OUTPUT"
       - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.0.3
         with:
           python-version: "3.11"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 18c7b6f..54313f5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,16 +18,6 @@ repos:
       - id: actionlint
         name: Check Github Actions
 
-  # Python linting
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: 8b76f04e7e5a9cd259e9d1db7799599355f97cdf # v0.8.2
-    hooks:
-      # Run the linter.
-      - id: ruff
-      # Run the formatter.
-      - id: ruff-format
-
   # Python code security
   - repo: https://github.com/PyCQA/bandit
     rev: 8fd258abbac759d62863779f946d6a88e8eabb0f #1.8.0
diff --git a/burpference/api_adapters.py b/burpference/api_adapters.py
index 5be8ff5..806fc11 100644
--- a/burpference/api_adapters.py
+++ b/burpference/api_adapters.py
@@ -19,18 +19,18 @@ def prepare_request(self, user_content, system_content=None):
 
     def process_response(self, response_data):
         pass
 
+
 # Ollama /generate API adapter class
 class OllamaGenerateAPIAdapter(BaseAPIAdapter):
     def prepare_request(self, system_content, user_content):
-        prompt = "{0}\n\nUser request:\n{1}".format(
-            system_content, user_content)
+        prompt = "{0}\n\nUser request:\n{1}".format(system_content, user_content)
 
         return {
             "model": self.config.get("model", "llama3.2"),
             "prompt": prompt,
             "format": self.config.get("format", "json"),
-            "stream": self.config.get("stream", False)
+            "stream": self.config.get("stream", False),
         }
 
     def process_response(self, response_data):
@@ -39,13 +39,18 @@ def process_response(self, response_data):
 
 
 # Ollama /chat API adapter class
+
 class OllamaChatAPIAdapter(BaseAPIAdapter):
     def prepare_request(self, system_content, user_content):
         total_input_size = len(system_content) + len(user_content)
-        max_tokens = self.config.get("max_input_size", 32000) # Default to 32k if not specified
+        max_tokens = self.config.get(
+            "max_input_size", 32000
+        )  # Default to 32k if not specified
 
         if total_input_size > max_tokens:
-            raise ValueError("Input size ({total_input_size} chars) exceeds maximum allowed ({max_tokens})")
+            raise ValueError(
+                "Input size ({total_input_size} chars) exceeds maximum allowed ({max_tokens})"
+            )
 
         model = self.config.get("model", "llama3.2")
         quantization = self.config.get("quantization")
@@ -54,8 +59,12 @@ def prepare_request(self, system_content, user_content):
             model = "{0}:{1}".format(model, quantization)
 
         try:
-            system_content = system_content.encode('utf-8', errors='replace').decode('utf-8')
-            user_content = user_content.encode('utf-8', errors='replace').decode('utf-8')
+            system_content = system_content.encode("utf-8", errors="replace").decode(
+                "utf-8"
+            )
+            user_content = user_content.encode("utf-8", errors="replace").decode(
+                "utf-8"
+            )
         except Exception as e:
             raise ValueError("Error encoding content: {str(e)}")
@@ -63,14 +72,15 @@ def prepare_request(self, system_content, user_content):
             "model": model,
             "messages": [
                 {"role": "system", "content": system_content},
-                {"role": "user", "content": user_content}
+                {"role": "user", "content": user_content},
             ],
-            "stream": self.config.get("stream", False)
+            "stream": self.config.get("stream", False),
         }
 
     def process_response(self, response_data):
         return json.loads(response_data)
 
+
 # OpenAI /v1/chat/completions API adapter class
@@ -80,15 +90,15 @@ def prepare_request(self, user_content, system_content=None):
             "model": self.config.get("model", "gpt-4o-mini"),
             "messages": [
                 {"role": "system", "content": system_content},
-                {"role": "user", "content": user_content}
-            ]
+                {"role": "user", "content": user_content},
+            ],
         }
 
     def process_response(self, response_data):
         response = json.loads(response_data)
-        if 'choices' in response and len(response['choices']) > 0:
-            if 'message' in response['choices'][0]:
-                return response['choices'][0]['message']['content']
+        if "choices" in response and len(response["choices"]) > 0:
+            if "message" in response["choices"][0]:
+                return response["choices"][0]["message"]["content"]
             else:
                 raise ValueError("Unexpected response format: {response}")
         else:
@@ -97,56 +107,61 @@ def process_response(self, response_data):
     def send_request(self, request_payload):
         headers = {
             "Authorization": "Bearer {0}".format(self.config.get("api_key", "")),
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
-        req = urllib2.Request(self.config.get(
-            "host"), json.dumps(request_payload), headers=headers)
-        req.get_method = lambda: 'POST'
+        req = urllib2.Request(
+            self.config.get("host"), json.dumps(request_payload), headers=headers
+        )
+        req.get_method = lambda: "POST"
         response = urllib2.urlopen(req)
         return response.read()
 
 
 # Anthropic /v1/messages API adapter class
+
 class AnthropicAPIAdapter(BaseAPIAdapter):
     def prepare_request(self, user_content, system_content=None):
         return {
             "model": self.config.get("model", "claude-3-5-sonnet-20241022"),
             "max_tokens": int(self.config.get("max_tokens", 1020)),
             "system": system_content,
-            "messages": [
-                {"role": "user", "content": user_content}
-            ]
+            "messages": [{"role": "user", "content": user_content}],
         }
 
     def send_request(self, request_payload):
         headers = {
             "x-api-key": self.config.get("headers", {}).get("x-api-key", ""),
             "content-type": "application/json",
-            "anthropic-version": self.config.get("headers", {}).get("anthropic-version", "2023-06-01")
+            "anthropic-version": self.config.get("headers", {}).get(
+                "anthropic-version", "2023-06-01"
+            ),
         }
-        req = urllib2.Request(self.config.get("host"),
-                              data=json.dumps(request_payload).encode('utf-8'),
-                              headers=headers)
-        req.get_method = lambda: 'POST'
+        req = urllib2.Request(
+            self.config.get("host"),
+            data=json.dumps(request_payload).encode("utf-8"),
+            headers=headers,
+        )
+        req.get_method = lambda: "POST"
         try:
             response = urllib2.urlopen(req)
             return response.read()
         except urllib2.HTTPError as e:
-            error_message = e.read().decode('utf-8')
+            error_message = e.read().decode("utf-8")
             raise ValueError("HTTP Error {e.code}: {error_message}")
         except Exception as e:
             raise ValueError("Error sending request: {str(e)}")
 
     def process_response(self, response_data):
         response = json.loads(response_data)
-        if 'message' in response:
-            return response['message']['content']
-        elif 'content' in response:
-            return response['content']
+        if "message" in response:
+            return response["message"]["content"]
+        elif "content" in response:
+            return response["content"]
         else:
             raise ValueError("Unexpected response format: {response}")
 
+
 # Groq openai/v1/chat/completions
@@ -157,22 +172,23 @@ def prepare_request(self, user_content, system_content=None):
             "max_tokens": int(self.config.get("max_tokens", 1020)),
             "messages": [
                 {"role": "system", "content": system_content},
-                {"role": "user", "content": user_content}
-            ]
+                {"role": "user", "content": user_content},
+            ],
         }
 
     def process_response(self, response_data):
         response = json.loads(response_data)
-        return response['choices'][0]['message']['content']
+        return response["choices"][0]["message"]["content"]
 
     def send_request(self, request_payload):
         headers = {
             "x-api-key": "{0}".format(self.config.get("api_key", "")),
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
-        req = urllib2.Request(self.config.get(
-            "host"), json.dumps(request_payload), headers=headers)
-        req.get_method = lambda: 'POST'
+        req = urllib2.Request(
+            self.config.get("host"), json.dumps(request_payload), headers=headers
+        )
+        req.get_method = lambda: "POST"
         response = urllib2.urlopen(req)
         return response.read()
@@ -184,25 +200,81 @@ def prepare_request(self, system_content, user_content):
             "max_tokens": int(self.config.get("max_tokens", 1020)),
             "messages": [
                 {"role": "system", "content": system_content},
-                {"role": "user", "content": user_content}
-            ]
+                {"role": "user", "content": user_content},
+            ],
         }
 
     def process_response(self, response_data):
         response = json.loads(response_data)
-        return response['choices'][0]['message']['content']
+        return response["choices"][0]["message"]["content"]
 
     def send_request(self, request_payload):
         headers = {
             "x-api-key": "{0}".format(self.config.get("api_key", "")),
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
        }
-        req = urllib2.Request(self.config.get(
-            "host"), json.dumps(request_payload), headers=headers)
-        req.get_method = lambda: 'POST'
+        req = urllib2.Request(
+            self.config.get("host"), json.dumps(request_payload), headers=headers
+        )
+        req.get_method = lambda: "POST"
         response = urllib2.urlopen(req)
         return response.read()
+
+# HuggingFace API adapter class /chat-completion
+
+
+class HuggingFaceAPIAdapter(BaseAPIAdapter):
+    def prepare_request(self, user_content, system_content=None):
+        messages = []
+        if system_content:
+            messages.append({"role": "system", "content": system_content})
+        messages.append({"role": "user", "content": user_content})
+
+        return {
+            "inputs": {"messages": messages},
+            "parameters": {
+                "max_length": self.config.get("parameters", {}).get("max_length", 512),
+                "temperature": self.config.get("parameters", {}).get(
+                    "temperature", 0.7
+                ),
+                "top_p": self.config.get("parameters", {}).get("top_p", 0.9),
+                "repetition_penalty": self.config.get("parameters", {}).get(
+                    "repetition_penalty", 1.2
+                ),
+            },
+        }
+
+    def send_request(self, request_payload):
+        headers = self.config.get("headers", {})
+
+        if "Authorization" not in headers:
+            headers["Authorization"] = "Bearer {}".format(
+                self.config.get("api_key", "")
+            )
+
+        req = urllib2.Request(
+            self.config.get("host"),
+            json.dumps(request_payload).encode("utf-8"),
+            headers=headers,
+        )
+
+        try:
+            response = urllib2.urlopen(req)
+            return response.read()
+        except urllib2.HTTPError as e:
+            error_message = e.read().decode("utf-8")
+            raise ValueError("HTTP Error {}: {}".format(e.code, error_message))
+
+    def process_response(self, response_data):
+        response = json.loads(response_data)
+        if isinstance(response, list) and len(response) > 0:
+            return response[0].get("generated_text", "")
+        elif isinstance(response, dict):
+            return response.get("generated_text", str(response))
+        return str(response)
+
+
 # Generic other API base adapter
@@ -218,6 +290,7 @@ def process_response(self, response_data):
 
 
 # Function to define and load the API adapter
+
 def get_api_adapter(config):
     api_type = config.get("api_type", "").lower()
     endpoint = config.get("host", "").lower()
@@ -237,6 +310,8 @@ def get_api_adapter(config):
         return GroqOpenAIChatAPIAdapter(config)
     elif api_type == "groq-openai-stream":
         return GroqOpenAIChatAPIStreamAdapter(config)
+    elif api_type == "huggingface":
+        return HuggingFaceAPIAdapter(config)
     elif api_type == "other":
         return OtherAPIAdapter(config)
     else:
diff --git a/configs/README.md b/configs/README.md
index 90ff1d1..012dcb6 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -15,6 +15,8 @@ If you intend to fork or contribute to burpference, ensure that you have exclude
     - [Example Anthropic `/messages` inference with `claude-3-5-sonnet-20241022`:](#example-anthropic-messages-inference-with-claude-3-5-sonnet-20241022)
   - [OpenAI Inference](#openai-inference)
     - [Example OpenAI `/completions` inference with `gpt-4o-mini`:](#example-openai-completions-inference-with-gpt-4o-mini)
+  - [HuggingFace Serveless Inference](#huggingface-serveless-inference)
+    - [Example HuggingFace `/text-generation` inference](#example-huggingface-text-generation-inference)
 - [Model System Prompts](#model-system-prompts)
 
 ---
@@ -96,6 +98,31 @@ In order to serve inference as part of burpference, the model must be running on
 }
 ```
 
+### HuggingFace Serveless Inference
+
+#### Example HuggingFace `/text-generation` inference
+
+```json
+{
+  "api_type": "huggingface",
+  "name": "HuggingFace Code Review",
+  "model": "bigcode/starcoder",
+  "host": "https://api-inference.huggingface.co/models/bigcode/starcoder",
+  "api_key": "YOUR_HUGGINGFACE_API_KEY",
+  "headers": {
+    "Authorization": "YOUR_HUGGINGFACE_API_KEY",
+    "Content-Type": "application/json"
+  },
+  "parameters": {
+    "max_tokens": 512,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "repetition_penalty": 1.2,
+    "do_sample": true
+  }
+}
+```
+
 ## Model System Prompts
 
 By default, the system prompt sent as pretext to the model is defined [here](../prompts/proxy_prompt.txt), feel free to edit, tune and tweak as you see fit.
diff --git a/configs/huggingface_bigstar_coder.json b/configs/huggingface_bigstar_coder.json
new file mode 100644
index 0000000..77dff12
--- /dev/null
+++ b/configs/huggingface_bigstar_coder.json
@@ -0,0 +1,16 @@
+{
+  "api_type": "huggingface",
+  "name": "HuggingFace Code Review",
+  "model": "bigcode/starcoder",
+  "host": "https://api-inference.huggingface.co/models/bigcode/starcoder",
+  "headers": {
+    "Authorization": "Bearer YOUR_HUGGINGFACE_API_KEY",
+    "Content-Type": "application/json"
+  },
+  "parameters": {
+    "max_length": 512,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "repetition_penalty": 1.2
+  }
+}
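
Reviewer note: below is a minimal, illustrative sketch (not part of the patch) of the request body that the new `HuggingFaceAPIAdapter.prepare_request()` assembles once `get_api_adapter()` dispatches on `api_type == "huggingface"`. The inline config mirrors `configs/huggingface_bigstar_coder.json` with a placeholder token, and the system/user strings are made-up stand-ins for the proxy prompt and intercepted traffic.

```python
import json

# Illustrative config, shaped like configs/huggingface_bigstar_coder.json (placeholder key).
config = {
    "api_type": "huggingface",
    "model": "bigcode/starcoder",
    "host": "https://api-inference.huggingface.co/models/bigcode/starcoder",
    "headers": {
        "Authorization": "Bearer YOUR_HUGGINGFACE_API_KEY",
        "Content-Type": "application/json",
    },
    "parameters": {
        "max_length": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
    },
}

# Placeholder prompt contents; at runtime burpference supplies the proxy system prompt
# and the intercepted request/response pair.
system_content = "You are a security reviewer."
user_content = "GET /login HTTP/1.1 ..."

# Payload as assembled by HuggingFaceAPIAdapter.prepare_request(): the system message is
# optional, and each parameter falls back to the defaults shown in the adapter.
params = config.get("parameters", {})
payload = {
    "inputs": {
        "messages": [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]
    },
    "parameters": {
        "max_length": params.get("max_length", 512),
        "temperature": params.get("temperature", 0.7),
        "top_p": params.get("top_p", 0.9),
        "repetition_penalty": params.get("repetition_penalty", 1.2),
    },
}

# send_request() POSTs this JSON to config["host"] with the configured headers,
# only falling back to "Bearer <api_key>" when no Authorization header is set.
print(json.dumps(payload, indent=2))
```

`process_response()` then pulls `generated_text` from either a list- or dict-shaped response, falling back to the raw string.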