Add streaming output #71

Open
wants to merge 33 commits into base: master

33 commits
41140b5
Add support for many llms via litellm
aantn Jun 24, 2024
4bbcb53
remove function calling title parameter that gemini doesn't support
aantn Jun 24, 2024
89031c2
more work on fixing gemini
aantn Jun 24, 2024
4e9da1f
add logging
aantn Jun 24, 2024
081359b
Make tool calling compatible with gemini
aantn Jun 24, 2024
905b748
remove unnecessary log
aantn Jun 24, 2024
bfe8d12
hide info logs from litellm
aantn Jun 24, 2024
1a9f638
fix pyinstaller build
aantn Jun 24, 2024
4b42b64
more fixes for pyinstaller build
aantn Jun 24, 2024
05cdf02
Update build-binaries-and-brew.yaml
aantn Jun 24, 2024
4783e74
more work on fixing pyinstaller builds
aantn Jun 24, 2024
88eebb9
silent noisy logs when using bedrock
aantn Jun 24, 2024
1229b32
Update README.md
aantn Jun 24, 2024
1624017
Update README.md
aantn Jun 24, 2024
4d73043
Update README.md
aantn Jun 24, 2024
8c3aec1
Update README.md
aantn Jun 24, 2024
9f6cd9f
Update README.md
aantn Jun 24, 2024
883f9a4
Update README.md
aantn Jun 24, 2024
78218d7
Update README.md
aantn Jun 24, 2024
91a17f7
Update README.md
aantn Jun 24, 2024
ee3dfa9
Update README.md
aantn Jun 24, 2024
9241dd9
Update README.md
aantn Jun 24, 2024
bddaed2
Update README.md
aantn Jun 24, 2024
1648912
Update README.md
aantn Jun 24, 2024
4cc6ea2
Update README.md
aantn Jun 24, 2024
e1705cd
Update README.md
aantn Jun 24, 2024
4f87d4a
Update README.md
aantn Jun 24, 2024
241a193
fix typo
aantn Jun 25, 2024
888d7e8
Update README.md
aantn Jun 25, 2024
7226639
refactor tool_calling_llm.py
aantn Jun 25, 2024
51a9050
initial working version of streaming API
aantn Jun 26, 2024
b64f928
cleanup code + minor improvements
aantn Jun 26, 2024
c25e2f2
Split tool call streaming into request/result
aantn Jun 26, 2024
3 changes: 2 additions & 1 deletion .github/workflows/build-and-test.yaml
@@ -21,6 +21,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Install dependencies and build
# if you change something here, you must also change it in .github/workflows/build-binaries-and-brew.yaml
run: |
python -m pip install --upgrade pip setuptools pyinstaller

@@ -29,7 +30,7 @@
poetry install --no-root

sudo apt-get install -y binutils
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets'
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --collect-data litellm
ls dist

- name: Test the binary
5 changes: 4 additions & 1 deletion .github/workflows/build-binaries-and-brew.yaml
@@ -61,10 +61,13 @@ jobs:
(Get-Content $filePath) -replace '__version__ = .+', '__version__ = "${{ github.ref_name }}"' | Set-Content $filePath
shell: pwsh

# if you change something here, you must also change it in .github/workflows/build-and-test.yaml
- name: Build with PyInstaller
shell: bash
# regarding the tiktoken part of the command, see https://github.com/openai/tiktoken/issues/80
# regarding the litellm part of the command, see https://github.com/pyinstaller/pyinstaller/issues/8620#issuecomment-2186540504
run: |
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets'
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --collect-data litellm
ls dist

- name: Zip the application (Unix)
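The `--hidden-import` flags added in both workflows are needed because tiktoken and litellm resolve modules dynamically at runtime, and PyInstaller's static analysis only follows literal `import` statements. A minimal illustration of the kind of import PyInstaller cannot detect (using `json` as a stand-in for a dynamically loaded plugin such as `tiktoken_ext.openai_public`):

```python
import importlib

# Imports resolved at runtime via importlib are invisible to PyInstaller's
# static analysis, so a bundled binary would omit them unless they are
# declared with --hidden-import. "json" stands in for a dynamic plugin here.
def load_plugin(module_name: str):
    return importlib.import_module(module_name)

mod = load_plugin("json")
print(mod.dumps({"bundled": True}))  # {"bundled": true}
```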
73 changes: 59 additions & 14 deletions README.md
@@ -8,7 +8,7 @@
</p>
</div>

The only AI assistant that investigates incidents **like a human does** - by looking at alerts and fetching missing data until it finds the root cause. Powered by OpenAI or any tool-calling LLM of your choice, including open source models.
The only AI assistant that investigates incidents **like a human does** - by looking at alerts and fetching missing data until it finds the root cause. Powered by OpenAI, Azure AI, AWS Bedrock, or any tool-calling LLM of your choice, including open source models.

### What Can HolmesGPT Do?
- **Investigate Incidents (AIOps)** from PagerDuty/OpsGenie/Prometheus/Jira/more
@@ -106,15 +106,17 @@ Like what you see? Checkout [other use cases](#other-use-cases) or get started b

## Key Features
- **Connects to Existing Observability Data:** Find correlations you didn’t know about. No need to gather new data or add instrumentation.
- **Compliance Friendly:** Can be run on-premise with your own LLM (or in the cloud with OpenAI or Azure)
- **Compliance Friendly:** Can be run on-premise with your own LLM (or in the cloud with OpenAI/Azure/AWS)
- **Transparent Results:** See a log of the AI’s actions and what data it gathered to understand how it reached conclusions
- **Extensible Data Sources:** Connect the AI to custom data by providing your own tool definitions
- **Runbook Automation:** Optionally provide runbooks in plain English and the AI will follow them automatically
- **Integrates with Existing Workflows:** Connect Slack and Jira to get results inside your existing tools

## Installation

First you will need <a href="#getting-an-api-key">an OpenAI API key, or the equivalent for another model</a>. Then install with one of the below methods:
**Prerequisite:** <a href="#getting-an-api-key"> Get an API key for a supported LLM.</a>

**Installation Methods:**

<details>
<summary>Brew (Mac/Linux)</summary>
@@ -183,7 +185,9 @@ docker run -it --net=host -v $(pwd)/config.yaml:/app/config.yaml -v ~/.aws:/root

### Getting an API Key

HolmesGPT requires an API Key to function. Follow one of the instructions below.
HolmesGPT requires an LLM API Key to function. The most common option is OpenAI, but many [LiteLLM-compatible](https://docs.litellm.ai/docs/providers/) models are supported. To use an LLM, set `--model` (e.g. `gpt-4o` or `bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0`) and `--api-key` (if necessary). Depending on the provider, you may need to set environment variables too.
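The provider prefix in `--model` is the LiteLLM convention for routing a request to the right backend. A minimal sketch of that convention (illustrative only — the actual parsing happens inside LiteLLM, not in HolmesGPT):

```python
# Illustrative sketch of LiteLLM-style model strings: an optional
# "provider/" prefix selects the backend; a bare name defaults to OpenAI.
def split_model(model: str) -> tuple[str, str]:
    if "/" in model:
        provider, name = model.split("/", 1)
        return provider, name
    return "openai", model

print(split_model("gpt-4o"))
print(split_model("bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"))
```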

**Instructions for popular LLMs:**

<details>
<summary>OpenAI</summary>
Expand All @@ -192,7 +196,7 @@ To work with OpenAI’s GPT 3.5 or GPT-4 models you need a paid [OpenAI API key]

**Note**: This is different from being a “ChatGPT Plus” subscriber.

Pass your API key to holmes with the `--api-key` cli argument:
Pass your API key to holmes with the `--api-key` cli argument. Because OpenAI is the default provider, the `--model` flag is optional here (`gpt-4o` is the default model).

```
holmes ask --api-key="..." "what pods are crashing in my cluster and why?"
@@ -205,16 +209,50 @@ If you prefer not to pass secrets on the cli, set the OPENAI_API_KEY environment
<details>
<summary>Azure OpenAI</summary>

To work with Azure AI, you need the [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource).
To work with Azure AI, you need an [Azure OpenAI resource](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource) and to set the following environment variables:

* AZURE_API_VERSION - e.g. 2024-02-15-preview
* AZURE_API_BASE - e.g. https://my-org.openai.azure.com/
* AZURE_OPENAI_API_KEY (optional) - equivalent to the `--api-key` cli argument

Set those environment variables and run:

```bash
holmes ask "what pods are unhealthy and why?" --llm=azure --api-key=<PLACEHOLDER> --azure-endpoint='<PLACEHOLDER>'
holmes ask "what pods are unhealthy and why?" --model=azure/<DEPLOYMENT_NAME> --api-key=<API_KEY>
```

Refer to the [LiteLLM Azure docs ↗](https://litellm.vercel.app/docs/providers/azure) for more details.
</details>

<details>
<summary>AWS Bedrock</summary>

Before running the command below, you must run `pip install "boto3>=1.28.57"` (the quotes prevent the shell from interpreting `>` as a redirect) and set the following environment variables:

* `AWS_REGION_NAME`
* `AWS_ACCESS_KEY_ID`
* `AWS_SECRET_ACCESS_KEY`

If the AWS CLI is already configured on your machine, you may be able to find those parameters with:

```console
cat ~/.aws/credentials ~/.aws/config
```

Once everything is configured, run:
```console
holmes ask "what pods are unhealthy and why?" --model=bedrock/<MODEL_NAME>
```

Be sure to replace `MODEL_NAME` with a model you have access to - e.g. `anthropic.claude-3-5-sonnet-20240620-v1:0`. To list models your account can access:

```
aws bedrock list-foundation-models --region=us-east-1
```

Note that different models are available in different regions. For example, Claude Opus is only available in us-west-2.

Refer to the [LiteLLM Bedrock docs ↗](https://litellm.vercel.app/docs/providers/bedrock) for more details.
</details>

<details>
@@ -429,7 +467,7 @@ Define custom runbooks to give explicit instructions to the LLM on how to invest

### Large Language Model (LLM) Configuration

Choose between OpenAI or Azure for integrating large language models. Provide the necessary API keys and endpoints for the selected service.
Choose between OpenAI, Azure, AWS Bedrock, and more. Provide the necessary API keys and endpoints for the selected service.


<details>
@@ -438,7 +476,6 @@ Choose between OpenAI or Azure for integrating large language models. Provide th

```bash
# Configuration for OpenAI LLM
#llm: "openai"
#api_key: "your-secret-api-key"
```
</details>
@@ -449,12 +486,20 @@ Choose between OpenAI or Azure for integrating large language models. Provide th

```bash
# Configuration for Azure LLM
#llm: "azure"
#api_key: "your-secret-api-key"
#azure_endpoint: "https://some-azure-org.openai.azure.com/openai/deployments/gpt4-1106/chat/completions?api-version=2023-07-01-preview"
#model: "azure/<DEPLOYMENT_NAME>"
#you will also need to set environment variables - see above
```
</details>

<details>
<summary>Bedrock</summary>

```bash
# Configuration for AWS Bedrock LLM
#model: "bedrock/<MODEL_ID>"
#you will also need to set environment variables - see above
```
</details>


</details>

34 changes: 3 additions & 31 deletions holmes/config.py
@@ -25,21 +25,10 @@
load_toolsets_from_file)
from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file


class LLMType(StrEnum):
OPENAI = "openai"
AZURE = "azure"


class Config(RobustaBaseConfig):
llm: Optional[LLMType] = LLMType.OPENAI
api_key: Optional[SecretStr] = (
None # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
)
azure_endpoint: Optional[str] = (
None # if None, read from AZURE_OPENAI_ENDPOINT env var
)
azure_api_version: Optional[str] = "2024-02-01"
model: Optional[str] = "gpt-4o"
max_steps: Optional[int] = 10

@@ -77,11 +66,10 @@ class Config(RobustaBaseConfig):

@classmethod
def load_from_env(cls):
kwargs = {"llm": LLMType(os.getenv("HOLMES_LLM", "OPENAI").lower())}
kwargs = {}
for field_name in [
"model",
"api_key",
"azure_endpoint",
"max_steps",
"alertmanager_url",
"alertmanager_username",
@@ -106,22 +94,6 @@ def load_from_env(cls):
kwargs[field_name] = val
return cls(**kwargs)
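The simplified `load_from_env` above follows one pattern for every field: upper-case the field name, read it from the environment, and include it only when set so Pydantic defaults apply otherwise. A standalone sketch of that pattern (the field list here is illustrative, not the full `Config` field set):

```python
import os

# Each config field maps to an upper-cased environment variable and is
# included only when present, so class-level defaults apply otherwise.
def load_kwargs_from_env(field_names):
    kwargs = {}
    for field_name in field_names:
        val = os.getenv(field_name.upper())
        if val is not None:
            kwargs[field_name] = val
    return kwargs

os.environ["MODEL"] = "gpt-4o"
os.environ.pop("MAX_STEPS", None)
print(load_kwargs_from_env(["model", "max_steps"]))  # {'model': 'gpt-4o'}
```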

def create_llm(self) -> OpenAI:
if self.llm == LLMType.OPENAI:
logging.debug(f"Using OpenAI")
return OpenAI(
api_key=self.api_key.get_secret_value() if self.api_key else None,
)
elif self.llm == LLMType.AZURE:
logging.debug(f"Using Azure with endpoint {self.azure_endpoint}")
return AzureOpenAI(
api_key=self.api_key.get_secret_value() if self.api_key else None,
azure_endpoint=self.azure_endpoint,
api_version=self.azure_api_version,
)
else:
raise ValueError(f"Unknown LLM type: {self.llm}")

def _create_tool_executor(
self, console: Console, allowed_toolsets: ToolsetPattern
) -> YAMLToolExecutor:
@@ -162,8 +134,8 @@ def create_toolcalling_llm(
) -> IssueInvestigator:
tool_executor = self._create_tool_executor(console, allowed_toolsets)
return ToolCallingLLM(
self.create_llm(),
self.model,
self.api_key.get_secret_value() if self.api_key else None,
tool_executor,
self.max_steps,
)
@@ -178,8 +150,8 @@ def create_issue_investigator(
runbook_manager = RunbookManager(all_runbooks)
tool_executor = self._create_tool_executor(console, allowed_toolsets)
return IssueInvestigator(
self.create_llm(),
self.model,
self.api_key.get_secret_value() if self.api_key else None,
tool_executor,
runbook_manager,
self.max_steps,