feat: added quickstart tutorial for self-hosted models (#191)
* feat: added docker-compose and test for llama3:8b via ollama

* feat: added quick-start tutorial for self-hosted models

* chore: formatting fix

* fix: fixed ci test for ollama model

* chore: bumped the default Ollama model to Llama 3.1

* chore: fixed doc

* feat: added instructions for self-hosted vision models

* fix: updated env for ollama ci test

* chore: divided a single quickstart ci job into separate jobs

* chore: simplified names of the ci jobs

* fix: migrated setup.py script for ollama to httpx

* feat: added ci test for self-hosted embedding model

* feat: used .env file instead of env vars in self-hosted model tutorial

* fix: increased timeout in the ollama setup script

* review

* feat: added progress bar for model downloading

---------

Co-authored-by: sr-remsha <[email protected]>
adubovik and sr-remsha committed Sep 19, 2024
1 parent 92286ed commit 4460529
Showing 25 changed files with 561 additions and 27 deletions.
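The new CI jobs exercise the self-hosted setup through DIAL Core's OpenAI-compatible API, in the same way as the test added in this commit. A minimal sketch of such a request, assuming the quickstart compose stack is up locally with the CI defaults (Core published on host port 8080 per common.yml, API key `dial_api_key`, API version `2024-02-01`, and a chat deployment named `chat-model`):

```python
# Sketch only: the endpoint shape and defaults mirror the CI test added below;
# the localhost URL is an assumption based on the 8080:8080 port mapping in common.yml.
import httpx

DIAL_URL = "http://localhost:8080"
DEPLOYMENT = "chat-model"

response = httpx.post(
    f"{DIAL_URL}/openai/deployments/{DEPLOYMENT}/chat/completions",
    headers={"api-key": "dial_api_key"},
    params={"api-version": "2024-02-01"},
    json={
        "model": DEPLOYMENT,
        "messages": [{"role": "user", "content": "2 + 3 = ? Reply with a single number:"}],
        "stream": False,
    },
    timeout=300.0,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```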
37 changes: 31 additions & 6 deletions .github/workflows/pr.yml
@@ -7,7 +7,7 @@ on:

jobs:
run-notebooks:
name: Run example Python notebooks
name: Cookbook notebooks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -18,8 +18,8 @@ jobs:
cwd: "./dial-cookbook/ci"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart:
name: Run quickstart examples
run-quickstart-model:
name: Quickstart model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -28,19 +28,44 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/model"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-application:
name: Quickstart application
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart application example
with:
cwd: "./dial-docker-compose/ci/application"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-addon:
name: Quickstart addon
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart addon example
with:
cwd: "./dial-docker-compose/ci/addon"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-self-hosted-model:
name: Quickstart self-hosted model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
with:
cwd: "./dial-docker-compose/ci/ollama"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build:
needs: [run-notebooks, run-quickstart]
needs:
- run-notebooks
- run-quickstart-model
- run-quickstart-application
- run-quickstart-addon
- run-quickstart-self-hosted-model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
38 changes: 32 additions & 6 deletions .github/workflows/release.yml
@@ -7,7 +7,7 @@ on:

jobs:
run-notebooks:
name: Run example Python notebooks
name: Cookbook notebooks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -18,8 +18,9 @@ jobs:
cwd: "./dial-cookbook/ci"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart:
name: Run quickstart examples

run-quickstart-model:
name: Quickstart model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -28,19 +29,44 @@ jobs:
with:
cwd: "./dial-docker-compose/ci/model"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-application:
name: Quickstart application
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart application example
with:
cwd: "./dial-docker-compose/ci/application"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-addon:
name: Quickstart addon
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
name: Run quickstart addon example
with:
cwd: "./dial-docker-compose/ci/addon"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

run-quickstart-self-hosted-model:
name: Quickstart self-hosted model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: isbang/compose-action@802a148945af6399a338c7906c267331b39a71af # v2.0.0
with:
cwd: "./dial-docker-compose/ci/ollama"
up-flags: "--abort-on-container-exit --exit-code-from test --timeout 300"

build-and-deploy:
needs: [run-notebooks, run-quickstart]
needs:
- run-notebooks
- run-quickstart-model
- run-quickstart-application
- run-quickstart-addon
- run-quickstart-self-hosted-model
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
7 changes: 5 additions & 2 deletions .gitignore
@@ -23,8 +23,11 @@ yarn-error.log*
.venv
__pycache__

# DIAL Core logs
*.log
# Docker container volumes
core-data
core-logs
.ollama

/.quarto/

# Autogenerated files by Quarto
5 changes: 3 additions & 2 deletions README.md
@@ -13,10 +13,11 @@
## Helm Deployment

* [AI DIAL Generic Installation Simple Guide](https://github.com/epam/ai-dial-helm/tree/main/charts/dial/examples/generic/simple)

## Tutorials

* [Launch AI DIAL Chat with an Azure model](./docs/tutorials/quick-start-model.md)
* [Launch AI DIAL Chat with a self-hosted model](./docs/tutorials/quick-start-with-self-hosted-model.md)
* [Launch AI DIAL Chat with a Sample Application](./docs/tutorials/quick-start-with-application.md)
* [Launch AI DIAL Chat with a Sample Addon](./docs/tutorials/quick-start-with-addon.md)

@@ -31,7 +32,7 @@
## Configuration

* Refer to [Configuration](./docs/Deployment/configuration.md)

## Other AI DIAL Project Open Source Repositories

Here is the current list of repositories where you can find more details. You can also refer to [repository map](https://epam-rail.com/open-source).
2 changes: 1 addition & 1 deletion dial-docker-compose/addon/docker-compose.yml
@@ -4,7 +4,7 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"

4 changes: 4 additions & 0 deletions dial-docker-compose/ci/ollama/.env
@@ -0,0 +1,4 @@
DIAL_DIR="./ollama"
OLLAMA_CHAT_MODEL=llama3.1:8b-instruct-q4_0
OLLAMA_VISION_MODEL=llava-phi3:3.8b-mini-q4_0
OLLAMA_EMBEDDING_MODEL=nomic-embed-text:137m-v1.5-fp16
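These tags are pulled by the ollama-setup service before the test container starts (the commit notes the setup script was migrated to httpx and gained a download progress bar). The setup script itself is not part of this excerpt; a rough sketch of that kind of pull-with-progress logic, with the /api/pull endpoint and the status/total/completed fields assumed from the public Ollama API, could look like:

```python
# Hypothetical sketch of pulling an Ollama model with progress reporting;
# the Ollama base URL and the streamed JSON fields are assumptions.
import json
import os

import httpx

OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")


def pull_model(model: str) -> None:
    """Pull a model tag from Ollama and print rough download progress."""
    with httpx.stream(
        "POST", f"{OLLAMA_URL}/api/pull", json={"name": model}, timeout=None
    ) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            event = json.loads(line)
            total, done = event.get("total"), event.get("completed")
            if total and done:
                print(f"{model}: {done / total:.0%} ({event.get('status', '')})")
            else:
                print(f"{model}: {event.get('status', '')}")


if __name__ == "__main__":
    pull_model(os.environ.get("OLLAMA_CHAT_MODEL", "llama3.1:8b-instruct-q4_0"))
```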
16 changes: 16 additions & 0 deletions dial-docker-compose/ci/ollama/docker-compose.yml
@@ -0,0 +1,16 @@
include:
- path: ../../ollama/docker-compose.yml
env_file: ./.env

services:
test:
build: test
environment:
DIAL_URL: "http://core:8080"
DIAL_API_KEY: "dial_api_key"
DIAL_API_VERSION: "2024-02-01"
depends_on:
ollama-setup:
condition: service_healthy
core:
condition: service_healthy
1 change: 1 addition & 0 deletions dial-docker-compose/ci/ollama/test/.dockerignore
@@ -0,0 +1 @@
Dockerfile
7 changes: 7 additions & 0 deletions dial-docker-compose/ci/ollama/test/Dockerfile
@@ -0,0 +1,7 @@
FROM python:3.11-alpine

WORKDIR /app
COPY * /app
RUN pip install -r requirements.txt

CMD ["python", "app.py"]
141 changes: 141 additions & 0 deletions dial-docker-compose/ci/ollama/test/app.py
@@ -0,0 +1,141 @@
import base64
import os
from pathlib import Path
from typing import Any
import aiohttp
import asyncio
import backoff

import logging
import time
from contextlib import asynccontextmanager


def get_env(name: str) -> str:
value = os.environ.get(name)
if value is None:
raise ValueError(f"'{name}' environment variable must be defined")
return value


DIAL_URL = get_env("DIAL_URL")
DIAL_API_KEY = get_env("DIAL_API_KEY")
DIAL_API_VERSION = get_env("DIAL_API_VERSION")

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)


@asynccontextmanager
async def timer(name: str):
log.debug(f"[{name}] Starting...")
start = time.perf_counter()
yield
elapsed = time.perf_counter() - start
log.debug(f"[{name}] Executed in {elapsed:.2f} seconds")


@backoff.on_exception(
backoff.expo,
(aiohttp.ClientError, aiohttp.ServerTimeoutError),
max_time=60,
)
async def post_with_retry(url: str, payload: dict, headers: dict, params: dict):
async with aiohttp.ClientSession() as session:
async with session.post(
url, json=payload, headers=headers, params=params
) as response:
response.raise_for_status()
return await response.json()


def read_image_base64(png_file: Path) -> str:
return base64.b64encode(png_file.read_bytes()).decode("utf-8")

async def dial_chat_completion(deployment_id: str, messages: list) -> str:
api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/chat/completions"

payload = {
"model": deployment_id,
"messages": messages,
"stream": False,
}
headers = {"api-key": DIAL_API_KEY}
params = {"api-version": DIAL_API_VERSION}

body = await post_with_retry(api_url, payload, headers, params)
log.debug(f"Response: {body}")

content = body.get("choices", [])[0].get("message", {}).get("content", "")

log.debug(f"Content: {content}")

return content

async def dial_embeddings(deployment_id: str, input: Any) -> list[float]:
api_url = f"{DIAL_URL}/openai/deployments/{deployment_id}/embeddings"

payload = {
"model": deployment_id,
"input": input,
}
headers = {"api-key": DIAL_API_KEY}
params = {"api-version": DIAL_API_VERSION}

body = await post_with_retry(api_url, payload, headers, params)
log.debug(f"Response: {body}")

embedding = body.get("data", [])[0].get("embedding", [])

log.debug(f"Len embedding vector: {len(embedding)}")

return embedding

async def test_chat_model(deployment_id: str):
message = "2 + 3 = ? Reply with a single number:"
messages = [{"role": "user", "content": message}]
content = await dial_chat_completion(deployment_id, messages)

if "5" not in content:
raise ValueError(f"Test failed for {deployment_id!r}")


async def test_vision_model(deployment_id: str):
base64_data = read_image_base64(Path("./image.png"))
base64_image = f"data:image/png;base64,{base64_data}"

messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe the image"},
{"type": "image_url", "image_url": {"url": base64_image}},
],
}
]

content = await dial_chat_completion(deployment_id, messages)

if "vision" not in content.lower():
raise ValueError(f"Test failed for {deployment_id!r}")

async def test_embedding_model(deployment_id: str):
embeddings = await dial_embeddings(deployment_id, "cat")

if len(embeddings) == 0 or not isinstance(embeddings[0], float):
raise ValueError(f"Test failed for {deployment_id!r}")


async def tests():
async with timer("Testing chat-model"):
await test_chat_model("chat-model")

async with timer("Testing vision-model"):
await test_vision_model("vision-model")

async with timer("Testing embedding-model"):
await test_embedding_model("embedding-model")

if __name__ == "__main__":
loop = asyncio.get_event_loop()
loop.run_until_complete(tests())
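Inside the compose network the script picks up DIAL_URL, DIAL_API_KEY and DIAL_API_VERSION from the environment set in the CI docker-compose.yml above. To run the same checks against a locally exposed quickstart stack, one could export the CI defaults by hand; a sketch, with the host port taken from common.yml:

```python
# Hypothetical standalone invocation of the CI test against a local stack.
import os
import subprocess

os.environ.update({
    "DIAL_URL": "http://localhost:8080",   # core is published on host port 8080 in common.yml
    "DIAL_API_KEY": "dial_api_key",        # default key used by the CI compose files
    "DIAL_API_VERSION": "2024-02-01",
})
subprocess.run(["python", "app.py"], check=True)
```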
Binary file added dial-docker-compose/ci/ollama/test/image.png
2 changes: 2 additions & 0 deletions dial-docker-compose/ci/ollama/test/requirements.txt
@@ -0,0 +1,2 @@
aiohttp==3.9.4
backoff==2.2.1
6 changes: 3 additions & 3 deletions dial-docker-compose/common.yml
@@ -1,13 +1,13 @@
services:
themes:
image: epam/ai-dial-chat-themes:0.4.0
image: epam/ai-dial-chat-themes:0.6.0
ports:
- "3001:8080"

chat:
ports:
- "3000:3000"
image: epam/ai-dial-chat:0.10.0
image: epam/ai-dial-chat:0.17.0
depends_on:
- themes
- core
@@ -36,7 +36,7 @@ services:
user: ${UID:-root}
ports:
- "8080:8080"
image: epam/ai-dial-core:0.9.0
image: epam/ai-dial-core:0.16.0
environment:
'AIDIAL_SETTINGS': '/opt/settings/settings.json'
'JAVA_OPTS': '-Dgflog.config=/opt/settings/gflog.xml'
2 changes: 1 addition & 1 deletion dial-docker-compose/model/docker-compose.yml
@@ -4,6 +4,6 @@ include:

services:
adapter-openai:
image: epam/ai-dial-adapter-openai:0.11.0
image: epam/ai-dial-adapter-openai:0.14.0
environment:
WEB_CONCURRENCY: "3"
