
Commit

Merge pull request #15 from jonfairbanks/develop
Logging, Caching & User Experience Improvements
jonfairbanks committed Feb 28, 2024
2 parents 29f287d + 5b710e5 commit e2acb78
Showing 13 changed files with 319 additions and 209 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
*.pyc
data/*
*.log
.cache/*
.nv/*
4 changes: 1 addition & 3 deletions components/chatbox.py
@@ -7,9 +7,7 @@ def chatbox():
if prompt := st.chat_input("How can I help?"):
# Prevent submission if Ollama endpoint is not set
if not st.session_state["query_engine"]:
st.warning(
"Please confirm settings and upload files before proceeding."
)
st.warning("Please confirm settings and upload files before proceeding.")
st.stop()

# Add the user input to messages state
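The rest of chatbox.py is truncated in this view. For context, a minimal sketch of how a Streamlit chat handler like this one typically continues, assuming the query engine exposes a query() method; the names and flow below are illustrative and not taken from this diff:

# Illustrative continuation only -- the actual code is truncated in this view.
import streamlit as st

def handle_prompt(prompt: str):
    # Record and display the user's message
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Query the engine built during file upload and display the reply
    with st.chat_message("assistant"):
        response = st.session_state["query_engine"].query(prompt)
        st.markdown(str(response))

    st.session_state["messages"].append({"role": "assistant", "content": str(response)})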
24 changes: 18 additions & 6 deletions components/page_state.py
@@ -1,5 +1,7 @@
import streamlit as st

import utils.logs as logs

from utils.ollama import get_models


@@ -13,21 +15,21 @@ def set_initial_state():
st.session_state["ollama_endpoint"] = "http://localhost:11434"

if "embedding_model" not in st.session_state:
st.session_state["embedding_model"] = None
st.session_state["embedding_model"] = "Default (bge-large-en-v1.5)"

if "ollama_models" not in st.session_state:
try:
models = get_models()
st.session_state["ollama_models"] = models
except Exception as err:
print(
logs.log.warn(
f"Warning: Initial loading of Ollama models failed. You might be hosting Ollama somewhere other than localhost. -- {err}"
)
st.session_state["ollama_models"] = []
pass

if "selected_model" not in st.session_state:
st.session_state["selected_model"] = None
st.session_state["selected_model"] = st.session_state["ollama_models"][0]

if "messages" not in st.session_state:
st.session_state["messages"] = [
@@ -51,12 +53,21 @@ def set_initial_state():
# Llama-Index #
###############

if "llm" not in st.session_state:
st.session_state["llm"] = None

if "documents" not in st.session_state:
st.session_state["documents"] = None

if "query_engine" not in st.session_state:
st.session_state["query_engine"] = None

if "service_context" not in st.session_state:
st.session_state["service_context"] = None

if "chat_mode" not in st.session_state:
st.session_state["chat_mode"] = "best"

#####################
# Advanced Settings #
#####################
@@ -70,9 +81,7 @@ def set_initial_state():
)

if "top_k" not in st.session_state:
st.session_state["top_k"] = (
3 # Default is 2; increasing to 5 will result in more documents being retrieved
)
st.session_state["top_k"] = 3

if "embedding_model" not in st.session_state:
st.session_state["embedding_model"] = None
@@ -82,3 +91,6 @@ def set_initial_state():

if "chunk_size" not in st.session_state:
st.session_state["chunk_size"] = 1024

if "chunk_overlap" not in st.session_state:
st.session_state["chunk_overlap"] = 20
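page_state.py (and the other components touched by this commit) now log through utils.logs instead of print(). That module is not included in this diff; below is a minimal sketch of a utils/logs.py exposing a module-level log object, assuming the standard library logging module and a log file name matching the new *.log .gitignore entry (the handler setup and file name are assumptions, not taken from the repo):

# utils/logs.py -- hypothetical sketch; the real module is not part of this diff.
import logging

log = logging.getLogger("local-rag")
log.setLevel(logging.INFO)

# Write to a local log file (covered by the new *.log .gitignore entry) and to the console.
_file_handler = logging.FileHandler("local-rag.log")
_stream_handler = logging.StreamHandler()
_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

for _handler in (_file_handler, _stream_handler):
    _handler.setFormatter(_formatter)
    log.addHandler(_handler)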
158 changes: 8 additions & 150 deletions components/tabs/file_upload.py
@@ -1,162 +1,20 @@
import os

import streamlit as st

import utils.helpers as func
import utils.ollama as ollama
import utils.llama_index as llama_index

from components.tabs.local_files import local_files
from components.tabs.github_repo import github_repo
from components.tabs.website import website


def file_upload():
st.title("Directly import your files")
st.caption("Convert your files to embeddings for utilization during chat")
st.write("")

# Force users to confirm Settings before uploading files
if st.session_state["selected_model"] is not None:
file_upload_container = st.container(border=True)
with file_upload_container:
uploaded_files = st.file_uploader(
"Select Files",
accept_multiple_files=True,
type=(
"csv",
"docx",
"epub",
"ipynb",
"json",
"md",
"pdf",
"ppt",
"pptx",
),
)
else:
st.warning("Please configure Ollama settings before proceeding!", icon="⚠️")
file_upload_container = st.container(border=True)
with file_upload_container:
uploaded_files = st.file_uploader(
"Select Files",
accept_multiple_files=True,
type=(
"csv",
"docx",
"epub",
"ipynb",
"json",
"md",
"pdf",
"ppt",
"pptx",
),
disabled=True,
)

if len(uploaded_files) > 0:
st.session_state["file_list"] = uploaded_files

with st.status("Preparing your data...", expanded=True) as status:
error = None

######################
# Save Files to Disk #
######################

st.caption("Uploading Files Locally")
for uploaded_file in uploaded_files:
with st.spinner(f"Processing {uploaded_file.name}..."):
save_dir = os.getcwd() + "/data"
func.save_uploaded_file(uploaded_file, save_dir)

st.caption("Loading Embedding Model")

######################################
# Create Llama-Index service-context #
# to use local LLMs and embeddings #
######################################

try:
llm = ollama.create_ollama_llm(
st.session_state["selected_model"],
st.session_state["ollama_endpoint"],
)

# resp = llm.complete("Hello!")
# print(resp)

# Determine embedding model to use

embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None
with st.expander("💻   **Local Files**", expanded=False):
local_files()

if embedding_model == None:
print("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
print("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
print("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
print("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

service_context = llama_index.create_service_context(
llm,
st.session_state["system_prompt"],
hf_embedding_model,
st.session_state["chunk_size"],
)
except Exception as err:
print(f"Setting up Service Context failed: {err}")
error = err

#######################################
# Load files from the data/ directory #
#######################################

st.caption("Processing File Data")
try:
documents = llama_index.load_documents(save_dir)
st.session_state["documents"] = documents
except Exception as err:
print(f"Document Load Error: {err}")
error = err

###########################################
# Create an index from ingested documents #
###########################################

st.caption("Creating File Index")
try:
llama_index.create_query_engine(documents, service_context)
except Exception as err:
print(f"Index Creation Error: {err}")
error = err

#####################
# Show Final Status #
#####################

if error is not None:
status.update(
label="File processing failed.",
state="error",
expanded=True,
)
st.error(error)
else:
status.update(
label="Your files are ready. Let's chat!",
state="complete",
expanded=False,
)

with st.expander("GitHub Repo", expanded=False):
with st.expander("🗂️  **GitHub Repo**", expanded=False):
github_repo()

with st.expander("🌐   **Website**", expanded=False):
website()
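After this change, file_upload.py no longer performs uploads itself; it only renders three expanders that delegate to the dedicated tab components (the added expander lines appear interleaved with the removed code in this view). Pieced together from the additions above, the refactored module reads roughly as follows; the removed upload and indexing flow presumably moved into components/tabs/local_files.py, which is not shown here:

# Reconstructed from the added lines in this diff; local_files() is assumed to
# now own the upload/indexing flow that was removed from this file.
import streamlit as st

from components.tabs.local_files import local_files
from components.tabs.github_repo import github_repo
from components.tabs.website import website


def file_upload():
    st.title("Directly import your files")
    st.caption("Convert your files to embeddings for utilization during chat")
    st.write("")

    with st.expander("💻   **Local Files**", expanded=False):
        local_files()

    with st.expander("🗂️  **GitHub Repo**", expanded=False):
        github_repo()

    with st.expander("🌐   **Website**", expanded=False):
        website()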
49 changes: 29 additions & 20 deletions components/tabs/github_repo.py
@@ -40,39 +40,45 @@ def github_repo():
st.session_state["selected_model"],
st.session_state["ollama_endpoint"],
)
st.session_state["llm"] = llm
st.caption("✔️ LLM Initialized")

# resp = llm.complete("Hello!")
# print(resp)
except Exception as err:
logs.log.error(f"Failed to setup LLM: {err}")
error = err

# Determine embedding model to use
####################################
# Determine embedding model to use #
####################################

embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None
embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None

if embedding_model == None:
logs.log.info("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"
if embedding_model == None:
# logs.log.info("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
logs.log.info("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"
if embedding_model == "Default (bge-large-en-v1.5)":
# logs.log.info("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
logs.log.info(
"Using the Salesforce embedding model; RIP yer VRAM..."
)
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"
if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
# logs.log.info("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
logs.log.info("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]
if embedding_model == "Other":
# logs.log.info("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

service_context = llama_index.create_service_context(
llm,
try:
llama_index.create_service_context(
st.session_state["llm"],
st.session_state["system_prompt"],
hf_embedding_model,
st.session_state["chunk_size"],
# st.session_state["chunk_overlap"],
)
st.caption("✔️ Context Created")
except Exception as err:
@@ -97,7 +103,10 @@ def github_repo():
###########################################

try:
llama_index.create_query_engine(documents, service_context)
llama_index.create_query_engine(
st.session_state["documents"],
st.session_state["service_context"],
)
st.caption("✔️ Created File Index")
except Exception as err:
logs.log.error(f"Index Creation Error: {err}")
(The remainder of this file's diff and the diffs for the other changed files were not loaded in this view.)
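Across these files, the helpers in utils/llama_index.py now read their inputs from and write their results to st.session_state (note the new llm, service_context, documents, and query_engine keys added in page_state.py). That module is not part of this view; the sketch below infers plausible implementations from the call sites above, assuming llama-index's ServiceContext and VectorStoreIndex APIs (import paths and parameter handling are assumptions, not taken from the repo):

# Hypothetical sketch of utils/llama_index.py, inferred from the call sites in
# this diff; the actual module is not shown. Import paths assume llama-index >= 0.10.
import streamlit as st
from llama_index.core import ServiceContext, VectorStoreIndex


def create_service_context(llm, system_prompt, embed_model, chunk_size):
    # Build the context from the user's settings and cache it for later steps
    service_context = ServiceContext.from_defaults(
        llm=llm,
        system_prompt=system_prompt,
        embed_model=f"local:{embed_model}",  # resolves a local HuggingFace embedding model
        chunk_size=chunk_size,
    )
    st.session_state["service_context"] = service_context
    return service_context


def create_query_engine(documents, service_context):
    # Index the ingested documents and expose a query engine for the chatbox
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    st.session_state["query_engine"] = index.as_query_engine(
        similarity_top_k=st.session_state["top_k"]
    )
    return st.session_state["query_engine"]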

