jonfairbanks committed Feb 22, 2024
1 parent b4cc043 commit 561f734
Showing 5 changed files with 103 additions and 46 deletions.
29 changes: 18 additions & 11 deletions README.md
@@ -37,27 +37,34 @@ Docker:

### To Do
- [x] Refactor
- [x] Migrate chat stream to llama-index
- [x] Implement llama-index Chat Engine with memory
- [x] Swap to llama-index Chat Engine
- [x] Function to handle file embeddings
- [ ] Allow Switching of Embedding Model
- [x] Delete Files after Index created/failed
- [x] Migrate Chat Stream to Llama-Index
- [x] Implement Llama-Index Chat Engine with Memory
- [x] Swap to Llama-Index Chat Engine
- [x] Function to Handle File Embeddings
- [ ] Allow Users to Set LLM Settings
  - [x] System Prompt
  - [ ] Chat Mode
  - [x] top_k
  - [x] chunk_size
  - [x] chunk_overlap
- [ ] Allow Switching of Embedding Model & Settings
- [x] Delete Files after Index Created/Failed
- [ ] Ability to Remove Files from Index
- [ ] Function to handle GitHub repo ingestion
- [ ] Support for JSON files
- [x] Show Loaders in UI (file uploads, conversions, ...)
- [X] Export Data (uploaded files, chat history, ...)
- [ ] Function to Handle GitHub Repo Ingestion
- [ ] Support for JSON Files
- [x] Show Loaders in UI (File Uploads, Conversions, ...)
- [x] Export Data (Uploaded Files, Chat History, ...)
- [x] View and Manage Imported Files
- [x] About Tab in Sidebar
- [x] Docker Support
- [ ] Implement Log Library
- [ ] Re-write Docstrings

### Known Issues & Bugs
- [ ] Refreshing the page loses all state (expected Streamlit behavior; need to implement local-storage)
- [x] Files can be uploaded before Ollama config is set, leading to embedding errors
- [ ] When Ollama is hosted on localhost, models are automatically loaded and selected, but the dropdown does not render the selected option
- [ ] Upon first sending the first Chat message, the File Processing expander appears to re-run itself
- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself

### Resources
- [Ollama](https://ollama.com/)
14 changes: 12 additions & 2 deletions components/page_state.py
@@ -64,11 +64,21 @@ def set_initial_state():
if "advanced" not in st.session_state:
st.session_state["advanced"] = False

if "system_prompt" not in st.session_state:
st.session_state[
"system_prompt"
] = "You are a sophisticated virtual assistant designed to assist users in comprehensively understanding and extracting insights from a wide range of documents at their disposal. Your expertise lies in tackling complex inquiries and providing insightful analyses based on the information contained within these documents."

if "top_k" not in st.session_state:
st.session_state["top_k"] = None
st.session_state[
"top_k"
] = 3  # Llama-Index's default is 2; a higher value retrieves more documents per query

if "embedding_model" not in st.session_state:
st.session_state["embedding_model"] = None

if "other_embedding_model" not in st.session_state:
st.session_state["other_embedding_model"] = None

if "chunk_size" not in st.session_state:
st.session_state["chunk_size"] = None
st.session_state["chunk_size"] = 1024
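These session-state defaults all follow the same `if key not in st.session_state` pattern. As a design note, the same initialization could be expressed as a single dict of defaults — a minimal sketch only, not part of this commit; the `SESSION_DEFAULTS` name and the refactored helper are illustrative:

```python
import streamlit as st

# Hypothetical refactor: declare every default in one place instead of
# repeating the "if key not in st.session_state" check per key.
SESSION_DEFAULTS = {
    "advanced": False,
    "system_prompt": (
        "You are a sophisticated virtual assistant designed to assist users in "
        "comprehensively understanding and extracting insights from a wide range "
        "of documents at their disposal."
    ),
    "top_k": 3,  # Llama-Index's own default is 2
    "embedding_model": None,
    "other_embedding_model": None,
    "chunk_size": 1024,
}


def set_initial_state():
    # Only write a key if it is missing, so Streamlit reruns keep user-set values.
    for key, value in SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = value
```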
33 changes: 31 additions & 2 deletions components/tabs/file_upload.py
@@ -12,6 +12,7 @@ def file_upload():
st.caption("Convert your files to embeddings for utilization during chat")
st.write("")

# Force users to confirm Settings before uploading files
if st.session_state["selected_model"] is not None:
uploaded_files = st.file_uploader(
"Select Files",
@@ -55,9 +56,37 @@
st.session_state["selected_model"],
st.session_state["ollama_endpoint"],
)

# resp = llm.complete("Hello!")
# print(resp)
service_context = llama_index.create_service_context(llm)

# Determine embedding model to use

embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None

if embedding_model is None:
print("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
print("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Best (Salesforce/SFR-Embedding-Mistral)":
print("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
print("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

service_context = llama_index.create_service_context(
llm,
st.session_state["system_prompt"],
hf_embedding_model,
st.session_state["chunk_size"],
)
except Exception as err:
print(f"Setting up Service Context failed: {err}")
error = err
@@ -102,4 +131,4 @@ def file_upload():
)

with st.expander("GitHub Repo", expanded=False):
st.write(":grey[Coming Soon™]")
st.write(":grey[Coming Soon™]")
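The chained `if embedding_model == ...` checks above map a Settings label to a HuggingFace model ID. A dict lookup is a common alternative; a minimal sketch under the assumption that the labels match those offered in settings.py — the `resolve_embedding_model` helper is hypothetical:

```python
# Sketch only: map the Settings labels from this commit to HuggingFace model IDs.
EMBEDDING_CHOICES = {
    None: "BAAI/bge-large-en-v1.5",  # nothing selected yet; fall back to the default
    "Default (bge-large-en-v1.5)": "BAAI/bge-large-en-v1.5",
    "Best (Salesforce/SFR-Embedding-Mistral)": "Salesforce/SFR-Embedding-Mistral",
}


def resolve_embedding_model(selection, other_model):
    """Return the HuggingFace model ID for the selected option."""
    if selection == "Other":
        # Use the user-provided model, or the default if the field was left empty.
        return other_model or "BAAI/bge-large-en-v1.5"
    return EMBEDDING_CHOICES.get(selection, "BAAI/bge-large-en-v1.5")
```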
41 changes: 28 additions & 13 deletions components/tabs/settings.py
@@ -30,15 +30,14 @@ def settings():
st.select_slider(
"Top K",
options=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
help="A higher Top K will return more results at the expense of accuracy.",
value=5,
help="The number of most similar documents to retrieve in response to a query.",
value=st.session_state["top_k"],
key="top_k",
)
st.text_input(
st.text_area(
"System Prompt",
value="You are a sophisticated virtual assistant designed to assist users in comprehensively understanding and extracting insights from a wide range of documents at their disposal. Your expertise lies in tackling complex inquiries and providing insightful analyses based on the information contained within these documents.",
value=st.session_state["system_prompt"],
key="system_prompt",
disabled=True,
)
st.selectbox(
"Chat Mode",
@@ -50,26 +49,42 @@

st.subheader(
"Embeddings",
help="Embeddings help convert your files to a format LLMs can understand.",
help="Embeddings are numerical representations of data, useful for tasks like document clustering and similarity detection when processing files, as they encode semantic meaning for efficient manipulation and retrieval.",
)
embedding_settings = st.container(border=True)
with embedding_settings:
st.selectbox(
embedding_model = st.selectbox(
"Model",
["Default (bge-large-en-v1.5)", "Best (Salesforce/SFR-Embedding-Mistral)"],
disabled=True,
[
"Default (bge-large-en-v1.5)",
"Best (Salesforce/SFR-Embedding-Mistral)",
"Other",
],
key="embedding_model",
)
if embedding_model == "Other":
st.text_input(
"HuggingFace Model",
key="other_embedding_model",
placeholder="Salesforce/SFR-Embedding-Mistral",
)
if st.session_state["advanced"] == True:
st.caption(
"View the [Embeddings Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)"
"View the [MTEB Embeddings Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)"
)
st.text_input(
"Chunk Size",
help="This should not exceed the value provided by your embedding model.",
help="Reducing `chunk_size` improves embedding precision by focusing on smaller text portions. This enhances information retrieval accuracy but escalates computational demands due to processing more chunks.",
key="chunk_size",
placeholder="512",
placeholder="1024",
value=st.session_state["chunk_size"],
disabled=True,
)
st.text_input(
"Chunk Overlap",
help="`chunk_overlap` sets the overlap between consecutive document chunks. It prevents loss of information at chunk boundaries. For instance, a value of 20 means a 20-token overlap. Adjusting this parameter affects the precision and generality of the calculated embeddings.",
key="chunk_overlap",
placeholder="20",
value=st.session_state["chunk_overlap"],
)

st.subheader("Export Data")
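The new help text for Chunk Size and Chunk Overlap describes how chunking trades precision against compute. As a hedged sketch of how the two settings typically interact — assuming llama_index 0.9.x, where `SentenceSplitter` is importable from `llama_index.node_parser` (this commit instead passes `chunk_size` through the `ServiceContext`):

```python
from llama_index.node_parser import SentenceSplitter  # import path assumed for llama_index 0.9.x

# Illustrative values matching the defaults surfaced in Settings above.
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

text = "..."  # contents of an uploaded document
chunks = splitter.split_text(text)

# A smaller chunk_size yields more, finer-grained chunks: better retrieval precision,
# but more embeddings to compute. chunk_overlap repeats a few tokens across neighboring
# chunks so information at chunk boundaries is not lost.
print(f"{len(chunks)} chunks produced")
```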
32 changes: 14 additions & 18 deletions utils/llama_index.py
@@ -22,25 +22,32 @@

def create_service_context(
llm, # TODO: Determine type
embed_model: str = "local:BAAI/bge-large-en-v1.5", # TODO: Allow users to set this
chunk_size: int = 512, # TODO: Allow users to set this
system_prompt: str = None, # TODO: What are the implications of no system prompt being passed?
embed_model: str = "BAAI/bge-large-en-v1.5",
chunk_size: int = 1024, # Llama-Index default is 1024
):
"""
Create a service context with the specified language model and embedding model.
Parameters:
- llm (TODO: Determine type): The language model to use for generation.
- embed_model (str, optional): The embedding model to use for similarity search. Default is "local:BAAI/bge-large-en".
- llm (TODO: Determine type): The Llama-Index LLM instance to use for generation.
- system_prompt (str, optional): System prompt to use when creating the LLM.
- embed_model (str, optional): The embedding model to use for similarity search. Default is `BAAI/bge-large-en-v1.5`.
- chunk_size (int, optional): The maximum number of tokens to consider at once. Default is 1024.
Returns:
- A `ServiceContext` object with the specified settings.
"""
formatted_embed_model = f"local:{embed_model}"
service_context = ServiceContext.from_defaults(
llm=llm, embed_model=embed_model, chunk_size=chunk_size
llm=llm,
system_prompt=system_prompt,
embed_model=formatted_embed_model,
chunk_size=chunk_size,
)

set_global_service_context(service_context)
# Note: this may be redundant since service_context is returned
set_global_service_context(service_context)

return service_context
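A hedged usage sketch (not part of this commit) of wiring an Ollama LLM into the updated `create_service_context`, assuming the `llama_index.llms.Ollama` class shipped with llama_index 0.9.x; the model name and endpoint are placeholders:

```python
from llama_index.llms import Ollama  # import path assumed for llama_index 0.9.x

llm = Ollama(model="llama2", base_url="http://localhost:11434")

service_context = create_service_context(
    llm,
    system_prompt="You are a helpful assistant for the user's documents.",
    embed_model="BAAI/bge-large-en-v1.5",  # prefixed internally with "local:"
    chunk_size=1024,
)
```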

@@ -108,23 +115,12 @@ def create_query_engine(documents, service_context):
documents=documents, service_context=service_context, show_progress=True
)

# print(f"Index: {index}")
# print()

if st.session_state["top_k"] is None:
top_k = 5
else:
top_k = st.session_state["top_k"]

query_engine = index.as_query_engine(
similarity_top_k=top_k, # A higher value will return additional results at the sake of accuracy
similarity_top_k=st.session_state["top_k"],
service_context=service_context,
streaming=True,
)

# print(f"Query Engine: {query_engine}")
# print()

st.session_state["query_engine"] = query_engine

return query_engine
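And a hedged sketch of consuming the streaming query engine stored in session state; with `streaming=True`, llama-index exposes generated tokens through `response_gen`. The example query string is a placeholder:

```python
import streamlit as st

# Assumes create_query_engine() has already run and stored the engine in session state.
query_engine = st.session_state["query_engine"]

response = query_engine.query("Summarize the uploaded documents.")
for token in response.response_gen:
    print(token, end="", flush=True)
```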