jonfairbanks committed Feb 22, 2024
1 parent b4cc043 commit 561f734
Showing 5 changed files with 103 additions and 46 deletions.
29 changes: 18 additions & 11 deletions README.md
@@ -37,27 +37,34 @@ Docker:

### To Do
- [x] Refactor
- [x] Migrate chat stream to llama-index
- [x] Implement llama-index Chat Engine with memory
- [x] Swap to llama-index Chat Engine
- [x] Function to handle file embeddings
- [ ] Allow Switching of Embedding Model
- [x] Delete Files after Index created/failed
- [x] Migrate Chat Stream to Llama-Index
- [x] Implement Llama-Index Chat Engine with Memory
- [x] Swap to Llama-Index Chat Engine
- [x] Function to Handle File Embeddings
- [ ] Allow Users to Set LLM Settings
  - [x] System Prompt
  - [ ] Chat Mode
  - [x] top_k
  - [x] chunk_size
  - [x] chunk_overlap
- [ ] Allow Switching of Embedding Model & Settings
- [x] Delete Files after Index Created/Failed
- [ ] Ability to Remove Files from Index
- [ ] Function to handle GitHub repo ingestion
- [ ] Support for JSON files
- [x] Show Loaders in UI (file uploads, conversions, ...)
- [X] Export Data (uploaded files, chat history, ...)
- [ ] Function to Handle GitHub Repo Ingestion
- [ ] Support for JSON Files
- [x] Show Loaders in UI (File Uploads, Conversions, ...)
- [x] Export Data (Uploaded Files, Chat History, ...)
- [x] View and Manage Imported Files
- [x] About Tab in Sidebar
- [x] Docker Support
- [ ] Implement Log Library
- [ ] Re-write Docstrings

### Known Issues & Bugs
- [ ] Refreshing the page loses all state (expected Streamlit behavior; need to implement local-storage)
- [x] Files can be uploaded before Ollama config is set, leading to embedding errors
- [ ] When Ollama is hosted on localhost, models are automatically loaded and selected, but the dropdown does not render the selected option
- [ ] Upon first sending the first Chat message, the File Processing expander appears to re-run itself
- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself

### Resources
- [Ollama](https://ollama.com/)
14 changes: 12 additions & 2 deletions components/page_state.py
@@ -64,11 +64,21 @@ def set_initial_state():
if "advanced" not in st.session_state:
st.session_state["advanced"] = False

if "system_prompt" not in st.session_state:
st.session_state[
"system_prompt"
] = "You are a sophisticated virtual assistant designed to assist users in comprehensively understanding and extracting insights from a wide range of documents at their disposal. Your expertise lies in tackling complex inquiries and providing insightful analyses based on the information contained within these documents."

if "top_k" not in st.session_state:
st.session_state["top_k"] = None
st.session_state[
"top_k"
] = 3  # Llama-Index's default is 2; a higher value retrieves more documents per query

if "embedding_model" not in st.session_state:
st.session_state["embedding_model"] = None

if "other_embedding_model" not in st.session_state:
st.session_state["other_embedding_model"] = None

if "chunk_size" not in st.session_state:
st.session_state["chunk_size"] = None
st.session_state["chunk_size"] = 1024
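These session-state defaults all follow the same `if key not in st.session_state` pattern. As a design note, the same initialization could be expressed as a single dict of defaults — a minimal sketch only, not part of this commit; the `SESSION_DEFAULTS` name and the refactored helper are illustrative:

```python
import streamlit as st

# Hypothetical refactor: declare every default in one place instead of
# repeating the "if key not in st.session_state" check per key.
SESSION_DEFAULTS = {
    "advanced": False,
    "system_prompt": (
        "You are a sophisticated virtual assistant designed to assist users in "
        "comprehensively understanding and extracting insights from a wide range "
        "of documents at their disposal."
    ),
    "top_k": 3,  # Llama-Index's own default is 2
    "embedding_model": None,
    "other_embedding_model": None,
    "chunk_size": 1024,
}


def set_initial_state():
    # Only write a key if it is missing, so Streamlit reruns keep user-set values.
    for key, value in SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = value
```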
33 changes: 31 additions & 2 deletions components/tabs/file_upload.py
@@ -12,6 +12,7 @@ def file_upload():
st.caption("Convert your files to embeddings for utilization during chat")
st.write("")

# Force users to confirm Settings before uploading files
if st.session_state["selected_model"] is not None:
uploaded_files = st.file_uploader(
"Select Files",
@@ -55,9 +56,37 @@
st.session_state["selected_model"],
st.session_state["ollama_endpoint"],
)

# resp = llm.complete("Hello!")
# print(resp)
service_context = llama_index.create_service_context(llm)

# Determine embedding model to use

embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None

if embedding_model is None:
print("No embedding model set; using defaults...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
print("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Best (Salesforce/SFR-Embedding-Mistral)":
print("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
print("Using a user-provided embedding model...")
hf_embedding_model = st.session_state["other_embedding_model"]

service_context = llama_index.create_service_context(
llm,
st.session_state["system_prompt"],
hf_embedding_model,
st.session_state["chunk_size"],
)
except Exception as err:
print(f"Setting up Service Context failed: {err}")
error = err
@@ -102,4 +131,4 @@ def file_upload():
)

with st.expander("GitHub Repo", expanded=False):
st.write(":grey[Coming Soon™]")
st.write(":grey[Coming Soon™]")
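The chained `if embedding_model == ...` checks above map a Settings label to a HuggingFace model ID. A dict lookup is a common alternative; a minimal sketch under the assumption that the labels match those offered in settings.py — the `resolve_embedding_model` helper is hypothetical:

```python
# Sketch only: map the Settings labels from this commit to HuggingFace model IDs.
EMBEDDING_CHOICES = {
    None: "BAAI/bge-large-en-v1.5",  # nothing selected yet; fall back to the default
    "Default (bge-large-en-v1.5)": "BAAI/bge-large-en-v1.5",
    "Best (Salesforce/SFR-Embedding-Mistral)": "Salesforce/SFR-Embedding-Mistral",
}


def resolve_embedding_model(selection, other_model):
    """Return the HuggingFace model ID for the selected option."""
    if selection == "Other":
        # Use the user-provided model, or the default if the field was left empty.
        return other_model or "BAAI/bge-large-en-v1.5"
    return EMBEDDING_CHOICES.get(selection, "BAAI/bge-large-en-v1.5")
```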
41 changes: 28 additions & 13 deletions components/tabs/settings.py
@@ -30,15 +30,14 @@ def settings():
st.select_slider(
"Top K",
options=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
help="A higher Top K will return more results at the expense of accuracy.",
value=5,
help="The number of most similar documents to retrieve in response to a query.",
value=st.session_state["top_k"],
key="top_k",
)
st.text_input(
st.text_area(
"System Prompt",
value="You are a sophisticated virtual assistant designed to assist users in comprehensively understanding and extracting insights from a wide range of documents at their disposal. Your expertise lies in tackling complex inquiries and providing insightful analyses based on the information contained within these documents.",
value=st.session_state["system_prompt"],
key="system_prompt",
disabled=True,
)
st.selectbox(
"Chat Mode",
@@ -50,26 +49,42 @@

st.subheader(
"Embeddings",
help="Embeddings help convert your files to a format LLMs can understand.",
help="Embeddings are numerical representations of data, useful for tasks like document clustering and similarity detection when processing files, as they encode semantic meaning for efficient manipulation and retrieval.",
)
embedding_settings = st.container(border=True)
with embedding_settings:
st.selectbox(
embedding_model = st.selectbox(
"Model",
["Default (bge-large-en-v1.5)", "Best (Salesforce/SFR-Embedding-Mistral)"],
disabled=True,
[
"Default (bge-large-en-v1.5)",
"Best (Salesforce/SFR-Embedding-Mistral)",
"Other",
],
key="embedding_model",
)
if embedding_model == "Other":
st.text_input(
"HuggingFace Model",
key="other_embedding_model",
placeholder="Salesforce/SFR-Embedding-Mistral",
)
if st.session_state["advanced"] == True:
st.caption(
"View the [Embeddings Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)"
"View the [MTEB Embeddings Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)"
)
st.text_input(
"Chunk Size",
help="This should not exceed the value provided by your embedding model.",
help="Reducing `chunk_size` improves embedding precision by focusing on smaller text portions. This enhances information retrieval accuracy but escalates computational demands due to processing more chunks.",
key="chunk_size",
placeholder="512",
placeholder="1024",
value=st.session_state["chunk_size"],
disabled=True,
)
st.text_input(
"Chunk Overlap",
help="`chunk_overlap` sets the overlap between consecutive document chunks. It prevents loss of information at chunk boundaries. For instance, a value of 20 means a 20-token overlap. Adjusting this parameter affects the precision and generality of the calculated embeddings.",
key="chunk_overlap",
placeholder="20",
value=st.session_state["chunk_overlap"],
)

st.subheader("Export Data")
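The new help text for Chunk Size and Chunk Overlap describes how chunking trades precision against compute. As a hedged sketch of how the two settings typically interact — assuming llama_index 0.9.x, where `SentenceSplitter` is importable from `llama_index.node_parser` (this commit instead passes `chunk_size` through the `ServiceContext`):

```python
from llama_index.node_parser import SentenceSplitter  # import path assumed for llama_index 0.9.x

# Illustrative values matching the defaults surfaced in Settings above.
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

text = "..."  # contents of an uploaded document
chunks = splitter.split_text(text)

# A smaller chunk_size yields more, finer-grained chunks: better retrieval precision,
# but more embeddings to compute. chunk_overlap repeats a few tokens across neighboring
# chunks so information at chunk boundaries is not lost.
print(f"{len(chunks)} chunks produced")
```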
32 changes: 14 additions & 18 deletions utils/llama_index.py
@@ -22,25 +22,32 @@

def create_service_context(
llm, # TODO: Determine type
embed_model: str = "local:BAAI/bge-large-en-v1.5", # TODO: Allow users to set this
chunk_size: int = 512, # TODO: Allow users to set this
system_prompt: str = None, # TODO: What are the implications of no system prompt being passed?
embed_model: str = "BAAI/bge-large-en-v1.5",
chunk_size: int = 1024, # Llama-Index default is 1024
):
"""
Create a service context with the specified language model and embedding model.
Parameters:
- llm (TODO: Determine type): The language model to use for generation.
- embed_model (str, optional): The embedding model to use for similarity search. Default is "local:BAAI/bge-large-en".
- llm (TODO: Determine type): The Llama-Index LLM instance to use for generation.
- system_prompt (str, optional): System prompt to use when creating the LLM.
- embed_model (str, optional): The embedding model to use for similarity search. Default is `BAAI/bge-large-en-v1.5`.
- chunk_size (int, optional): The maximum number of tokens to consider at once. Default is 1024.
Returns:
- A `ServiceContext` object with the specified settings.
"""
formatted_embed_model = f"local:{embed_model}"
service_context = ServiceContext.from_defaults(
llm=llm, embed_model=embed_model, chunk_size=chunk_size
llm=llm,
system_prompt=system_prompt,
embed_model=formatted_embed_model,
chunk_size=chunk_size,
)

set_global_service_context(service_context)
# Note: this may be redundant since service_context is returned
set_global_service_context(service_context)

return service_context
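A hedged usage sketch (not part of this commit) of wiring an Ollama LLM into the updated `create_service_context`, assuming the `llama_index.llms.Ollama` class shipped with llama_index 0.9.x; the model name and endpoint are placeholders:

```python
from llama_index.llms import Ollama  # import path assumed for llama_index 0.9.x

llm = Ollama(model="llama2", base_url="http://localhost:11434")

service_context = create_service_context(
    llm,
    system_prompt="You are a helpful assistant for the user's documents.",
    embed_model="BAAI/bge-large-en-v1.5",  # prefixed internally with "local:"
    chunk_size=1024,
)
```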

@@ -108,23 +115,12 @@ def create_query_engine(documents, service_context):
documents=documents, service_context=service_context, show_progress=True
)

# print(f"Index: {index}")
# print()

if st.session_state["top_k"] is None:
top_k = 5
else:
top_k = st.session_state["top_k"]

query_engine = index.as_query_engine(
similarity_top_k=top_k, # A higher value will return additional results at the sake of accuracy
similarity_top_k=st.session_state["top_k"],
service_context=service_context,
streaming=True,
)

# print(f"Query Engine: {query_engine}")
# print()

st.session_state["query_engine"] = query_engine

return query_engine
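And a hedged sketch of consuming the streaming query engine stored in session state; with `streaming=True`, llama-index exposes generated tokens through `response_gen`. The example query string is a placeholder:

```python
import streamlit as st

# Assumes create_query_engine() has already run and stored the engine in session state.
query_engine = st.session_state["query_engine"]

response = query_engine.query("Summarize the uploaded documents.")
for token in response.response_gen:
    print(token, end="", flush=True)
```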