-
Notifications
You must be signed in to change notification settings - Fork 0
/
chat.py
131 lines (115 loc) · 4.23 KB
/
chat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from rich import print
from rich.console import Console
from rich.markdown import Markdown
from util import clear, printLogo, handleApiKeyInput
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from time import sleep
from chroma import Chroma
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import configparser
import click
@click.command()
@click.option("--path", default="", help="Path to the pdf")
@click.option("--clear_db", default=False, help="Clear the chroma db")
@click.option("--chunk_size", default=1000, help="Text splitter chunk size")
@click.option("--max_tokens", default=512, help="LLM max token config")
@click.option(
    "--context_size", default=5, help="Number of documents to include in prompt context"
)
def main(path, clear_db, chunk_size, max_tokens, context_size):
    """Chat with a PDF: embed its chunks into Chroma, then answer questions.

    Flow: pick a PDF (flag or prompt, defaulting to bitcoin.pdf), split it
    into chunks, embed each chunk into the Chroma collection, then run an
    interactive Q&A loop that feeds the top-matching chunks to an OpenAI
    LLM via a restrictive prompt. 'S' prints collection stats, 'Q' exits.
    """
    console = Console()
    config = configparser.ConfigParser()
    config.read("config.ini")
    clear()
    printLogo()

    # Resolve the PDF path: the --path flag wins; otherwise prompt the user,
    # falling back to the bundled bitcoin.pdf on an empty answer.
    if path == "":
        console.print("\n\nEnter the path to the pdf: ", style="bold white")
        pdfPath = input()
        if pdfPath == "":
            pdfPath = "bitcoin.pdf"
        clear()
    else:
        pdfPath = path

    # Load the PDF and split it into overlapping-free chunks of chunk_size.
    with console.status("[bold blue]Loading PDF...", spinner_style="bold blue"):
        loader = PyPDFLoader(pdfPath)
        pages = loader.load_and_split()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=0
        )
        docs = text_splitter.split_documents(pages)
        sleep(1)  # brief pause so the spinner is visible even for tiny PDFs
    console.print(
        "[bold blue]Loaded [bold white]",
        len(pages),
        "[bold blue] pages.",
        highlight=False,
    )
    console.print(
        "[bold blue]Loaded [bold white]",
        len(docs),
        "[bold blue] docs.",
        highlight=False,
    )

    # Fix: single config lookup with an empty-string fallback. The original
    # called config.get twice and compared its str result against the int
    # sentinel -1; "missing" and "empty" both mean "prompt the user".
    OPENAI_API_KEY = config.get("keys", "open_ai_api_key", fallback="")
    if not OPENAI_API_KEY:
        OPENAI_API_KEY = handleApiKeyInput(console)

    # Embed every chunk into the Chroma collection (optionally cleared first).
    cc = Chroma(OPENAI_API_KEY, pdfPath, clear_db)
    with console.status("[bold blue]Working on tasks...", spinner_style="bold blue"):
        for doc in docs:
            cc.add(doc.page_content, console)

    llm = OpenAI(openai_api_key=OPENAI_API_KEY, max_tokens=max_tokens)
    prompt = PromptTemplate(
        input_variables=["docs", "question"],
        template="""Given the following sections from a text, answer the question using only that information.
    If you are unsure and the answer is not explicitly written in the provided text,
    say 'Sorry, I don't know how to help with that'. Answer in Markdown. Context sections: {docs} Question: '''{question}'''""",
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    clear()
    printLogo()
    console.print(
        "Embeddings have been generated, you can now interact with the pdf\n\n",
        style="bold blue",
    )

    # Interactive Q&A loop: 'S' = stats, 'Q' = quit, anything else = question.
    while True:
        console.print(
            "\n\n\n[bold white]Ask a question or type [bold blue]'S'[bold white] for stats & more info. ([bold red]Q[bold white] to quit)",
            highlight=False,
        )
        question = input("Question: ")
        if question == "S":
            console.print("\n\n---\nCount: ", cc.collection.count(), "\n---\n\n\n")
        elif question == "Q":
            console.print("\n\n[bold red]Stopping...")
            # Fix: quit() is the site-module interactive helper; SystemExit
            # is the proper way to exit a script (same exit status 0).
            raise SystemExit
        else:
            console.print("[bold blue]Answer: ")
            with console.status(
                "[bold white] Thinking...", spinner="pong", spinner_style="bold white"
            ):
                console.print(
                    Markdown(
                        chain.run(
                            {
                                "docs": cc.query(question, context_size),
                                "question": question,
                            }
                        )
                    ),
                    style="bold white",
                )
# Entry point: click parses sys.argv and invokes main() with the flag values.
if __name__ == "__main__":
    main()