Add memory/chat history to a prompt with retrieved nodes processed as context #14789
Replies: 2 comments
---
To add memory/chat history to the prompt and use it during retrieval in your customized query engine, you can follow these steps.

**Step 1: Modify the Prompt to Include Chat History**

Update your prompt to include a placeholder for the chat history:

```python
customized_prompt = [
    (
        "system",
        "Return string to the given question using the provided Content and source_name in no particular order as references and use 'source_name' to improve the answer relevance for a given question and also provide citations using 'chunk_id' to the 'Content' from which you answered the question."
    ),
    (
        "user",
        "{chat_history}\n{context_str}"
    ),
    (
        "user",
        "You are a professional geologist and petroleum engineer that can answer questions based on the content that I have provided. Please respond to this question: '{query_str}'. The answer should be properly formatted with all necessary special characters like newlines, tabs, hyperlinks, image links, and bullets. Answer only if you get the information from the content. If you get partial information then return the partial answer only. Always send relevant and correct chunk_ids with the answer fragments."
    )
]
```
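If it helps, here is a minimal sketch of how this tuple list could be turned into a prompt template with LlamaIndex's `ChatPromptTemplate` (assuming the `llama_index.core` 0.10+ package layout; the values passed to `format` below are purely illustrative):

```python
from llama_index.core import ChatPromptTemplate

# Build a chat template from the (role, content) tuples above; the
# {chat_history}, {context_str}, and {query_str} slots are filled at query time.
chat_template = ChatPromptTemplate.from_messages(customized_prompt)

# format() renders everything into a single prompt string for llm.complete();
# format_messages() would instead return chat messages for llm.chat().
fmt_prompt = chat_template.format(
    chat_history="user: Which well is this report about?\nassistant: Well A-12.",
    context_str="Content: ... chunk_id: 'chk-1', source_name: report.pdf p3.",
    query_str="What porosity values are reported?",
)
```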
**Step 2: Update the Query Engine to Handle Chat History**

Modify your `BaseQueryEngine` class so that it keeps a chat history, prepends it to the retrieval query, and injects it into the prompt:

```python
import logging

# Import paths assume the llama_index 0.10+ package layout; adjust for your version.
from llama_index.core import QueryBundle
from llama_index.postprocessor.cohere_rerank import CohereRerank


class BaseQueryEngine:
    def __init__(self, index, llm, similarity_top_k=20, use_rerank=True, rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt", filters=None):
        self.index = index
        self.similarity_top_k = similarity_top_k
        # load_prompt and cohere_api_key come from your existing setup
        self.text_qa_template = load_prompt(text_qa_template_path)
        self.llm = llm
        self.filters = filters
        self.use_rerank = use_rerank
        self.chat_history = []  # Initialize chat history
        if self.use_rerank:
            self.reranker = CohereRerank(api_key=cohere_api_key, top_n=rerank_top_n)

    def query(self, question):
        retriever = self.index.as_retriever(
            similarity_top_k=self.similarity_top_k,
            filters=self.filters)
        # Prepend the chat history to the retrieval query so follow-up
        # questions are retrieved against the conversation context
        full_query = "\n".join([msg['content'] for msg in self.chat_history]) + "\n" + question
        retrieved_nodes = retriever.retrieve(full_query)
        if self.use_rerank:
            logging.info("Using rerank")
            query_bundle = QueryBundle(full_query)
            reranked_nodes = self.reranker.postprocess_nodes(retrieved_nodes, query_bundle)
        else:
            logging.info("Not using rerank")
            reranked_nodes = retrieved_nodes
        context_str = ""
        source_data = {}
        for i, node in enumerate(reranked_nodes):
            context_str += f"Content: {node.text}. chunk_id: 'chk-{i+1}', source_name: {node.metadata['file_name']}{node.metadata['page_label']}."
            source_data[f"chk-{i+1}"] = {
                "text": node.text,
                "file_name": node.metadata['file_name'],
                "page": node.metadata['page_label']
            }
        context_str = context_str.replace('\n', '')
        # Format the prompt with chat history
        chat_history_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in self.chat_history])
        fmt_qa_prompt = self.text_qa_template.format(
            chat_history=chat_history_str, context_str=context_str, query_str=question
        )
        response = self.llm.complete(fmt_qa_prompt)
        # Update chat history; store the response as plain text so the
        # "\n".join calls above keep working on the next turn
        self.chat_history.append({"role": "user", "content": question})
        self.chat_history.append({"role": "assistant", "content": str(response)})
        return response, source_data
```
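One caveat with this approach: `self.chat_history` grows without bound, so both the retrieval query and the formatted prompt get longer on every turn. A minimal sketch of capping it; the `MAX_HISTORY_MESSAGES` constant and `trimmed_history` helper are illustrative additions, not part of the original code:

```python
MAX_HISTORY_MESSAGES = 6  # illustrative cap: the last 3 user/assistant turns

def trimmed_history(chat_history):
    """Return only the most recent messages so prompts stay bounded."""
    return chat_history[-MAX_HISTORY_MESSAGES:]
```

Inside `query`, you would then build `full_query` and `chat_history_str` from `trimmed_history(self.chat_history)` instead of `self.chat_history`.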
**Step 3: Use the Updated Query Engine**

When using the `QueryEngineWithTfidfFilter` subclass, no extra changes are needed: it inherits the chat-history handling from `BaseQueryEngine`:

```python
from llama_index.core.vector_stores import FilterCondition, MetadataFilter, MetadataFilters


class QueryEngineWithTfidfFilter(BaseQueryEngine):
    def __init__(self,
                 index,
                 llm,
                 semantic_weight=0.7,
                 filter_top_n=5,
                 similarity_top_k=20,
                 use_rerank=True,
                 rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt",
                 file_path_df='metadata_embedding_db.pkl',
                 embed_model=embed_model):
        super().__init__(
            index=index,
            similarity_top_k=similarity_top_k,
            text_qa_template_path=text_qa_template_path,
            llm=llm,
            rerank_top_n=rerank_top_n,
            filters=None,
            use_rerank=use_rerank
        )
        self.metadata_search_engine = MetadataSearchEngine(
            df_path=file_path_df,
            embed_model=embed_model
        )
        self.top_n = filter_top_n
        self.semantic_weight = semantic_weight

    def query(self, question):
        # Narrow retrieval to the most relevant report files before
        # delegating to the base engine, which adds the chat history
        metadata_search = self.metadata_search_engine.filter(
            question,
            top_n=self.top_n,
            tfidf_weight=1 - self.semantic_weight,
        )
        logging.info(f"Metadata filter results: {metadata_search}")
        self.filters = MetadataFilters(
            filters=[MetadataFilter(key="ReportFileName", value=item) for item in metadata_search],
            condition=FilterCondition.OR,
        )
        return super().query(question)
```

This setup ensures that the chat history is included in the prompt and used during retrieval, improving the relevance and context of the responses.
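For completeness, a minimal multi-turn usage sketch (assuming `index`, `llm`, and `embed_model` are already constructed as in your setup; the questions are illustrative):

```python
engine = QueryEngineWithTfidfFilter(index=index, llm=llm)

# First turn: retrieval uses only the question itself.
answer, sources = engine.query("What is the porosity of the Bakken formation?")

# Follow-up turn: the stored chat history is prepended to the retrieval
# query and injected into the prompt via the {chat_history} placeholder,
# so pronouns like "its" can resolve against the previous turn.
answer, sources = engine.query("And what about its permeability?")
```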
---
I was recently doing something similar, running Llama locally with LM Studio but using the Vercel AI SDK, and found Zep really helpful in this area. It works with any SDK, framework, or AI model. Cheers. https://www.unremarkable.ai/llama-chat-history-with-zeps-ai-memory-knowledge-graph/
---
How does LlamaIndex include memory/chat history in the RAG process at a lower level? Is the chat history also used when retrieving for a new question?
I have the following customized query engine and prompt; can you show me how to add memory/chat history into the prompt? Also, how can I use the chat history for retrieval?
My customized prompt:

```python
[
    (
        "system",
        "Return string to the given question using the provided Content and source_name in no particular order as references and use 'source_name' to improve the answer relevance for a given question and also provide citations using 'chunk_id' to the 'Content' from which you answered the question."
    ),
    (
        "user",
        "{context_str}"
    ),
    (
        "user",
        "You are a professional geologist and petroleum engineer that can answer questions based on the content that I have provided. Please respond to this question: '{query_str}'. Answer should be properly formatted with all necessary special characters like new line, tab, hyperlinks, image links, and bullets. Answer only if you get the information from the content. If you get partial information then return the partial answer only. Always send relevant and correct chunk_ids with the answer fragments."
    )
]
```
My customized query engine:

```python
class BaseQueryEngine:
    def __init__(self, index, llm, similarity_top_k=20, use_rerank=True, rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt", filters=None):
        self.index = index
        self.similarity_top_k = similarity_top_k
        self.text_qa_template = load_prompt(text_qa_template_path)
        self.llm = llm
        self.filters = filters
        self.use_rerank = use_rerank
        if self.use_rerank:
            #self.reranker = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=rerank_top_n)
            self.reranker = CohereRerank(api_key=cohere_api_key, top_n=rerank_top_n)

class QueryEngineWithTfidfFilter(BaseQueryEngine):
    def __init__(self,
                 index,
                 llm,
                 semantic_weight=0.7,
                 filter_top_n=5,
                 similarity_top_k=20,
                 use_rerank=True,
                 rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt",
                 file_path_df='metadata_embedding_db.pkl',
                 embed_model=embed_model
                 ):
```