- RAG & SPIN Specialist improvements
This commit is contained in:
@@ -16,10 +16,7 @@ task_description: >
|
|||||||
|
|
||||||
{custom_description}
|
{custom_description}
|
||||||
expected_output: >
|
expected_output: >
|
||||||
- Personal Identification information such as name, email, phone number, job title, and any additional information that
|
|
||||||
may prove to be interesting in the current or future conversations.
|
|
||||||
- Company information such as company name, industry, size, company website, ...
|
|
||||||
{custom_expected_output}
|
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
date_added: "2025-01-08"
|
date_added: "2025-01-08"
|
||||||
|
|||||||
@@ -11,12 +11,7 @@ task_description: >
|
|||||||
|
|
||||||
{custom_description}
|
{custom_description}
|
||||||
expected_output: >
|
expected_output: >
|
||||||
- Personal Identification information such as name, email, phone number, job title, and any additional information that
|
|
||||||
may prove to be interesting in the current or future conversations.
|
|
||||||
- Company information such as company name, industry, size, company website, ...
|
|
||||||
{custom_expected_output}
|
|
||||||
- Top {nr_of_questions} questions to ask in order to complete identification.
|
|
||||||
{custom_expected_output}
|
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
date_added: "2025-01-08"
|
date_added: "2025-01-08"
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ task_description: >
|
|||||||
%%%{additional_questions}%%%
|
%%%{additional_questions}%%%
|
||||||
|
|
||||||
expected_output: >
|
expected_output: >
|
||||||
One consolidated communication towards the end user.
|
|
||||||
{custom_expected_output}
|
{custom_expected_output}
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
|
|||||||
@@ -14,9 +14,7 @@ task_description: >
|
|||||||
Query:
|
Query:
|
||||||
{query}
|
{query}
|
||||||
expected_output: >
|
expected_output: >
|
||||||
- Answer
|
|
||||||
- A list of sources used in generating the answer, citations
|
|
||||||
- An indication (True or False) if there's insufficient information to give an answer.
|
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
date_added: "2025-01-08"
|
date_added: "2025-01-08"
|
||||||
|
|||||||
@@ -10,13 +10,7 @@ task_description: >
|
|||||||
Latest reply:
|
Latest reply:
|
||||||
{query}
|
{query}
|
||||||
expected_output: >
|
expected_output: >
|
||||||
The SPIN analysis, comprised of:
|
|
||||||
- Situation information: a description of the customer's current context / situation.
|
|
||||||
- Problem information: a description of the customer's problems uncovering it's challenges and pain points.
|
|
||||||
- Implication information: implications of situation / identified problems, i.e. of the consequences of those problems.
|
|
||||||
- Need-payoff information: Customer's needs, helping customers realize value of solutions.
|
|
||||||
- Additional info: Information that does not fit in the above SPIN-categories, but that can be commercially
|
|
||||||
interesting, including if provided: {custom_expected_output}
|
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
date_added: "2025-01-08"
|
date_added: "2025-01-08"
|
||||||
|
|||||||
@@ -12,17 +12,7 @@ task_description: >
|
|||||||
Latest reply:
|
Latest reply:
|
||||||
{query}
|
{query}
|
||||||
expected_output: >
|
expected_output: >
|
||||||
The SPIN analysis, comprised of:
|
|
||||||
- Situation information: a description of the customer's current context / situation.
|
|
||||||
- Problem information: a description of the customer's problems uncovering it's challenges and pain points.
|
|
||||||
- Implication information: implications of situation / identified problems, i.e. of the consequences of those problems.
|
|
||||||
- Need-payoff information: Customer's needs, helping customers realize value of solutions.
|
|
||||||
- Additional info: Information that does not fit in the above SPIN-categories, but that can be commercially interesting.
|
|
||||||
The SPIN questions:
|
|
||||||
- At max {nr_of_questions} questions to complete the SPIN-context of the customer, as a markdown list without '```'.
|
|
||||||
Potential Customer Indication:
|
|
||||||
- An indication if - given the current SPIN - this could be a good customer (True) or not (False).
|
|
||||||
{custom_expected_output}
|
|
||||||
metadata:
|
metadata:
|
||||||
author: "Josako"
|
author: "Josako"
|
||||||
date_added: "2025-01-08"
|
date_added: "2025-01-08"
|
||||||
|
|||||||
@@ -4,6 +4,6 @@ from pydantic import BaseModel, Field
|
|||||||
|
|
||||||
|
|
||||||
class RAGOutput(BaseModel):
|
class RAGOutput(BaseModel):
|
||||||
answer: Optional[str] = Field(None, description="Answer to the questions asked")
|
answer: str = Field(None, description="Answer to the questions asked")
|
||||||
citations: Optional[List[str]] = Field(None, description="A list of sources used in generating the answer")
|
insufficient_info: bool = Field(None, description="An indication if there's insufficient information to answer")
|
||||||
insufficient_info: Optional[bool] = Field(None, description="An indication if there's insufficient information to answer")
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from pydantic import BaseModel, Field, model_validator
|
from pydantic import BaseModel, Field, model_validator
|
||||||
@@ -11,6 +11,7 @@ class RetrieverMetadata(BaseModel):
|
|||||||
document_id: int = Field(..., description="ID of the source document")
|
document_id: int = Field(..., description="ID of the source document")
|
||||||
version_id: int = Field(..., description="Version ID of the source document")
|
version_id: int = Field(..., description="Version ID of the source document")
|
||||||
document_name: str = Field(..., description="Name of the source document")
|
document_name: str = Field(..., description="Name of the source document")
|
||||||
|
url: Optional[str] = Field(..., description="URL of the source document")
|
||||||
user_metadata: Dict[str, Any] = Field(
|
user_metadata: Dict[str, Any] = Field(
|
||||||
default_factory=dict, # This will use an empty dict if None is provided
|
default_factory=dict, # This will use an empty dict if None is provided
|
||||||
description="User-defined metadata"
|
description="User-defined metadata"
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ class StandardRAGRetriever(BaseRetriever):
|
|||||||
query_obj = (
|
query_obj = (
|
||||||
db.session.query(
|
db.session.query(
|
||||||
db_class,
|
db_class,
|
||||||
|
DocumentVersion.url,
|
||||||
(1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')
|
(1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')
|
||||||
)
|
)
|
||||||
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
|
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
|
||||||
@@ -116,7 +117,7 @@ class StandardRAGRetriever(BaseRetriever):
|
|||||||
|
|
||||||
# Transform results into standard format
|
# Transform results into standard format
|
||||||
processed_results = []
|
processed_results = []
|
||||||
for doc, similarity in results:
|
for doc, url, similarity in results:
|
||||||
# Parse user_metadata to ensure it's a dictionary
|
# Parse user_metadata to ensure it's a dictionary
|
||||||
user_metadata = self._parse_metadata(doc.document_version.user_metadata)
|
user_metadata = self._parse_metadata(doc.document_version.user_metadata)
|
||||||
processed_results.append(
|
processed_results.append(
|
||||||
@@ -128,6 +129,7 @@ class StandardRAGRetriever(BaseRetriever):
|
|||||||
document_id=doc.document_version.doc_id,
|
document_id=doc.document_version.doc_id,
|
||||||
version_id=doc.document_version.id,
|
version_id=doc.document_version.id,
|
||||||
document_name=doc.document_version.document.name,
|
document_name=doc.document_version.document.name,
|
||||||
|
url=url or "",
|
||||||
user_metadata=user_metadata,
|
user_metadata=user_metadata,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -65,7 +65,6 @@ class SpecialistExecutor(CrewAIBaseSpecialistExecutor):
|
|||||||
"language": arguments.language,
|
"language": arguments.language,
|
||||||
"query": arguments.query,
|
"query": arguments.query,
|
||||||
"context": formatted_context,
|
"context": formatted_context,
|
||||||
"citations": citations,
|
|
||||||
"history": self.formatted_history,
|
"history": self.formatted_history,
|
||||||
"name": self.specialist.configuration.get('name', ''),
|
"name": self.specialist.configuration.get('name', ''),
|
||||||
"company": self.specialist.configuration.get('company', ''),
|
"company": self.specialist.configuration.get('company', ''),
|
||||||
|
|||||||
@@ -124,7 +124,6 @@ class SpecialistExecutor(CrewAIBaseSpecialistExecutor):
|
|||||||
"language": arguments.language,
|
"language": arguments.language,
|
||||||
"query": arguments.query,
|
"query": arguments.query,
|
||||||
"context": formatted_context,
|
"context": formatted_context,
|
||||||
"citations": citations,
|
|
||||||
"history": self.formatted_history,
|
"history": self.formatted_history,
|
||||||
"historic_spin": json.dumps(self.latest_spin, indent=2),
|
"historic_spin": json.dumps(self.latest_spin, indent=2),
|
||||||
"historic_lead_info": json.dumps(self.latest_lead_info, indent=2),
|
"historic_lead_info": json.dumps(self.latest_lead_info, indent=2),
|
||||||
@@ -217,7 +216,9 @@ class SPINFlow(EveAICrewAIFlow[SPINFlowState]):
|
|||||||
async def execute_rag(self):
|
async def execute_rag(self):
|
||||||
inputs = self.state.input.model_dump()
|
inputs = self.state.input.model_dump()
|
||||||
try:
|
try:
|
||||||
|
current_app.logger.debug("In execute_rag")
|
||||||
crew_output = await self.rag_crew.kickoff_async(inputs=inputs)
|
crew_output = await self.rag_crew.kickoff_async(inputs=inputs)
|
||||||
|
current_app.logger.debug(f"Crew execution ended with output:\n{crew_output}")
|
||||||
self.specialist_executor.log_tuning("RAG Crew Output", crew_output.model_dump())
|
self.specialist_executor.log_tuning("RAG Crew Output", crew_output.model_dump())
|
||||||
output_pydantic = crew_output.pydantic
|
output_pydantic = crew_output.pydantic
|
||||||
if not output_pydantic:
|
if not output_pydantic:
|
||||||
@@ -276,13 +277,16 @@ class SPINFlow(EveAICrewAIFlow[SPINFlowState]):
|
|||||||
if self.state.spin:
|
if self.state.spin:
|
||||||
additional_questions = additional_questions + self.state.spin.questions
|
additional_questions = additional_questions + self.state.spin.questions
|
||||||
inputs["additional_questions"] = additional_questions
|
inputs["additional_questions"] = additional_questions
|
||||||
|
current_app.logger.debug(f"Prepared Answers: \n{inputs['prepared_answers']}")
|
||||||
|
current_app.logger.debug(f"Additional Questions: \n{additional_questions}")
|
||||||
try:
|
try:
|
||||||
crew_output = await self.rag_consolidation_crew.kickoff_async(inputs=inputs)
|
crew_output = await self.rag_consolidation_crew.kickoff_async(inputs=inputs)
|
||||||
|
current_app.logger.debug(f"Consolidation output after crew execution:\n{crew_output}")
|
||||||
self.specialist_executor.log_tuning("RAG Consolidation Crew Output", crew_output.model_dump())
|
self.specialist_executor.log_tuning("RAG Consolidation Crew Output", crew_output.model_dump())
|
||||||
output_pydantic = crew_output.pydantic
|
output_pydantic = crew_output.pydantic
|
||||||
if not output_pydantic:
|
if not output_pydantic:
|
||||||
raw_json = json.loads(crew_output.raw)
|
raw_json = json.loads(crew_output.raw)
|
||||||
output_pydantic = LeadInfoOutput.model_validate(raw_json)
|
output_pydantic = RAGOutput.model_validate(raw_json)
|
||||||
self.state.final_output = output_pydantic
|
self.state.final_output = output_pydantic
|
||||||
return crew_output
|
return crew_output
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -314,7 +314,10 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
|
|||||||
])
|
])
|
||||||
|
|
||||||
# Return document_ids for citations
|
# Return document_ids for citations
|
||||||
citations = [ctx.metadata.document_id for ctx in unique_contexts]
|
citations = [{"document_id": ctx.metadata.document_id,
|
||||||
|
"document_version_id": ctx.metadata.version_id,
|
||||||
|
"url": ctx.metadata.url}
|
||||||
|
for ctx in unique_contexts]
|
||||||
|
|
||||||
self.log_tuning("Context Retrieval Results",
|
self.log_tuning("Context Retrieval Results",
|
||||||
{"Formatted Context": formatted_context,
|
{"Formatted Context": formatted_context,
|
||||||
@@ -345,6 +348,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
|
|||||||
|
|
||||||
modified_result = {
|
modified_result = {
|
||||||
"detailed_query": detailed_query,
|
"detailed_query": detailed_query,
|
||||||
|
"citations": citations,
|
||||||
}
|
}
|
||||||
final_result = result.model_copy(update=modified_result)
|
final_result = result.model_copy(update=modified_result)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user