- Add the general chunking parameters `chunking_heading_level` and chunking patterns
- Add the processor types docx and markdown
This commit is contained in:
@@ -45,7 +45,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
|
||||
return text_splitter.split_text(transcription)
|
||||
|
||||
def _process_chunks(self, chunks):
|
||||
self._log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
|
||||
self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
|
||||
llm = self.model_variables.get_llm()
|
||||
template = self.model_variables.get_template('transcript')
|
||||
language_template = create_language_template(template, self.document_version.language)
|
||||
@@ -64,7 +64,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
|
||||
}
|
||||
markdown = chain.invoke(input_transcript)
|
||||
markdown = self._clean_markdown(markdown)
|
||||
self._log_tuning("_process_chunks", {
|
||||
self.log_tuning("_process_chunks", {
|
||||
"Chunk Number": f"{i + 1} of {len(chunks)}",
|
||||
"Chunk": chunk,
|
||||
"Previous Chunk": previous_part,
|
||||
|
||||
Reference in New Issue
Block a user