From 2ca006d82c6596abbfd6a8cefd60af7c1ccddac9 Mon Sep 17 00:00:00 2001 From: Josako Date: Thu, 22 Aug 2024 16:41:13 +0200 Subject: [PATCH] Added excluded element classes to HTML parsing to allow for more complex document parsing Added chunking to conversion of HTML to markdown in case of large files --- common/models/user.py | 4 +- common/utils/model_utils.py | 1 + eveai_app/views/user_forms.py | 1 + eveai_app/views/user_views.py | 6 + eveai_workers/tasks.py | 190 ++++++++++++++---- integrations/Wordpress/eveai-chat-widget.zip | Bin 14062 -> 14098 bytes .../eveai-chat-widget/eveai-chat_plugin.php | 2 +- .../eveai-chat-widget/js/eveai-chat-widget.js | 18 +- nginx/static/assets/css/eveai.css | 1 + requirements.txt | 4 +- 10 files changed, 181 insertions(+), 46 deletions(-) diff --git a/common/models/user.py b/common/models/user.py index c700613..ffc9db4 100644 --- a/common/models/user.py +++ b/common/models/user.py @@ -35,10 +35,11 @@ class Tenant(db.Model): html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li']) html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True) html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True) + html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True) + min_chunk_size = db.Column(db.Integer, nullable=True, default=2000) max_chunk_size = db.Column(db.Integer, nullable=True, default=3000) - # Embedding search variables es_k = db.Column(db.Integer, nullable=True, default=5) es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7) @@ -80,6 +81,7 @@ class Tenant(db.Model): 'html_end_tags': self.html_end_tags, 'html_included_elements': self.html_included_elements, 'html_excluded_elements': self.html_excluded_elements, + 'html_excluded_classes': self.html_excluded_classes, 'min_chunk_size': self.min_chunk_size, 'max_chunk_size': self.max_chunk_size, 'es_k': self.es_k, diff --git a/common/utils/model_utils.py b/common/utils/model_utils.py index 320dd99..686cfa8 100644 --- a/common/utils/model_utils.py +++ b/common/utils/model_utils.py @@ -86,6 +86,7 @@ def select_model_variables(tenant): model_variables['html_end_tags'] = tenant.html_end_tags model_variables['html_included_elements'] = tenant.html_included_elements model_variables['html_excluded_elements'] = tenant.html_excluded_elements + model_variables['html_excluded_classes'] = tenant.html_excluded_classes # Set Chunk Size variables model_variables['min_chunk_size'] = tenant.min_chunk_size diff --git a/eveai_app/views/user_forms.py b/eveai_app/views/user_forms.py index a8fc984..4eb49b0 100644 --- a/eveai_app/views/user_forms.py +++ b/eveai_app/views/user_forms.py @@ -32,6 +32,7 @@ class TenantForm(FlaskForm): default='p, li') html_included_elements = StringField('HTML Included Elements', validators=[Optional()]) html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()]) + html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()]) min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000) max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], default=3000) # Embedding Search variables diff --git a/eveai_app/views/user_views.py b/eveai_app/views/user_views.py index e7afa51..943a935 100644 --- a/eveai_app/views/user_views.py +++ b/eveai_app/views/user_views.py @@ -68,6 +68,8 @@ def tenant(): if form.html_included_elements.data else [] new_tenant.html_excluded_elements 
= [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \ if form.html_excluded_elements.data else [] + new_tenant.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \ + if form.html_excluded_classes.data else [] current_app.logger.debug(f'html_tags: {new_tenant.html_tags},' f'html_end_tags: {new_tenant.html_end_tags},' @@ -123,6 +125,8 @@ def edit_tenant(tenant_id): form.html_included_elements.data = ', '.join(tenant.html_included_elements) if tenant.html_excluded_elements: form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements) + if tenant.html_excluded_classes: + form.html_excluded_classes.data = ', '.join(tenant.html_excluded_classes) if form.validate_on_submit(): # Populate the tenant with form data @@ -134,6 +138,8 @@ def edit_tenant(tenant_id): elem.strip()] tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if elem.strip()] + tenant.html_excluded_classes = [elem.strip() for elem in form.html_excluded_classes.data.split(',') if + elem.strip()] db.session.commit() flash('Tenant updated successfully.', 'success') diff --git a/eveai_workers/tasks.py b/eveai_workers/tasks.py index d90b0ca..67cd18e 100644 --- a/eveai_workers/tasks.py +++ b/eveai_workers/tasks.py @@ -3,7 +3,6 @@ import os from datetime import datetime as dt, timezone as tz import subprocess - import gevent from bs4 import BeautifulSoup import html @@ -12,6 +11,7 @@ from flask import current_app # OpenAI imports from langchain.chains.summarize import load_summarize_chain from langchain.text_splitter import CharacterTextSplitter, MarkdownHeaderTextSplitter +from langchain_core.documents import Document from langchain_core.exceptions import LangChainException from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -105,7 +105,7 @@ def create_embeddings(tenant_id, document_version_id): def process_pdf(tenant, model_variables, document_version): file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language, - document_version.id, document_version.file_name) + document_version.id, document_version.file_name) pdf_text = '' pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data)) @@ -114,8 +114,9 @@ def process_pdf(tenant, model_variables, document_version): markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text) markdown_file_name = f'{document_version.id}.md' - minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id, - markdown_file_name, markdown.encode()) + minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, + document_version.id, + markdown_file_name, markdown.encode()) potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name) chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'], @@ -160,7 +161,7 @@ def delete_embeddings_for_document_version(document_version): def process_html(tenant, model_variables, document_version): file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language, - document_version.id, document_version.file_name) + document_version.id, document_version.file_name) html_content = file_data.decode('utf-8') # The tags to be considered can be dependent on the tenant @@ -173,13 +174,15 @@ def process_html(tenant, 
model_variables, document_version): excluded_elements=html_excluded_elements) extracted_file_name = f'{document_version.id}-extracted.html' - minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id, - extracted_file_name, extracted_html.encode()) + minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, + document_version.id, + extracted_file_name, extracted_html.encode()) markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html) markdown_file_name = f'{document_version.id}.md' - minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id, - markdown_file_name, markdown.encode()) + minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, + document_version.id, + markdown_file_name, markdown.encode()) potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name) chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'], @@ -235,19 +238,94 @@ def enrich_chunks(tenant, document_version, title, chunks): return enriched_chunks +# def generate_markdown_from_html(tenant, model_variables, document_version, html_content): +# current_app.logger.debug(f'Generating markdown from HTML for tenant {tenant.id} ' +# f'on document version {document_version.id}') +# llm = model_variables['llm'] +# template = model_variables['html_parse_template'] +# parse_prompt = ChatPromptTemplate.from_template(template) +# setup = RunnablePassthrough() +# output_parser = StrOutputParser() +# +# chain = setup | parse_prompt | llm | output_parser +# input_html = {"html": html_content} +# +# markdown = chain.invoke(input_html) +# +# current_app.logger.debug(f'Finished generating markdown from HTML for tenant {tenant.id} ' +# f'on document version {document_version.id}') +# +# return markdown + + def generate_markdown_from_html(tenant, model_variables, document_version, html_content): - current_app.logger.debug(f'Generating Markdown from HTML for tenant {tenant.id} ' + current_app.logger.debug(f'Generating markdown from HTML for tenant {tenant.id} ' f'on document version {document_version.id}') + llm = model_variables['llm'] template = model_variables['html_parse_template'] parse_prompt = ChatPromptTemplate.from_template(template) setup = RunnablePassthrough() output_parser = StrOutputParser() - chain = setup | parse_prompt | llm | output_parser - input_html = {"html": html_content} - markdown = chain.invoke(input_html) + soup = BeautifulSoup(html_content, 'lxml') + + def split_content(soup, max_size=20000): + chunks = [] + current_chunk = [] + current_size = 0 + + for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']): + element_html = str(element) + element_size = len(element_html) + + if current_size + element_size > max_size and current_chunk: + chunks.append(''.join(map(str, current_chunk))) + current_chunk = [] + current_size = 0 + + current_chunk.append(element) + current_size += element_size + + if element.name in ['h1', 'h2', 'h3'] and current_size > max_size: + chunks.append(''.join(map(str, current_chunk))) + current_chunk = [] + current_size = 0 + + if current_chunk: + chunks.append(''.join(map(str, current_chunk))) + + return chunks + + chunks = split_content(soup) + + markdown_chunks = [] + + for chunk in chunks: + current_app.logger.debug(f'Processing chunk to 
generate markdown from HTML for tenant {tenant.id} ' + f'on document version {document_version.id}') + if tenant.embed_tuning: + current_app.embed_tuning_logger.debug(f'Processing chunk: \n ' + f'------------------\n' + f'{chunk}\n' + f'------------------\n') + input_html = {"html": chunk} + markdown_chunk = chain.invoke(input_html) + markdown_chunks.append(markdown_chunk) + if tenant.embed_tuning: + current_app.embed_tuning_logger.debug(f'Processed markdown chunk: \n ' + f'-------------------------\n' + f'{markdown_chunk}\n' + f'-------------------------\n') + current_app.logger.debug(f'Finished processing chunk to generate markdown from HTML for tenant {tenant.id} ' + f'on document version {document_version.id}') + + # Combine all markdown chunks + markdown = "\n\n".join(markdown_chunks) + + current_app.logger.debug(f'Finished generating markdown from HTML for tenant {tenant.id} ' + f'on document version {document_version.id}') return markdown @@ -324,36 +402,73 @@ def embed_chunks(tenant, model_variables, document_version, chunks): def parse_html(tenant, html_content, tags, included_elements=None, excluded_elements=None): + current_app.logger.debug(f'Parsing HTML for tenant {tenant.id}') soup = BeautifulSoup(html_content, 'html.parser') extracted_html = '' + excluded_classes = parse_excluded_classes(tenant.html_excluded_classes) if included_elements: elements_to_parse = soup.find_all(included_elements) else: - elements_to_parse = [soup] # parse the entire document if no included_elements specified + elements_to_parse = [soup] + log_parsing_info(tenant, tags, included_elements, excluded_elements, excluded_classes, elements_to_parse) + + for element in elements_to_parse: + for sub_element in element.find_all(tags): + if should_exclude_element(sub_element, excluded_elements, excluded_classes): + continue + extracted_html += extract_element_content(sub_element) + + title = soup.find('title').get_text(strip=True) if soup.find('title') else '' + + current_app.logger.debug(f'Finished parsing HTML for tenant {tenant.id}') + + return extracted_html, title + + +def parse_excluded_classes(excluded_classes): + parsed = {} + for rule in excluded_classes: + element, cls = rule.split('.', 1) + parsed.setdefault(element, set()).add(cls) + return parsed + + +def should_exclude_element(element, excluded_elements, excluded_classes): + if excluded_elements and element.find_parent(excluded_elements): + return True + return is_element_excluded_by_class(element, excluded_classes) + + +def is_element_excluded_by_class(element, excluded_classes): + for parent in element.parents: + if element_matches_exclusion(parent, excluded_classes): + return True + return element_matches_exclusion(element, excluded_classes) + + +def element_matches_exclusion(element, excluded_classes): + if '*' in excluded_classes and any(cls in excluded_classes['*'] for cls in element.get('class', [])): + return True + return element.name in excluded_classes and \ + any(cls in excluded_classes[element.name] for cls in element.get('class', [])) + + +def extract_element_content(element): + content = ' '.join(child.strip() for child in element.stripped_strings) + return f'<{element.name}>{content}\n' + + +def log_parsing_info(tenant, tags, included_elements, excluded_elements, excluded_classes, elements_to_parse): if tenant.embed_tuning: current_app.embed_tuning_logger.debug(f'Tags to parse: {tags}') current_app.embed_tuning_logger.debug(f'Included Elements: {included_elements}') - current_app.embed_tuning_logger.debug(f'Included Elements: 
{len(included_elements)}') current_app.embed_tuning_logger.debug(f'Excluded Elements: {excluded_elements}') + current_app.embed_tuning_logger.debug(f'Excluded Classes: {excluded_classes}') current_app.embed_tuning_logger.debug(f'Found {len(elements_to_parse)} elements to parse') current_app.embed_tuning_logger.debug(f'First element to parse: {elements_to_parse[0]}') - # Iterate through the found included elements - for element in elements_to_parse: - # Find all specified tags within each included element - for sub_element in element.find_all(tags): - if tenant.embed_tuning: - current_app.embed_tuning_logger.debug(f'Found element: {sub_element.name}') - if excluded_elements and sub_element.find_parent(excluded_elements): - continue # Skip this sub_element if it's within any of the excluded_elements - extracted_html += f'<{sub_element.name}>{sub_element.get_text(strip=True)}\n' - - title = soup.find('title').get_text(strip=True) - - return extracted_html, title - def process_youtube(tenant, model_variables, document_version): base_path = os.path.join(current_app.config['UPLOAD_FOLDER'], @@ -414,8 +529,9 @@ def download_youtube(url, tenant_id, document_version, file_name): with open(temp_file.name, 'rb') as f: file_data = f.read() - minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id, - file_name, file_data) + minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, + document_version.id, + file_name, file_data) current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}') return file_name, yt.title, yt.description, yt.author @@ -429,7 +545,7 @@ def compress_audio(tenant_id, document_version, input_file, output_file): current_app.logger.info(f'Compressing audio for tenant: {tenant_id}') input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language, - document_version.id, input_file) + document_version.id, input_file) with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input: temp_input.write(input_data) @@ -448,8 +564,9 @@ def compress_audio(tenant_id, document_version, input_file, output_file): with open(temp_output.name, 'rb') as f: compressed_data = f.read() - minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id, - output_file, compressed_data) + minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, + document_version.id, + output_file, compressed_data) current_app.logger.info(f'Compressed audio for tenant: {tenant_id}') except Exception as e: @@ -465,7 +582,7 @@ def transcribe_audio(tenant_id, document_version, input_file, output_file, model # Download the audio file from MinIO audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language, - document_version.id, input_file) + document_version.id, input_file) # Load the audio data into pydub audio = AudioSegment.from_mp3(io.BytesIO(audio_data)) @@ -649,6 +766,3 @@ def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars): return actual_chunks pass - - - diff --git a/integrations/Wordpress/eveai-chat-widget.zip b/integrations/Wordpress/eveai-chat-widget.zip index 921f14faad2aece1c0a5a0d180fdcece5464e5a8..dce62c31e10c7ae5381f0502a1e4e132a49370b3 100644 GIT binary patch delta 7330 zcmZ{J1yoes*FFr*fW*+<2-4l%jpWcFAstczUI9V6h8!9ZL|PhYk?s&g>28%$5d1T| 
[GIT binary patch delta data for integrations/Wordpress/eveai-chat-widget.zip omitted]
z2{k%ln6<|>7~=vTz5Yqd;=3#wUp+8YjVEsI_LK(A8`_Pb3bEJQTPf@_D08iKmy{>L zmSgxUv<@jWt34`)xTxa>()o7gHyNBFi^FrD`;OfwTrI3zq$?cWKWPaJzC!Ea{iVcN z4s=||@Z~^vd1un>?TvJ-Cu!1o~ zG^I*GmM<++CS*21q^ZHg@e2*$w7&wX;@W-jW4DxZz(@3YgLlelpF2B{AkEKZxS&Xa zEsHfJsxqkdURX4y!ijmK?D^W^#M*AZ+CWsA%vI!ES*$s`L1WXY{esWU>bBzOTC%iD z6)frKgU};86~2K4@3yxYC7GtyLl#%%^NftxXC0f`qe26V#zv+K#;&hi;=F?d37aJ_*}aY5Mm>moe?d^!DqXO>v!?ki~M*4zFBmdU=UHZ^>y@5-V67 z*>TM{p36-U7mY~@pGg;zq&#SUW<+Q!grhDrwwm#M~2ib=ZU!Z0`8S_@IVwY zPq*cU9WlK^9d+7(5pg+Y?}zSkit9df$j`FYl6H1;#I8=JzrtcFherR*P z@;%*8q^?Y1L_2CIF3^qIwzml()HK4 z5;T045aSzUB%jnzC#H|x@H`e)C)gU1c|RV@G$bOam)MivT$EQ8?Pb+L3&oye-F_XN z?>R&-;Mb^CGuV`qMO|THc43InLR?5}B2MuTBEtV@A#fp1rTSlrq%ZzX-*-(o*}kxS%G*uNF{5ngJ97xbn{Sm0LD zaKC*Rk!b#i8%Rjmei?8hZqB=$e-KF-5COMBi|$?E?HA&IE{K5JyKdJLk05Y-vf$`2k& z!FP1QufSw?BGoNx2q+o^*?_eZ=#yKbvu+=Of03Q-cS&|zfa;cO>bu0h1rQ<@ivR^+ zj2uyb=}yeLRk?lx-}?UxnEk(iyC{DH?@ { console.log('Socket connected OK'); + console.log('Connect event data:', data); + console.log('Connect event this:', this); this.setStatusMessage('Connected to EveAI.'); this.updateConnectionStatus(true); this.startHeartbeat(); - if (data.room) { + if (data && data.room) { this.room = data.room; console.log(`Joined room: ${this.room}`); + } else { + console.log('Room information not received on connect'); } }); this.socket.on('authenticated', (data) => { - console.log('Authenticated event received: ', data); + console.log('Authenticated event received'); + console.log('Authentication event data:', data); + console.log('Authentication event this:', this); this.setStatusMessage('Authenticated.'); - if (data.token) { - this.jwtToken = data.token; // Store the JWT token received from the server + if (data && data.token) { + this.jwtToken = data.token; } - if (data.room) { + if (data && data.room) { this.room = data.room; console.log(`Confirmed room: ${this.room}`); + } else { + console.log('Room information not received on authentication'); } }); diff --git a/nginx/static/assets/css/eveai.css b/nginx/static/assets/css/eveai.css index 0bd24f0..ee97655 100644 --- a/nginx/static/assets/css/eveai.css +++ b/nginx/static/assets/css/eveai.css @@ -386,6 +386,7 @@ input[type="radio"] { .btn-danger:hover { background-color: darken(var(--bs-danger), 10%) !important; /* Darken the background on hover */ border-color: darken(var(--bs-danger), 10%) !important; /* Darken the border on hover */ + color: var(--bs-white) !important; /* Ensure the text remains white and readable */ } /* Success Alert Styling */ diff --git a/requirements.txt b/requirements.txt index c1b966c..383a138 100644 --- a/requirements.txt +++ b/requirements.txt @@ -73,4 +73,6 @@ minio~=7.2.7 Werkzeug~=3.0.3 itsdangerous~=2.2.0 cryptography~=43.0.0 -graypy~=2.1.0 \ No newline at end of file +graypy~=2.1.0 + +lxml~=5.3.0