Has anyone successfully implemented a progress display in layouts? I want the text printed by the function to appear in the modal while the embedding process is running. Because embedding a large batch of documents takes a while, I don't want the user to sit there wondering whether the process is still running or when it is going to end; however, I can't return anything until the process ends. Ideally, I would like to display a progress bar and the print statements below in a modal. I am not an advanced developer, so any help would be very useful. Thank you!
class CustomEmbeddings:
    """Compute mean-pooled text embeddings with a locally stored model.

    The model and tokenizer are loaded from
    ``<project_root>/embeddings/models/<model_id>`` and moved to CUDA when
    available, otherwise they stay on the CPU.
    """

    def __init__(self, model_id='e5-small-v2'):
        """Load the model/tokenizer for *model_id* and move the model to the device."""
        print("Initializing the embedding model...")
        self.model, self.tokenizer = self.load_model_and_tokenizer(model_id)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        print(f"Model moved to device: {self.device}")

    def load_model_and_tokenizer(self, model_id):
        """Load the model and tokenizer stored under the project's model directory.

        Args:
            model_id: Name of the sub-directory of ``embeddings/models`` that
                holds the pretrained files.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        # The project root is taken to be the parent of this file's directory.
        project_root = os.path.dirname(os.path.dirname(__file__))
        model_dir = os.path.join(project_root, 'embeddings', 'models')
        model_path = os.path.join(model_dir, model_id)
        print(f"Loading model and tokenizer from {model_dir} for model ID: {model_id}...")
        model = AutoModel.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        print("Model and tokenizer successfully loaded.")
        return model, tokenizer

    def embed_documents(self, texts, progress_callback=None):
        """Embed every item of *texts* and return one vector per item.

        Args:
            texts: Iterable of items to embed. Each item is either a plain
                string or an object exposing its text as ``page_content``.
            progress_callback: Optional callable invoked as
                ``progress_callback(done, total)`` after each item has been
                embedded, so a caller (e.g. a UI) can update a progress
                display while the loop is still running.

        Returns:
            A list with one embedding (a list of floats) per input item, in
            input order. An empty input yields an empty list.
        """
        # Materialise once so iterables without len() are supported and the
        # total is known up front for progress reporting.
        texts = list(texts)
        total = len(texts)

        print("Starting the embedding process...")
        self.model.eval()
        embeddings = []
        with torch.no_grad():
            for done, text in enumerate(texts, start=1):
                # Accept both raw strings (as passed by embed_query) and
                # document-like objects carrying the text in `page_content`.
                content = getattr(text, "page_content", text)
                tokens = self.tokenizer(
                    content,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=512,
                ).to(self.device)
                output = self.model(**tokens)

                # Mean pooling: sum the hidden states of the unmasked
                # positions and divide by how many positions were unmasked.
                mask = tokens['attention_mask']
                summed = (output.last_hidden_state * mask.unsqueeze(-1)).sum(1)
                pooled = summed / mask.sum(1, keepdim=True)
                embeddings.append(pooled[0].cpu().tolist())

                if progress_callback is not None:
                    progress_callback(done, total)
                if done % 10 == 0 or done == total:
                    print(f"Processed {done}/{total} texts")
        print("Embedding process completed.")
        return embeddings

    def embed_query(self, query):
        """Embed a single query (string or document-like) and return its vector."""
        return self.embed_documents([query])[0]