Spaces:
Build error
Build error
| import torch | |
def create_dense_embeddings(query, model):
    """Encode a single query with ``model`` and return the result as lists.

    Args:
        query: The query to embed. It is wrapped in a one-element batch
            before being handed to the model.
        model: Any object exposing ``encode(batch)`` whose result has a
            ``tolist()`` method (presumably a sentence-transformers style
            encoder -- confirm against the caller).

    Returns:
        Whatever ``model.encode([query]).tolist()`` yields. Because the
        query is sent as a batch of one, the result is expected to keep the
        batch axis (one embedding nested in an outer list).
    """
    batch = [query]
    encoded = model.encode(batch)
    return encoded.tolist()
def create_sparse_embeddings(query, model, tokenizer):
    """Encode ``query`` as a sparse vector of indices and weights.

    The query is tokenized, run through ``model`` without gradient
    tracking, and the resulting logits are squashed with
    ``log(1 + relu(x))``. The activations are then max-pooled over the
    leading axis of the first batch item and only strictly positive
    entries are kept, ordered by descending weight.

    Args:
        query: Text to encode; passed straight to ``tokenizer``.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``logits`` tensor (assumed to be
            shaped (batch, tokens, vocab) -- confirm against the model).
        tokenizer: Callable invoked as ``tokenizer(query, return_tensors="pt")``
            whose result supports ``.to(device)``.

    Returns:
        A dict with two parallel lists: ``"indices"`` (positions of the
        non-zero pooled activations) and ``"values"`` (their weights),
        sorted from the largest weight to the smallest.
    """
    # Place the inputs on the device the model actually lives on. Picking
    # CUDA merely because it is available breaks when the model was left on
    # the CPU. Fall back to the availability check for models that expose
    # no parameters.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = "cuda" if torch.cuda.is_available() else "cpu"

    inputs = tokenizer(query, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only the first item of the batch is used.
    activations = torch.log1p(torch.relu(logits[0]))
    # Max (not sum) over the input-token axis.
    pooled = torch.max(activations, dim=0).values

    nz_tokens = torch.where(pooled > 0)[0]
    nz_weights = pooled[nz_tokens]
    sorted_weights, order = torch.sort(nz_weights, descending=True)

    return {
        "indices": nz_tokens[order].cpu().tolist(),
        "values": sorted_weights.cpu().tolist(),
    }
def hybrid_score_norm(dense, sparse, alpha: float):
    """Weight a dense and a sparse vector with a convex combination.

    The dense values are scaled by ``alpha`` and the sparse values by
    ``1 - alpha``, i.e. ``alpha * dense + (1 - alpha) * sparse``.

    Args:
        dense: Iterable of floats representing the dense embedding.
        sparse: A dict with ``indices`` and ``values`` entries.
        alpha: Scale between 0 and 1 (inclusive). 1 keeps only the dense
            contribution, 0 keeps only the sparse one.

    Returns:
        A tuple ``(scaled_dense, scaled_sparse)`` where ``scaled_dense`` is
        a new list and ``scaled_sparse`` is a new dict that reuses the
        original ``indices`` and carries the rescaled ``values``.

    Raises:
        ValueError: If ``alpha`` lies outside ``[0, 1]`` or is NaN.
    """
    # Written as a chained comparison so that NaN, which compares False
    # against everything, is rejected instead of silently producing NaN
    # scores.
    if not 0 <= alpha <= 1:
        raise ValueError("Alpha must be between 0 and 1")

    sparse_weight = 1 - alpha
    scaled_sparse = {
        "indices": sparse["indices"],
        "values": [value * sparse_weight for value in sparse["values"]],
    }
    scaled_dense = [value * alpha for value in dense]
    return scaled_dense, scaled_sparse