This model has been pushed to the Hub using the PyTorchModelHubMixin integration.
For full documentation of this model, please see the official model card, published by the team that built the model.
Mozilla AI has made it possible to load the govtech/stsb-roberta-base-off-topic model using from_pretrained. To do this, you'll first need to copy the CrossEncoderWithMLP model
architecture from the original model card and make sure to add PyTorchModelHubMixin as an inherited class. See this article.
Then, you can do the following:
import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin
from transformers import AutoModel, AutoTokenizer
class CrossEncoderWithMLP(nn.Module, PyTorchModelHubMixin):
    """Cross-encoder backbone followed by an MLP and a linear classifier head.

    The wrapped ``base_model`` encodes a tokenized sentence pair in a single
    pass. Its pooled output is narrowed by a two-layer MLP and then projected
    to ``num_labels`` logits.

    Args:
        base_model: Encoder that exposes ``config.hidden_size`` and, when
            called, returns an object with a ``pooler_output`` attribute.
        num_labels: Number of scores produced by the classifier head.
    """

    def __init__(self, base_model, num_labels=2):
        super().__init__()

        # NOTE: the attribute names (``base_model``, ``mlp``, ``classifier``)
        # and the order of layers inside ``mlp`` define the parameter names
        # stored in a checkpoint; keep them stable so saved weights still load.
        self.base_model = base_model

        # Width of the encoder's pooled representation.
        hidden_size = base_model.config.hidden_size

        # MLP applied to the pooled sentence-pair representation; it shrinks
        # the feature width from hidden_size to hidden_size // 4 in two steps.
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.ReLU(),
        )

        # Classifier head mapping the MLP features to one score per label.
        self.classifier = nn.Linear(hidden_size // 4, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return classification logits for a tokenized sentence pair.

        Args:
            input_ids: Token ids of the encoded pair, as produced by the
                tokenizer.
            attention_mask: Attention mask that accompanies ``input_ids``.

        Returns:
            The raw (un-normalized) output of the classifier head,
            ``num_labels`` scores per encoded pair.
        """
        # Encode both sentences in one pass. Keyword arguments make the call
        # independent of the encoder's positional parameter order.
        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        features = self.mlp(outputs.pooler_output)
        return self.classifier(features)
# Hub repository ids: the tokenizer and encoder are loaded from the base
# cross-encoder repository, the wrapped classifier from the mozilla-ai one.
BASE_CHECKPOINT = "cross-encoder/stsb-roberta-base"
OFF_TOPIC_CHECKPOINT = "mozilla-ai/stsb-roberta-base-off-topic"

tokenizer = AutoTokenizer.from_pretrained(BASE_CHECKPOINT)
base_model = AutoModel.from_pretrained(BASE_CHECKPOINT)

# ``base_model`` is forwarded to ``CrossEncoderWithMLP.__init__``.
off_topic = CrossEncoderWithMLP.from_pretrained(
    OFF_TOPIC_CHECKPOINT,
    base_model=base_model,
)
# Then you can build a predict function that utilizes the tokenizer
def predict(model, tokenizer, sentence1, sentence2, *, max_length=512, device=None):
    """Classify one sentence pair and return the label with its probabilities.

    Args:
        model: Callable model taking ``input_ids`` and ``attention_mask``
            keyword arguments and returning a 2-D tensor of logits.
        tokenizer: Tokenizer called with the two sentences; its result must
            provide ``"input_ids"`` and ``"attention_mask"`` tensors.
        sentence1: First text of the pair.
        sentence2: Second text of the pair.
        max_length: Length the encoded pair is truncated/padded to.
        device: Device the inputs are moved to. When ``None``, the device of
            the model's first parameter is used (CPU if it has none).

    Returns:
        A tuple ``(predicted_label, probabilities)`` where ``predicted_label``
        is the index of the highest-probability class as a Python ``int`` and
        ``probabilities`` is a NumPy array of the softmax scores.
    """
    if device is None:
        # Follow the model so inputs and weights end up on the same device.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    encoding = tokenizer(
        sentence1,
        sentence2,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_token_type_ids=False,
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.softmax(logits, dim=1)
        # ``.item()`` requires a single element, i.e. exactly one encoded pair.
        predicted_label = torch.argmax(probabilities, dim=1).item()

    return predicted_label, probabilities.cpu().numpy()
- Downloads last month
- 4
Model tree for mozilla-ai/stsb-roberta-base-off-topic
Base model
FacebookAI/roberta-base
Quantized
cross-encoder/stsb-roberta-base