Update README.md

Browse files

Files changed (1) hide show

README.md +79 -0

README.md CHANGED Viewed

	@@ -17,3 +17,82 @@ Recent research efforts have been directed toward the development of automated s
17
18	<img src="https://github.com/OrKatz7/parler-hate-speech/blob/main/docs/parler_results.jpeg?raw=true">
19

 <img src="https://github.com/OrKatz7/parler-hate-speech/blob/main/docs/parler_results.jpeg?raw=true">
+```
+!pip install huggingface_hub
+!pip install tokenizers transformers
+!pip install iterative-stratification
+!git clone https://github.com/OrKatz7/parler-hate-speech
+%cd parler-hate-speech/src
+```
+```
+from huggingface_hub import hf_hub_download
+import torch
+import sys
+from model import CustomModel,MeanPooling
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+import numpy as np
+class CFG:
+    model="microsoft/deberta-v3-base"
+    target_cols=['label_mean']
+```
+```
+name = "OrK7/parler_hate_speech"
+downloaded_model_path = hf_hub_download(repo_id=name, filename="pytorch_model.bin")
+model = torch.load(downloaded_model_path)
+tokenizer = AutoTokenizer.from_pretrained(name)
+```
+```
+def prepare_input(text):
+    inputs = tokenizer.encode_plus(
+        text,
+        return_tensors=None,
+        add_special_tokens=True,
+        max_length=512,
+        pad_to_max_length=True,
+        truncation=True
+    )
+    for k, v in inputs.items():
+        inputs[k] = torch.tensor(np.array(v).reshape(1,-1), dtype=torch.long)
+    return inputs
+def collate(inputs):
+    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
+    for k, v in inputs.items():
+        inputs[k] = inputs[k][:,:mask_len]
+    return inputs
+```
+```
+from transformers import Pipeline
+class HatePipeline(Pipeline):
+    def _sanitize_parameters(self, **kwargs):
+        preprocess_kwargs = {}
+        if "maybe_arg" in kwargs:
+            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
+        return preprocess_kwargs, {}, {}
+    def preprocess(self, inputs):
+        out = prepare_input(inputs)
+        return collate(out)
+    def _forward(self, model_inputs):
+        outputs = self.model(model_inputs)
+        return outputs
+    def postprocess(self, model_outputs):
+        return np.array(model_outputs[0,0].numpy()).clip(0,1)*4+1
+```
+```
+pipe = HatePipeline(model=model)  # wrap the loaded model in the custom pipeline
+pipe("I Love you #")  # returns the value computed by HatePipeline.postprocess
+```
+results: 1.0
+```
+pipe("I Hate #$%#$%Jewish%$#@%^^@#")  # second example input for the pipeline
+```
+results: 4.155200004577637