sondalex
committed on
Commit
·
25b4d43
0
Parent(s):
Initial commit
Browse files- .gitattributes +35 -0
- NOTICE +18 -0
- README.md +30 -0
- config.json +35 -0
- merges.txt +0 -0
- onnx/model.onnx +3 -0
- onnx/model_bnb4.onnx +3 -0
- onnx/model_fp16.onnx +3 -0
- onnx/model_int8.onnx +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_quantized.onnx +3 -0
- onnx/model_uint8.onnx +3 -0
- quantize_config.json +38 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +19 -0
- vocab.json +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
NOTICE
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NOTICE
|
| 2 |
+
------
|
| 3 |
+
|
| 4 |
+
This project, sondalex/GovernanceBERT-governance, is a derivative work based on the original
|
| 5 |
+
*ESGBERT/GovernanceBERT-governance* model, licensed under the Apache 2.0 License.
|
| 6 |
+
|
| 7 |
+
Original work:
|
| 8 |
+
- Title: ESGBERT/GovernanceBERT-governance
|
| 9 |
+
- Author(s): ESGBERT
|
| 10 |
+
- Source: https://huggingface.co/ESGBERT/GovernanceBERT-governance
|
| 11 |
+
- License: Apache 2.0 License (https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md)
|
| 12 |
+
|
| 13 |
+
Modifications:
|
| 14 |
+
- Converted the model to ONNX format for compatibility with ONNX runtime.
|
| 15 |
+
- Script used for conversion: https://github.com/huggingface/transformers.js/blob/2.17.2/scripts/convert.py
|
| 16 |
+
|
| 17 |
+
These modifications were made by sondalex in 2024.
|
| 18 |
+
|
README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: en
|
| 3 |
+
license: apache-2.0
|
| 4 |
+
---
|
| 5 |
+
|
| 6 |
+
# Introduction
|
| 7 |
+
|
| 8 |
+
This model is an ONNX-format conversion of the original model at https://huggingface.co/ESGBERT/GovernanceBERT-governance.
|
| 9 |
+
|
| 10 |
+
It is compatible with Transformers.js.
|
| 11 |
+
|
| 12 |
+
# Usage (Transformers.js)
|
| 13 |
+
|
| 14 |
+
```js
|
| 15 |
+
import { pipeline } from "@huggingface/transformers";
|
| 16 |
+
|
| 17 |
+
const pipe = await pipeline(
|
| 18 |
+
"text-classification",
|
| 19 |
+
"sondalex/GovernanceBERT-governance",
|
| 20 |
+
);
|
| 21 |
+
|
| 22 |
+
const sentences = [
|
| 23 |
+
"The CEO has announced a major restructuring of the company's leadership team.",
|
| 24 |
+
];
|
| 25 |
+
|
| 26 |
+
const output = await pipe(sentences);
|
| 27 |
+
|
| 28 |
+
console.log(output);
|
| 29 |
+
```
|
| 30 |
+
|
config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "ESGBERT/GovernanceBERT-governance",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "none",
|
| 15 |
+
"1": "governance"
|
| 16 |
+
},
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 3072,
|
| 19 |
+
"label2id": {
|
| 20 |
+
"governance": 1,
|
| 21 |
+
"none": 0
|
| 22 |
+
},
|
| 23 |
+
"layer_norm_eps": 1e-05,
|
| 24 |
+
"max_position_embeddings": 514,
|
| 25 |
+
"model_type": "roberta",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 6,
|
| 28 |
+
"pad_token_id": 2,
|
| 29 |
+
"position_embedding_type": "absolute",
|
| 30 |
+
"problem_type": "single_label_classification",
|
| 31 |
+
"transformers_version": "4.33.2",
|
| 32 |
+
"type_vocab_size": 1,
|
| 33 |
+
"use_cache": true,
|
| 34 |
+
"vocab_size": 50265
|
| 35 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
onnx/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4a12a9ad2a62525e02df9d75b2cb5fb772d4a7bcfa1776020c468b40feb0a3c
|
| 3 |
+
size 328626518
|
onnx/model_bnb4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:160425ee74257bd32b254c269b4d9e707e8b7fb5eb3bab4d1b61532d131d3cde
|
| 3 |
+
size 182680605
|
onnx/model_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e66d1446f0f116c8b41fca80b2191e17bd054171b0ea9892f8cb9111a0951b
|
| 3 |
+
size 164416570
|
onnx/model_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61cf4066f72e42d4c6731099bb49e5eae29b7daf617ac814283d566f928e6b6d
|
| 3 |
+
size 82745641
|
onnx/model_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd5316a547c4448427b3f3b5347f5c2c02a6139c175b6f1a3dd382df09eed868
|
| 3 |
+
size 185334549
|
onnx/model_q4f16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7b89b8f39c9c7b33bb8bbbc1633a1e6659c1c9b2b8bde18c58c6ca607f1b4c5
|
| 3 |
+
size 103375109
|
onnx/model_quantized.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f37f6962b0becb8a2c85809bb0da9e986c5918c09053db0433b2d561b5526b87
|
| 3 |
+
size 82762475
|
onnx/model_uint8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eff042533d9a1accb2d8f1c338e79672e2ff7f7b1f21da69cc0e84c610622695
|
| 3 |
+
size 82745640
|
quantize_config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_channel": true,
|
| 3 |
+
"reduce_range": true,
|
| 4 |
+
"per_model_config": {
|
| 5 |
+
"model": {
|
| 6 |
+
"op_types": [
|
| 7 |
+
"Gather",
|
| 8 |
+
"MatMul",
|
| 9 |
+
"Where",
|
| 10 |
+
"Cast",
|
| 11 |
+
"Constant",
|
| 12 |
+
"Add",
|
| 13 |
+
"Pow",
|
| 14 |
+
"Transpose",
|
| 15 |
+
"Slice",
|
| 16 |
+
"Not",
|
| 17 |
+
"Mul",
|
| 18 |
+
"Gemm",
|
| 19 |
+
"Div",
|
| 20 |
+
"Concat",
|
| 21 |
+
"Tanh",
|
| 22 |
+
"CumSum",
|
| 23 |
+
"ConstantOfShape",
|
| 24 |
+
"Softmax",
|
| 25 |
+
"Erf",
|
| 26 |
+
"Equal",
|
| 27 |
+
"Reshape",
|
| 28 |
+
"Sqrt",
|
| 29 |
+
"Shape",
|
| 30 |
+
"Expand",
|
| 31 |
+
"Unsqueeze",
|
| 32 |
+
"ReduceMean",
|
| 33 |
+
"Sub"
|
| 34 |
+
],
|
| 35 |
+
"weight_type": "QInt8"
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"bos_token": "<s>",
|
| 4 |
+
"clean_up_tokenization_spaces": true,
|
| 5 |
+
"cls_token": "<s>",
|
| 6 |
+
"eos_token": "</s>",
|
| 7 |
+
"errors": "replace",
|
| 8 |
+
"mask_token": "<mask>",
|
| 9 |
+
"max_length": 512,
|
| 10 |
+
"model_max_length": 512,
|
| 11 |
+
"pad_token": "<pad>",
|
| 12 |
+
"sep_token": "</s>",
|
| 13 |
+
"stride": 0,
|
| 14 |
+
"tokenizer_class": "RobertaTokenizer",
|
| 15 |
+
"trim_offsets": true,
|
| 16 |
+
"truncation_side": "right",
|
| 17 |
+
"truncation_strategy": "longest_first",
|
| 18 |
+
"unk_token": "<unk>"
|
| 19 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|