Commit
·
caa9d4a
1
Parent(s):
1dd00da
updated readme
Browse files
README.md
CHANGED
|
@@ -1,3 +1,103 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Pretrained Models
|
| 2 |
+
|**Sentence Length**|**Trained Tokens**|**Link**|
|
| 3 |
+
|----------|----------|----------|
|
| 4 |
+
|128|~11B|[BiGS-11B-128](https://drive.google.com/drive/folders/1-nhzeWVgpXwMyNEQ5j-MwJxSzwKyT2an?usp=sharing)
|
| 5 |
+
|128|~29B|[BiGS-29B-128](https://drive.google.com/drive/folders/10Mtl8_XUJb2mmHLyRC9x1wltdIWy6aaP?usp=sharing)
|
| 6 |
+
|128|~97B|[BiGS-97B-128](https://huggingface.co/JunxiongWang/BiGS_128)
|
| 7 |
+
|512|~108B|[BiGS-108B-512](https://huggingface.co/JunxiongWang/BiGS_512)
|
| 8 |
+
|1024|~110B|[BiGS-110B-1024](https://huggingface.co/JunxiongWang/BiGS_1024)
|
| 9 |
+
|4096|~110B|[BiGS-110B-4096](https://huggingface.co/JunxiongWang/BiGS_4096)
|
| 10 |
+
|
| 11 |
+
### MNLI Checkpoints
|
| 12 |
+
|
| 13 |
+
|**Sentence Length**|**Trained Tokens**|**Model**|
|
| 14 |
+
|----------|----------|----------|
|
| 15 |
+
|128|~11B|[BiGS-11B-128MNLI](https://drive.google.com/drive/folders/1-tn5ar_tRi9DnK_bNMZtPpappUdNnVET?usp=sharing)
|
| 16 |
+
|128|~29B|[BiGS-29B-128MNLI](https://drive.google.com/drive/folders/116JwMbChYp9tBuPTz5jbiaulhXrXt1P2?usp=sharing)
|
| 17 |
+
|128|~97B|[BiGS-97B-128MNLI](https://huggingface.co/JunxiongWang/BiGS_128_MNLI)
|
| 18 |
+
|512|~108B|[BiGS-108B-512MNLI](https://huggingface.co/JunxiongWang/BiGS_512_MNLI)
|
| 19 |
+
|
| 20 |
+
<!-- Sentence length: 128
|
| 21 |
+
|
| 22 |
+
|**Training Tokens**|**Model**|
|
| 23 |
+
|----------|----------|
|
| 24 |
+
|~11B|[https://drive.google.com/drive/folders/1-nhzeWVgpXwMyNEQ5j-MwJxSzwKyT2an?usp=sharing](https://drive.google.com/drive/folders/1-nhzeWVgpXwMyNEQ5j-MwJxSzwKyT2an?usp=sharing)
|
| 25 |
+
|~29B|[https://drive.google.com/drive/folders/10Mtl8_XUJb2mmHLyRC9x1wltdIWy6aaP?usp=sharing](https://drive.google.com/drive/folders/10Mtl8_XUJb2mmHLyRC9x1wltdIWy6aaP?usp=sharing)
|
| 26 |
+
|~97B|[https://huggingface.co/JunxiongWang/BiGS_128](https://huggingface.co/JunxiongWang/BiGS_128)
|
| 27 |
+
-->
|
| 28 |
+
|
| 29 |
+
<!-- Sentence length: 512
|
| 30 |
+
|
| 31 |
+
|**Training Tokens**|**Model**|
|
| 32 |
+
|----------|----------|
|
| 33 |
+
|~108B|[https://huggingface.co/JunxiongWang/BiGS_512](https://huggingface.co/JunxiongWang/BiGS_512) -->
|
| 34 |
+
|
| 35 |
+
<!-- MNLI checkpoint:
|
| 36 |
+
|
| 37 |
+
|**Training Tokens**|**Model**|
|
| 38 |
+
|----------|----------|
|
| 39 |
+
|~108B|[https://huggingface.co/JunxiongWang/BiGS_512_MNLI](https://huggingface.co/JunxiongWang/BiGS_512_MNLI)
|
| 40 |
+
|
| 41 |
+
Sentence length: 1024
|
| 42 |
+
|
| 43 |
+
|**Training Tokens**|**Model**|
|
| 44 |
+
|----------|----------|
|
| 45 |
+
|~110B|[https://huggingface.co/JunxiongWang/BiGS_1024](https://huggingface.co/JunxiongWang/BiGS_1024)
|
| 46 |
+
|
| 47 |
+
Sentence length: 4096
|
| 48 |
+
|
| 49 |
+
|**Training Tokens**|**Model**|
|
| 50 |
+
|----------|----------|
|
| 51 |
+
|~110B|[https://huggingface.co/JunxiongWang/BiGS_4096](https://huggingface.co/JunxiongWang/BiGS_4096)
|
| 52 |
+
-->
|
| 53 |
+
## Example Usage
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
### Load Masked Language Model
|
| 57 |
+
|
| 58 |
+
```python
|
| 59 |
+
import jax
|
| 60 |
+
from jax import numpy as jnp
|
| 61 |
+
from transformers import BertTokenizer
|
| 62 |
+
from BiGS.modeling_flax_bigs import FlaxBiGSForMaskedLM
|
| 63 |
+
|
| 64 |
+
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
|
| 65 |
+
model = FlaxBiGSForMaskedLM.from_pretrained('JunxiongWang/BiGS_128')
|
| 66 |
+
|
| 67 |
+
text = "The goal of life is [MASK]."
|
| 68 |
+
encoded_input = tokenizer(text, return_tensors='np', padding='max_length', max_length=128)
|
| 69 |
+
output = model(**encoded_input)
|
| 70 |
+
tokenizer.convert_ids_to_tokens(jnp.flip(jnp.argsort(jax.nn.softmax(output.logits[encoded_input['input_ids']==103]))[0])[:10])
|
| 71 |
+
# output: ['happiness', 'love', 'peace', 'perfection', 'life', 'enlightenment', 'god', 'survival', 'freedom', 'good']
|
| 72 |
+
jnp.flip(jnp.sort(jax.nn.softmax(output.logits[encoded_input['input_ids']==103]))[0])[:10]
|
| 73 |
+
# probability: [0.16052087, 0.04306792, 0.03651363, 0.03468223, 0.02927081, 0.02549769, 0.02385132, 0.02261189, 0.01672831, 0.01619471]
|
| 74 |
+
|
| 75 |
+
text = "Paris is the [MASK] of France."
|
| 76 |
+
encoded_input = tokenizer(text, return_tensors='np', padding='max_length', max_length=128)
|
| 77 |
+
output = model(**encoded_input)
|
| 78 |
+
tokenizer.convert_ids_to_tokens(jnp.flip(jnp.argsort(jax.nn.softmax(output.logits[encoded_input['input_ids']==103]))[0])[:10])
|
| 79 |
+
# output: ['capital', 'centre', 'center', 'city', 'capitol', 'prefecture', 'headquarters', 'president', 'metropolis', 'heart']
|
| 80 |
+
jnp.flip(jnp.sort(jax.nn.softmax(output.logits[encoded_input['input_ids']==103]))[0])[:10]
|
| 81 |
+
# probability: [0.9981787 , 0.00034076, 0.00026992, 0.00026926, 0.00017787, 0.00004816, 0.00004256, 0.00003716, 0.00003634, 0.00002893]
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### Load Sequence Classification Model
|
| 85 |
+
|
| 86 |
+
```python
|
| 87 |
+
from BiGS.modeling_flax_bigs import FlaxBiGSForSequenceClassification
|
| 88 |
+
model = FlaxBiGSForSequenceClassification.from_pretrained('JunxiongWang/BiGS_512')
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### Load Question Answering Model
|
| 92 |
+
|
| 93 |
+
```python
|
| 94 |
+
from BiGS.modeling_flax_bigs import FlaxBiGSForQuestionAnswering
|
| 95 |
+
model = FlaxBiGSForQuestionAnswering.from_pretrained('JunxiongWang/BiGS_512')
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### Load Multiple Choice Classification Model
|
| 99 |
+
|
| 100 |
+
```python
|
| 101 |
+
from BiGS.modeling_flax_bigs import FlaxBiGSForMultipleChoice
|
| 102 |
+
model = FlaxBiGSForMultipleChoice.from_pretrained('JunxiongWang/BiGS_512')
|
| 103 |
+
```
|