Shuwei Hou
committed on
Commit
·
a7b67d5
1
Parent(s):
febafde
nltk_download
Browse files
segmentation/segmentation_batchalign.py
CHANGED
|
@@ -6,6 +6,10 @@ import torch
|
|
| 6 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 7 |
from nltk.tokenize import sent_tokenize
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# input is the list of words, no punctuation, all lower case,
|
| 10 |
# output is a list of labels: 0 means the corresponding word is not the last word of a c-unit,
|
| 11 |
# 1 means the corresponding word is the last word of a c-unit
|
|
|
|
| 6 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 7 |
from nltk.tokenize import sent_tokenize
|
| 8 |
|
| 9 |
+
import nltk
|
| 10 |
+
nltk.download('punkt_tab')
|
| 11 |
+
nltk.download('punkt')
|
| 12 |
+
|
| 13 |
# input is the list of words, no punctuation, all lower case,
|
| 14 |
# output is a list of labels: 0 means the corresponding word is not the last word of a c-unit,
|
| 15 |
# 1 means the corresponding word is the last word of a c-unit
|