Upload README.md
Browse files
README.md
CHANGED
|
@@ -8,11 +8,11 @@ tags:
|
|
| 8 |
- speech
|
| 9 |
- xlsr-fine-tuning-week
|
| 10 |
widget:
|
| 11 |
-
-
|
| 12 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
|
| 13 |
-
-
|
| 14 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
|
| 15 |
-
-
|
| 16 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
|
| 17 |
model-index:
|
| 18 |
- name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
|
|
@@ -76,7 +76,7 @@ def cleaning(text):
|
|
| 76 |
|
| 77 |
data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
|
| 78 |
|
| 79 |
-
test = pd.read_csv(f"{data_dir}/test.tsv", sep="
|
| 80 |
test["path"] = data_dir + "/clips/" + test["path"]
|
| 81 |
print(f"Step 0: {len(test)}")
|
| 82 |
|
|
@@ -93,7 +93,7 @@ test = test.reset_index(drop=True)
|
|
| 93 |
print(test.head())
|
| 94 |
|
| 95 |
test = test[["path", "sentence"]]
|
| 96 |
-
test.to_csv("/content/test.csv", sep="
|
| 97 |
```
|
| 98 |
|
| 99 |
**Prediction**
|
|
@@ -146,7 +146,7 @@ def predict(batch):
|
|
| 146 |
return batch
|
| 147 |
|
| 148 |
|
| 149 |
-
dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="
|
| 150 |
dataset = dataset.map(speech_file_to_array_fn)
|
| 151 |
result = dataset.map(predict, batched=True, batch_size=4)
|
| 152 |
```
|
|
|
|
| 8 |
- speech
|
| 9 |
- xlsr-fine-tuning-week
|
| 10 |
widget:
|
| 11 |
+
- example_title: Common Voice sample 1
|
| 12 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
|
| 13 |
+
- example_title: Common Voice sample 2978
|
| 14 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
|
| 15 |
+
- example_title: Common Voice sample 5168
|
| 16 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
|
| 17 |
model-index:
|
| 18 |
- name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
|
|
|
|
| 76 |
|
| 77 |
data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
|
| 78 |
|
| 79 |
+
test = pd.read_csv(f"{data_dir}/test.tsv", sep="\t")
|
| 80 |
test["path"] = data_dir + "/clips/" + test["path"]
|
| 81 |
print(f"Step 0: {len(test)}")
|
| 82 |
|
|
|
|
| 93 |
print(test.head())
|
| 94 |
|
| 95 |
test = test[["path", "sentence"]]
|
| 96 |
+
test.to_csv("/content/test.csv", sep="\t", encoding="utf-8", index=False)
|
| 97 |
```
|
| 98 |
|
| 99 |
**Prediction**
|
|
|
|
| 146 |
return batch
|
| 147 |
|
| 148 |
|
| 149 |
+
dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]
|
| 150 |
dataset = dataset.map(speech_file_to_array_fn)
|
| 151 |
result = dataset.map(predict, batched=True, batch_size=4)
|
| 152 |
```
|