Upload folder using huggingface_hub
Browse files- README.md +12 -2
- example_notebook.ipynb +31 -136
- script.py +23 -2
README.md
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
This repo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
| 1 |
+
# Empty solution example for the S23DR competition
|
| 2 |
|
| 3 |
+
This repo provides a minimalistic example of a valid but empty submission to the S23DR competition.
|
| 4 |
+
We recommend you take a look at [this example](https://huggingface.co/usm3d/handcrafted_baseline_submission),
|
| 5 |
+
which implements some primitive algorithms and provides useful I/O and visualization functions.
|
| 6 |
+
|
| 7 |
+
This example simply provides minimal code that succeeds at reading the dataset and producing a solution (in this case, two vertices at the origin and an edge of zero length connecting them).
|
| 8 |
+
|
| 9 |
+
`script.py` is the main file, which is run by the competition space. It should produce `submission.parquet` as the result of the run. Please see the additional comments in the `script.py` file.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
license: apache-2.0
|
| 13 |
+
---
|
| 14 |
|
example_notebook.ipynb
CHANGED
|
@@ -31,7 +31,7 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
-
"execution_count":
|
| 35 |
"id": "ItDDqoXop8bb",
|
| 36 |
"metadata": {
|
| 37 |
"colab": {
|
|
@@ -61,7 +61,7 @@
|
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"cell_type": "code",
|
| 64 |
-
"execution_count":
|
| 65 |
"id": "zq_ljluLqzzv",
|
| 66 |
"metadata": {
|
| 67 |
"id": "zq_ljluLqzzv"
|
|
@@ -115,7 +115,7 @@
|
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"cell_type": "code",
|
| 118 |
-
"execution_count":
|
| 119 |
"id": "U0J1w3kCZqMD",
|
| 120 |
"metadata": {
|
| 121 |
"colab": {
|
|
@@ -239,88 +239,18 @@
|
|
| 239 |
},
|
| 240 |
"outputs": [
|
| 241 |
{
|
| 242 |
-
"
|
| 243 |
-
"application/vnd.jupyter.widget-view+json": {
|
| 244 |
-
"model_id": "512d1f08f3e64ef3808da8bb846d6354",
|
| 245 |
-
"version_major": 2,
|
| 246 |
-
"version_minor": 0
|
| 247 |
-
},
|
| 248 |
-
"text/plain": [
|
| 249 |
-
"Fetching 4 files: 0%| | 0/4 [00:00<?, ?it/s]"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
"metadata": {},
|
| 253 |
-
"output_type": "display_data"
|
| 254 |
-
},
|
| 255 |
-
{
|
| 256 |
-
"data": {
|
| 257 |
-
"application/vnd.jupyter.widget-view+json": {
|
| 258 |
-
"model_id": "920116c0c939415ba0da14e045bb5fe1",
|
| 259 |
-
"version_major": 2,
|
| 260 |
-
"version_minor": 0
|
| 261 |
-
},
|
| 262 |
-
"text/plain": [
|
| 263 |
-
"script.py: 0%| | 0.00/2.38k [00:00<?, ?B/s]"
|
| 264 |
-
]
|
| 265 |
-
},
|
| 266 |
-
"metadata": {},
|
| 267 |
-
"output_type": "display_data"
|
| 268 |
-
},
|
| 269 |
-
{
|
| 270 |
-
"data": {
|
| 271 |
-
"application/vnd.jupyter.widget-view+json": {
|
| 272 |
-
"model_id": "4975ca0c047e41f6a0bfdb24cf6d980f",
|
| 273 |
-
"version_major": 2,
|
| 274 |
-
"version_minor": 0
|
| 275 |
-
},
|
| 276 |
-
"text/plain": [
|
| 277 |
-
".gitattributes: 0%| | 0.00/1.52k [00:00<?, ?B/s]"
|
| 278 |
-
]
|
| 279 |
-
},
|
| 280 |
-
"metadata": {},
|
| 281 |
-
"output_type": "display_data"
|
| 282 |
-
},
|
| 283 |
-
{
|
| 284 |
-
"data": {
|
| 285 |
-
"application/vnd.jupyter.widget-view+json": {
|
| 286 |
-
"model_id": "663f1903db14424fbbd3a99a6f407408",
|
| 287 |
-
"version_major": 2,
|
| 288 |
-
"version_minor": 0
|
| 289 |
-
},
|
| 290 |
-
"text/plain": [
|
| 291 |
-
"README.md: 0%| | 0.00/779 [00:00<?, ?B/s]"
|
| 292 |
-
]
|
| 293 |
-
},
|
| 294 |
-
"metadata": {},
|
| 295 |
-
"output_type": "display_data"
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"data": {
|
| 299 |
-
"application/vnd.jupyter.widget-view+json": {
|
| 300 |
-
"model_id": "93a306e3e7c94176bf127a9b2d364ce3",
|
| 301 |
-
"version_major": 2,
|
| 302 |
-
"version_minor": 0
|
| 303 |
-
},
|
| 304 |
-
"text/plain": [
|
| 305 |
-
"example_notebook.ipynb: 0%| | 0.00/146k [00:00<?, ?B/s]"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
"metadata": {},
|
| 309 |
-
"output_type": "display_data"
|
| 310 |
-
},
|
| 311 |
-
{
|
| 312 |
-
"name": "stdout",
|
| 313 |
"output_type": "stream",
|
| 314 |
"text": [
|
| 315 |
-
"/
|
| 316 |
-
"https://huggingface.co/
|
| 317 |
-
"
|
| 318 |
]
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"data": {
|
| 322 |
"application/vnd.jupyter.widget-view+json": {
|
| 323 |
-
"model_id": "
|
| 324 |
"version_major": 2,
|
| 325 |
"version_minor": 0
|
| 326 |
},
|
|
@@ -334,7 +264,7 @@
|
|
| 334 |
{
|
| 335 |
"data": {
|
| 336 |
"application/vnd.jupyter.widget-view+json": {
|
| 337 |
-
"model_id": "
|
| 338 |
"version_major": 2,
|
| 339 |
"version_minor": 0
|
| 340 |
},
|
|
@@ -348,12 +278,12 @@
|
|
| 348 |
{
|
| 349 |
"data": {
|
| 350 |
"application/vnd.jupyter.widget-view+json": {
|
| 351 |
-
"model_id": "
|
| 352 |
"version_major": 2,
|
| 353 |
"version_minor": 0
|
| 354 |
},
|
| 355 |
"text/plain": [
|
| 356 |
-
"example_notebook.ipynb: 0%| | 0.00/
|
| 357 |
]
|
| 358 |
},
|
| 359 |
"metadata": {},
|
|
@@ -362,7 +292,7 @@
|
|
| 362 |
{
|
| 363 |
"data": {
|
| 364 |
"application/vnd.jupyter.widget-view+json": {
|
| 365 |
-
"model_id": "
|
| 366 |
"version_major": 2,
|
| 367 |
"version_minor": 0
|
| 368 |
},
|
|
@@ -376,12 +306,12 @@
|
|
| 376 |
{
|
| 377 |
"data": {
|
| 378 |
"application/vnd.jupyter.widget-view+json": {
|
| 379 |
-
"model_id": "
|
| 380 |
"version_major": 2,
|
| 381 |
"version_minor": 0
|
| 382 |
},
|
| 383 |
"text/plain": [
|
| 384 |
-
"script.py: 0%| | 0.00/2.
|
| 385 |
]
|
| 386 |
},
|
| 387 |
"metadata": {},
|
|
@@ -391,7 +321,17 @@
|
|
| 391 |
"name": "stdout",
|
| 392 |
"output_type": "stream",
|
| 393 |
"text": [
|
| 394 |
-
"/Users/jack/dev/S23DR/2025/empty_submission/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
]
|
| 396 |
}
|
| 397 |
],
|
|
@@ -445,7 +385,7 @@
|
|
| 445 |
},
|
| 446 |
{
|
| 447 |
"cell_type": "code",
|
| 448 |
-
"execution_count":
|
| 449 |
"id": "NfxHRGClBYQE",
|
| 450 |
"metadata": {
|
| 451 |
"colab": {
|
|
@@ -454,15 +394,7 @@
|
|
| 454 |
"id": "NfxHRGClBYQE",
|
| 455 |
"outputId": "b1ef8073-2e4e-4303-ce24-fbd01d3ff910"
|
| 456 |
},
|
| 457 |
-
"outputs": [
|
| 458 |
-
{
|
| 459 |
-
"name": "stdout",
|
| 460 |
-
"output_type": "stream",
|
| 461 |
-
"text": [
|
| 462 |
-
"/Users/jack/dev/S23DR/2025/empty_submission/my_cool_submission_2025/my_cool_submission_2025\n"
|
| 463 |
-
]
|
| 464 |
-
}
|
| 465 |
-
],
|
| 466 |
"source": [
|
| 467 |
"# change directories into our submission folder\n",
|
| 468 |
"%cd $submission_name\n",
|
|
@@ -515,7 +447,7 @@
|
|
| 515 |
},
|
| 516 |
{
|
| 517 |
"cell_type": "code",
|
| 518 |
-
"execution_count":
|
| 519 |
"id": "4Y0eTe7M_vIJ",
|
| 520 |
"metadata": {
|
| 521 |
"colab": {
|
|
@@ -524,15 +456,7 @@
|
|
| 524 |
"id": "4Y0eTe7M_vIJ",
|
| 525 |
"outputId": "c77b0e72-c47e-4b3b-b724-dcce4b911d42"
|
| 526 |
},
|
| 527 |
-
"outputs": [
|
| 528 |
-
{
|
| 529 |
-
"name": "stdout",
|
| 530 |
-
"output_type": "stream",
|
| 531 |
-
"text": [
|
| 532 |
-
"[CommitInfo(commit_url='https://huggingface.co/jacklangerman/my_cool_submission_2025/commit/6048b3304a327a3727d860fc44174e96f19d4caa', commit_message='Upload folder using huggingface_hub', commit_description='', oid='6048b3304a327a3727d860fc44174e96f19d4caa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/jacklangerman/my_cool_submission_2025', endpoint='https://huggingface.co', repo_type='model', repo_id='jacklangerman/my_cool_submission_2025'), pr_revision=None, pr_num=None)]\n"
|
| 533 |
-
]
|
| 534 |
-
}
|
| 535 |
-
],
|
| 536 |
"source": [
|
| 537 |
"# Upload our changes to the solution\n",
|
| 538 |
"operations = [\n",
|
|
@@ -557,7 +481,7 @@
|
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"cell_type": "code",
|
| 560 |
-
"execution_count":
|
| 561 |
"id": "Rf59SEZ6Cp4U",
|
| 562 |
"metadata": {
|
| 563 |
"colab": {
|
|
@@ -567,36 +491,7 @@
|
|
| 567 |
"id": "Rf59SEZ6Cp4U",
|
| 568 |
"outputId": "8379d6fa-42ea-4c98-fe83-7023216b2ed6"
|
| 569 |
},
|
| 570 |
-
"outputs": [
|
| 571 |
-
{
|
| 572 |
-
"data": {
|
| 573 |
-
"text/markdown": [
|
| 574 |
-
"\n",
|
| 575 |
-
"1. Follow this link to the competition space for the [S23DR Challenge](https://huggingface.co/spaces/usm3d/S23DR2025)\n",
|
| 576 |
-
"<br>\n",
|
| 577 |
-
"2. Login to Hugging Face. <br>\n",
|
| 578 |
-
"<img src=\"https://huggingface.co/datasets/jacklangerman/hosted_images/resolve/main/submit_to_s23rd/login.png\" width=512>\n",
|
| 579 |
-
"<br>\n",
|
| 580 |
-
"\n",
|
| 581 |
-
"3. Click \"New submission\" <br>\n",
|
| 582 |
-
"<img src=\"https://huggingface.co/datasets/jacklangerman/hosted_images/resolve/main/submit_to_s23rd/click_new_submission.png\" width=512>\n",
|
| 583 |
-
"<br>\n",
|
| 584 |
-
"\n",
|
| 585 |
-
"4. Enter in the repo_id of your submission ([jacklangerman/my_cool_submission_2025](https://huggingface.co/jacklangerman/my_cool_submission_2025))<br>\n",
|
| 586 |
-
"<br>\n",
|
| 587 |
-
"<img src=\"https://huggingface.co/datasets/jacklangerman/hosted_images/resolve/main/submit_to_s23rd/name_and_desc.png\" width=512>\n",
|
| 588 |
-
"<br>\n",
|
| 589 |
-
"\n"
|
| 590 |
-
],
|
| 591 |
-
"text/plain": [
|
| 592 |
-
"<IPython.core.display.Markdown object>"
|
| 593 |
-
]
|
| 594 |
-
},
|
| 595 |
-
"execution_count": 12,
|
| 596 |
-
"metadata": {},
|
| 597 |
-
"output_type": "execute_result"
|
| 598 |
-
}
|
| 599 |
-
],
|
| 600 |
"source": [
|
| 601 |
"from IPython.display import Markdown as md\n",
|
| 602 |
"\n",
|
|
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
+
"execution_count": 1,
|
| 35 |
"id": "ItDDqoXop8bb",
|
| 36 |
"metadata": {
|
| 37 |
"colab": {
|
|
|
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"cell_type": "code",
|
| 64 |
+
"execution_count": 2,
|
| 65 |
"id": "zq_ljluLqzzv",
|
| 66 |
"metadata": {
|
| 67 |
"id": "zq_ljluLqzzv"
|
|
|
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"cell_type": "code",
|
| 118 |
+
"execution_count": null,
|
| 119 |
"id": "U0J1w3kCZqMD",
|
| 120 |
"metadata": {
|
| 121 |
"colab": {
|
|
|
|
| 239 |
},
|
| 240 |
"outputs": [
|
| 241 |
{
|
| 242 |
+
"name": "stderr",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
"output_type": "stream",
|
| 244 |
"text": [
|
| 245 |
+
"/opt/homebrew/Caskroom/miniforge/base/envs/s23dr/lib/python3.10/site-packages/huggingface_hub/file_download.py:832: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`.\n",
|
| 246 |
+
"For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.\n",
|
| 247 |
+
" warnings.warn(\n"
|
| 248 |
]
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"data": {
|
| 252 |
"application/vnd.jupyter.widget-view+json": {
|
| 253 |
+
"model_id": "0560e0bf24e0464f94b046efa6783163",
|
| 254 |
"version_major": 2,
|
| 255 |
"version_minor": 0
|
| 256 |
},
|
|
|
|
| 264 |
{
|
| 265 |
"data": {
|
| 266 |
"application/vnd.jupyter.widget-view+json": {
|
| 267 |
+
"model_id": "8a63ac13d13943bb8a3bedfc1db4a3df",
|
| 268 |
"version_major": 2,
|
| 269 |
"version_minor": 0
|
| 270 |
},
|
|
|
|
| 278 |
{
|
| 279 |
"data": {
|
| 280 |
"application/vnd.jupyter.widget-view+json": {
|
| 281 |
+
"model_id": "dec031bf5ab64944802ee0ec992f150d",
|
| 282 |
"version_major": 2,
|
| 283 |
"version_minor": 0
|
| 284 |
},
|
| 285 |
"text/plain": [
|
| 286 |
+
"example_notebook.ipynb: 0%| | 0.00/148k [00:00<?, ?B/s]"
|
| 287 |
]
|
| 288 |
},
|
| 289 |
"metadata": {},
|
|
|
|
| 292 |
{
|
| 293 |
"data": {
|
| 294 |
"application/vnd.jupyter.widget-view+json": {
|
| 295 |
+
"model_id": "a9715efac2d84dd7baa881acfeb7977f",
|
| 296 |
"version_major": 2,
|
| 297 |
"version_minor": 0
|
| 298 |
},
|
|
|
|
| 306 |
{
|
| 307 |
"data": {
|
| 308 |
"application/vnd.jupyter.widget-view+json": {
|
| 309 |
+
"model_id": "e0d2871dca7541cba6ea98b8066c2fff",
|
| 310 |
"version_major": 2,
|
| 311 |
"version_minor": 0
|
| 312 |
},
|
| 313 |
"text/plain": [
|
| 314 |
+
"script.py: 0%| | 0.00/2.54k [00:00<?, ?B/s]"
|
| 315 |
]
|
| 316 |
},
|
| 317 |
"metadata": {},
|
|
|
|
| 321 |
"name": "stdout",
|
| 322 |
"output_type": "stream",
|
| 323 |
"text": [
|
| 324 |
+
"/Users/jack/dev/S23DR/2025/empty_submission/upstream\n",
|
| 325 |
+
"https://huggingface.co/jacklangerman/my_cool_submission_2025\n"
|
| 326 |
+
]
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"name": "stderr",
|
| 330 |
+
"output_type": "stream",
|
| 331 |
+
"text": [
|
| 332 |
+
"/opt/homebrew/Caskroom/miniforge/base/envs/s23dr/lib/python3.10/site-packages/huggingface_hub/hf_api.py:9234: UserWarning: Warnings while validating metadata in README.md:\n",
|
| 333 |
+
"- empty or missing yaml metadata in repo card\n",
|
| 334 |
+
" warnings.warn(f\"Warnings while validating metadata in README.md:\\n{message}\")\n"
|
| 335 |
]
|
| 336 |
}
|
| 337 |
],
|
|
|
|
| 385 |
},
|
| 386 |
{
|
| 387 |
"cell_type": "code",
|
| 388 |
+
"execution_count": null,
|
| 389 |
"id": "NfxHRGClBYQE",
|
| 390 |
"metadata": {
|
| 391 |
"colab": {
|
|
|
|
| 394 |
"id": "NfxHRGClBYQE",
|
| 395 |
"outputId": "b1ef8073-2e4e-4303-ce24-fbd01d3ff910"
|
| 396 |
},
|
| 397 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
"source": [
|
| 399 |
"# change directories into our submission folder\n",
|
| 400 |
"%cd $submission_name\n",
|
|
|
|
| 447 |
},
|
| 448 |
{
|
| 449 |
"cell_type": "code",
|
| 450 |
+
"execution_count": null,
|
| 451 |
"id": "4Y0eTe7M_vIJ",
|
| 452 |
"metadata": {
|
| 453 |
"colab": {
|
|
|
|
| 456 |
"id": "4Y0eTe7M_vIJ",
|
| 457 |
"outputId": "c77b0e72-c47e-4b3b-b724-dcce4b911d42"
|
| 458 |
},
|
| 459 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
"source": [
|
| 461 |
"# Upload our changes to the solution\n",
|
| 462 |
"operations = [\n",
|
|
|
|
| 481 |
},
|
| 482 |
{
|
| 483 |
"cell_type": "code",
|
| 484 |
+
"execution_count": null,
|
| 485 |
"id": "Rf59SEZ6Cp4U",
|
| 486 |
"metadata": {
|
| 487 |
"colab": {
|
|
|
|
| 491 |
"id": "Rf59SEZ6Cp4U",
|
| 492 |
"outputId": "8379d6fa-42ea-4c98-fe83-7023216b2ed6"
|
| 493 |
},
|
| 494 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
"source": [
|
| 496 |
"from IPython.display import Markdown as md\n",
|
| 497 |
"\n",
|
script.py
CHANGED
|
@@ -40,10 +40,31 @@ if __name__ == "__main__":
|
|
| 40 |
print(params)
|
| 41 |
import os
|
| 42 |
|
| 43 |
-
print(os.system('pwd'))
|
| 44 |
print(os.system('ls -lahtr'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
print(dataset, flush=True)
|
| 48 |
# dataset = load_dataset('webdataset', data_files={)
|
| 49 |
|
|
|
|
| 40 |
print(params)
|
| 41 |
import os
|
| 42 |
|
| 43 |
+
print('pwd:', os.system('pwd'))
|
| 44 |
print(os.system('ls -lahtr'))
|
| 45 |
+
print(os.system('ls -lahtr /tmp/data/'))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
data_path_test_server = Path('/tmp/data')
|
| 49 |
+
data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
|
| 50 |
+
|
| 51 |
+
if data_path_test_server.exists():
|
| 52 |
+
data_path = data_path_test_server
|
| 53 |
+
else:
|
| 54 |
+
data_path = data_path_local
|
| 55 |
+
|
| 56 |
+
print([str(p) for p in data_path.rglob('*validation*.arrow')])
|
| 57 |
|
| 58 |
+
# dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
|
| 59 |
+
dataset = load_dataset(
|
| 60 |
+
"arrow",
|
| 61 |
+
data_files={
|
| 62 |
+
"validation": [str(p) for p in data_path.rglob('*validation*.arrow')],
|
| 63 |
+
"test": [str(p) for p in data_path.rglob('*test*.arrow')],
|
| 64 |
+
},
|
| 65 |
+
trust_remote_code=True,
|
| 66 |
+
# streaming=True
|
| 67 |
+
)
|
| 68 |
print(dataset, flush=True)
|
| 69 |
# dataset = load_dataset('webdataset', data_files={)
|
| 70 |
|