Upload folder using huggingface_hub
Browse files- README.md +12 -2
- example_notebook.ipynb +0 -0
- script.py +33 -35
README.md
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
This repo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
| 1 |
+
# Empty solution example for the S23DR competition
|
| 2 |
|
| 3 |
+
This repo provides a minimalistic example of a valid but empty submission to the S23DR competition.
|
| 4 |
+
We recommend you take a look at [this example](https://huggingface.co/usm3d/handcrafted_baseline_submission),
|
| 5 |
+
which implements some primitive algorithms and provides useful I/O and visualization functions.
|
| 6 |
+
|
| 7 |
+
This example simply seeks to provide minimal code which succeeds at reading the dataset and producing a solution (in this case, two vertices at the origin and a zero-length edge connecting them).
|
| 8 |
+
|
| 9 |
+
`script.py` is the main file, which is run by the competition space. It should produce `submission.parquet` as the result of the run. Please see the additional comments in the `script.py` file.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
license: apache-2.0
|
| 13 |
+
---
|
| 14 |
|
example_notebook.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
script.py
CHANGED
|
@@ -1,58 +1,56 @@
|
|
| 1 |
### This is an example of the script that will be run in the test environment.
|
| 2 |
-
|
| 3 |
-
### They are between '''---compulsory---''' comments.
|
| 4 |
### You can change the rest of the code to define and test your solution.
|
| 5 |
### However, you should not change the signature of the provided function.
|
| 6 |
-
### The script
|
| 7 |
### You can use any additional files and subdirectories to organize your code.
|
| 8 |
|
| 9 |
-
'''---compulsory---'''
|
| 10 |
-
import hoho; hoho.setup() # YOU MUST CALL hoho.setup() BEFORE ANYTHING ELSE
|
| 11 |
-
'''---compulsory---'''
|
| 12 |
-
|
| 13 |
from pathlib import Path
|
| 14 |
from tqdm import tqdm
|
| 15 |
import pandas as pd
|
| 16 |
import numpy as np
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
def empty_solution(sample):
    '''Build the smallest valid wireframe: 2 vertices joined by 1 edge.

    `sample` is accepted for interface compatibility but is not inspected.
    Returns a `(vertices, edges)` pair where `vertices` is a 2x3 array of
    zeros and `edges` is a list holding the single index pair `(0, 1)`.
    '''
    vertices = np.zeros((2, 3))
    edges = [(0, 1)]
    return vertices, edges
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
if __name__ == "__main__":
|
| 25 |
print ("------------ Loading dataset------------ ")
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
# by default it is usually better to use `get_dataset()` like this
|
| 29 |
-
#
|
| 30 |
-
# dataset = hoho.get_dataset(split='all')
|
| 31 |
-
#
|
| 32 |
-
# but in this case (because we don't do anything with the sample
|
| 33 |
-
# anyway) we set `decode=None`. We can set the `split` argument
|
| 34 |
-
# to 'train' or 'val' ('all' defaults back to 'train') if we are
|
| 35 |
-
# testing ourselves locally.
|
| 36 |
-
#
|
| 37 |
-
# dataset = hoho.get_dataset(split='val', decode=None)
|
| 38 |
-
#
|
| 39 |
-
# On the test server *`split` must be set to 'all'*
|
| 40 |
-
# to compute both the public and private leaderboards.
|
| 41 |
-
#
|
| 42 |
-
dataset = hoho.get_dataset(split='all', decode=None)
|
| 43 |
|
| 44 |
print('------------ Now you can do your solution ---------------')
|
| 45 |
solution = []
|
| 46 |
-
for
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
solution.append({
|
| 51 |
-
'__key__': sample['__key__'],
|
| 52 |
-
'wf_vertices': pred_vertices.tolist(),
|
| 53 |
-
'wf_edges': pred_edges
|
| 54 |
-
})
|
| 55 |
print('------------ Saving results ---------------')
|
| 56 |
-
sub = pd.DataFrame(solution, columns=["
|
| 57 |
-
sub.to_parquet(
|
| 58 |
print("------------ Done ------------ ")
|
|
|
|
| 1 |
### This is an example of the script that will be run in the test environment.
|
| 2 |
+
|
|
|
|
| 3 |
### You can change the rest of the code to define and test your solution.
|
| 4 |
### However, you should not change the signature of the provided function.
|
| 5 |
+
### The script saves "submission.parquet" file in the current directory.
|
| 6 |
### You can use any additional files and subdirectories to organize your code.
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from tqdm import tqdm
|
| 10 |
import pandas as pd
|
| 11 |
import numpy as np
|
| 12 |
+
from datasets import load_dataset
|
| 13 |
+
from typing import Dict
|
| 14 |
|
| 15 |
def empty_solution(sample):
    '''Build the smallest valid wireframe: 2 vertices joined by 1 edge.

    `sample` is accepted for interface compatibility but is not inspected.
    Returns a `(vertices, edges)` pair where `vertices` is a 2x3 array of
    zeros and `edges` is a list holding the single index pair `(0, 1)`.
    '''
    vertices = np.zeros((2, 3))
    edges = [(0, 1)]
    return vertices, edges
|
| 18 |
|
| 19 |
+
class Sample(dict):
    '''Dict whose `repr` shows a compact summary of each value.

    Intended for printing a dataset sample without dumping large payloads:
    values are replaced by their shape, their element type, or their type
    (see `pick_repr_data`). The stored data itself is not modified.

    Subclasses the builtin `dict` directly rather than the deprecated
    `typing.Dict` alias; instances remain ordinary dictionaries.
    '''

    def pick_repr_data(self, x):
        '''Return a short stand-in for `x` to be used in `__repr__`.

        - objects exposing a `shape` attribute -> that `shape`
        - `str`, `float`, `int` (and therefore `bool`) -> the value itself
        - lists -> `[type of first element]`, or `[]` when empty
        - anything else -> `type(x)`
        '''
        if hasattr(x, 'shape'):
            return x.shape
        if isinstance(x, (str, float, int)):
            return x
        if isinstance(x, list):
            # Only the first element is inspected; the list is assumed to be
            # homogeneous for display purposes.
            return [type(x[0])] if x else []
        return type(x)

    def __repr__(self):
        '''Return the string form of a dict mapping each key to its summary.'''
        return str({k: self.pick_repr_data(v) for k, v in self.items()})
|
| 32 |
+
|
| 33 |
|
| 34 |
# Script entry point: load the test dataset, produce one (empty) prediction
# per sample, and write all predictions to "submission.parquet" in the
# current directory.
if __name__ == "__main__":
    print ("------------ Loading dataset------------ ")
    # NOTE(review): trust_remote_code=True presumably lets `datasets` execute
    # the loading script shipped in the dataset repository -- confirm this is
    # intended and that the repository is trusted.
    dataset = load_dataset("usm3d/hoho25k_test_x", trust_remote_code=True)

    print('------------ Now you can do your solution ---------------')
    solution = []
    # `dataset` is iterated by subset name; every subset contributes rows to
    # the same submission.
    for subset_name in dataset:
        for sample in tqdm(dataset[subset_name]):
            # replace this with your solution
            print(Sample(sample), flush=True)
            print('------')
            pred_vertices, pred_edges = empty_solution(sample)
            solution.append({
                'order_id': sample['order_id'],
                # .tolist() converts the vertex array to nested Python lists
                # before it is stored in the DataFrame.
                'wf_vertices': pred_vertices.tolist(),
                'wf_edges': pred_edges
            })

    print('------------ Saving results ---------------')
    sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
    sub.to_parquet("submission.parquet")
    print("------------ Done ------------ ")
|