# Hugging Face Space (status: Running)
| import gradio as gr | |
| import datasets | |
| import huggingface_hub | |
| import sys | |
| from pathlib import Path | |
# Soft Gradio theme with a green primary hue.
theme = gr.themes.Soft(primary_hue="green")
def check_model(model_id):
    """Look up a Hub model and try to build a transformers pipeline for it.

    Args:
        model_id: Hugging Face model id to validate.

    Returns:
        ``(None, None)`` when the model metadata lookup fails,
        ``(model_id, pipeline)`` when the pipeline was built, or
        ``(model_id, exception)`` when building the pipeline raised.
    """
    try:
        info = huggingface_hub.model_info(model_id)
        pipeline_tag = info.pipeline_tag
    except Exception:
        # Any lookup failure is reported to the caller as "no such model".
        return None, None

    try:
        # Imported inside the guard so an import failure is reported to the
        # caller the same way as a failure to load the model.
        from transformers import pipeline

        loaded = pipeline(task=pipeline_tag, model=model_id)
    except Exception as err:
        return model_id, err
    return model_id, loaded
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Check that a Hub dataset, its config (subset) and its split exist.

    Only dataset metadata is queried; the dataset itself is not downloaded.

    Args:
        dataset_id: Hugging Face dataset id to validate.
        dataset_config: Name of the requested config (subset).
        dataset_split: Name of the requested split.

    Returns:
        A ``(dataset_id, config, split)`` tuple:

        * ``(None, dataset_config, dataset_split)`` when the dataset, or the
          splits of the requested config, cannot be resolved;
        * ``(dataset_id, [configs...], dataset_split)`` when the requested
          config does not exist (the list holds the valid config names);
        * ``(dataset_id, None, [splits...])`` when the requested split does
          not exist (the list holds the valid split names);
        * ``(dataset_id, dataset_config, dataset_split)`` when all are valid.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist (or may be private / unreachable).
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Need to choose dataset subset (config).
        return dataset_id, configs, dataset_split

    try:
        # Ask only for the split names instead of loading the whole dataset
        # just to inspect its keys.
        splits = list(datasets.get_dataset_split_names(dataset_id, dataset_config))
    except Exception:
        # The config is listed but its splits cannot be resolved: report the
        # dataset as inaccessible rather than letting the error escape.
        return None, dataset_config, dataset_split

    if dataset_split not in splits:
        # Need to choose dataset split.
        return dataset_id, None, splits

    return dataset_id, dataset_config, dataset_split
def try_submit(model_id, dataset_id, dataset_config, dataset_split, local):
    """Validate the model/dataset selection and optionally run the scan here.

    Args:
        model_id: Hugging Face model id entered by the user.
        dataset_id: Hugging Face dataset id entered by the user.
        dataset_config: Currently selected dataset subset (config).
        dataset_split: Currently selected dataset split.
        local: When truthy, run the evaluation in this process and cache the
            rendered HTML report under ``output/``.

    Returns:
        A ``(config, split)`` pair for the two dataset dropdowns. Each item is
        either the value to keep, or a ``gr.update`` carrying the valid
        choices when the current selection does not exist.
    """
    # Validate model
    m_id, ppl = check_model(model_id=model_id)
    if m_id is None:
        gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
        return dataset_config, dataset_split
    if isinstance(ppl, Exception):
        gr.Warning(f'Failed to load "{model_id}" model: {ppl}')
        return dataset_config, dataset_split

    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )
    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
        return dataset_config, dataset_split
    if isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        suggested = config[0] if config else None
        return gr.update(choices=config, value=suggested), split
    if isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        suggested = split[0] if split else None
        # check_dataset reports None for the config in this case; keep the
        # user's current config instead of clearing the dropdown.
        return dataset_config, gr.update(choices=split, value=suggested)
    if config is None or split is None:
        # check_dataset could not confirm a usable config/split pair.
        gr.Warning(f'Dataset "{dataset_id}" could not be validated. Please check the dataset config and split.')
        return dataset_config, dataset_split

    # TODO: Validate column mapping by running once
    del ppl  # drop this function's reference to the validation pipeline

    if local:
        if "cicd" not in sys.path:
            sys.path.append("cicd")
        from giskard_cicd.loaders import HuggingFaceLoader
        from giskard_cicd.pipeline.runner import PipelineRunner
        # Not used yet; presumably needed by the pending publish step below.
        from cicd.automation import create_discussion  # noqa: F401

        supported_loaders = {
            "huggingface": HuggingFaceLoader(),
        }
        runner = PipelineRunner(loaders=supported_loaders)
        report = runner.run(
            loader_id="huggingface",
            model=m_id,
            dataset=d_id,
            scan_config=None,
            dataset_split=split,
            dataset_config=config,
        )

        # TODO: Publish it
        # rendered_report = report.to_markdown(template="github")

        # Cache locally
        rendered_report = report.to_html()
        output_dir = Path(f"output/{m_id}/{d_id}/{config}/{split}/")
        output_dir.mkdir(parents=True, exist_ok=True)
        report_path = output_dir / "report.html"
        print(f"Writing to {report_path}")
        # Explicit encoding so the write does not depend on the platform
        # default encoding.
        report_path.write_text(rendered_report, encoding="utf-8")

    return config, split
# Build the UI: model settings on the left, dataset settings on the right,
# and a submit button underneath.
with gr.Blocks(theme=theme) as iface:
    with gr.Row():
        with gr.Column():
            model_id_input = gr.Textbox(
                placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
                label="Hugging Face model id",
            )

            # TODO: Add supported model pairs: Text Classification - text-classification
            # NOTE: this dropdown is not passed to try_submit below.
            model_type = gr.Dropdown(
                value=0,
                choices=[("Auto-detect", 0), ("Text Classification", 1)],
                label="Hugging Face model type",
            )

            run_local = gr.Checkbox(label="Run in this Space", value=True)

        with gr.Column():
            dataset_id_input = gr.Textbox(
                placeholder="tweet_eval",
                label="Hugging Face dataset id",
            )

            # Both dropdowns accept custom values so a name can be typed
            # even when it is not among the listed choices.
            dataset_config_input = gr.Dropdown(
                value="default",
                choices=["default"],
                allow_custom_value=True,
                label="Hugging Face dataset subset",
            )

            dataset_split_input = gr.Dropdown(
                value="test",
                choices=["test"],
                allow_custom_value=True,
                label="Hugging Face dataset split",
            )

    with gr.Row():
        run_btn = gr.Button("Validate and submit evaluation task", variant="primary")
        # try_submit returns new values for the two dataset dropdowns.
        run_btn.click(
            fn=try_submit,
            inputs=[
                model_id_input,
                dataset_id_input,
                dataset_config_input,
                dataset_split_input,
                run_local,
            ],
            outputs=[dataset_config_input, dataset_split_input],
        )

# Limit the request queue to 20 entries, then start the app.
iface.queue(max_size=20)
iface.launch()