Commit a743d61
Parent(s): 8a0053d
update submit

Files changed:
- app.py (+42 -2)
- src/about.py (+62 -43)
app.py
CHANGED
@@ -437,8 +437,48 @@ with demo:
            gr.HTML(TABLE_TEXT)
            gr.Markdown(LLM_BENCHMARKS_TEXT2, elem_classes="markdown-text")
        with gr.TabItem("📤 Submit here!", elem_id="submit-model-tab", id=2):
-
-
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                with gr.Column():
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                # 1. Submit your model info here!
+                gr.Markdown("✨ Submit your model info here!")
            with gr.Row():
                model_name = gr.Textbox(label="Model Name")
                revision_commit = gr.Textbox(label="Revision commit")
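For readers less familiar with Gradio, the pattern added above (one collapsible accordion per queue, each wrapping a `Dataframe`) can be reproduced in isolation. The sketch below is illustrative only and not part of this commit; the toy `pandas` DataFrame and its columns stand in for the real finished/running/pending queue data:

```python
# Minimal standalone sketch of the accordion-plus-dataframe queue display.
# Assumes `gradio` and `pandas` are installed; all values are placeholders.
import gradio as gr
import pandas as pd

EVAL_COLS = ["model", "revision", "status"]
pending_eval_queue_df = pd.DataFrame(
    [["my-org/my-vlm", "main", "PENDING"]], columns=EVAL_COLS
)

with gr.Blocks() as demo:
    # Collapsed by default; the label shows the live queue length.
    with gr.Accordion(
        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
        open=False,
    ):
        with gr.Row():
            gr.components.Dataframe(
                value=pending_eval_queue_df,
                headers=EVAL_COLS,
                row_count=5,
            )

if __name__ == "__main__":
    demo.launch()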
src/about.py
CHANGED
@@ -360,38 +360,47 @@ Planning
"""

EVALUATION_QUEUE_TEXT = """
-
-Models added here will be automatically evaluated on the FlagEval cluster.
+Submit here! feature deployment notes

-
-1. If you choose to evaluate via API call, you need to provide the Model interface, name and corresponding API key.
-2. If you choose to do open source model evaluation directly through huggingface, you don't need to fill in the Model online api url and Model online api key.
+Evaluation Queue for the FlagEval VLM Leaderboard

-
+Models added here will be automatically evaluated on the FlagEval cluster.

-
-1. model_name: Name of the model to use
-2. api_key: API access key
-3. api_base: Base URL for the API service
+Currently, we offer two methods for model evaluation: API calls and private deployment.

-
+1. If you choose to evaluate via an API call, you need to provide the model interface, name, and corresponding API key.
+2. If you choose to evaluate an open-source model directly through Hugging Face, you do not need to fill in the model's online API URL or API key.

-
+## Open API Model Integration Documentation

-
+For models accessed via API calls (such as the OpenAI API, Anthropic API, etc.), integration is straightforward and only requires the necessary configuration information:

-
-
-
-3. Set up the initialization and inference pipeline
+1. `model_name`: Name of the model to use
+2. `api_key`: API access key
+3. `api_base`: Base URL for the API service

-
+---
+
+## Adding a Custom Model to the Platform
+
+This guide explains how to integrate your custom model into the platform by implementing a model adapter and a `run.sh` script. We'll use the Qwen-VL implementation as a reference example.
+
+### Overview
+
+To add your custom model, you need to:
+
+1. Create a custom dataset class
+2. Implement a model adapter class
+3. Set up the initialization and inference pipeline
+
+### Step-by-Step Implementation

-Here is an example:[model_adapter.py](https://github.com/flageval-baai/FlagEvalMM/blob/main/model_zoo/vlm/qwen_vl/model_adapter.py)
+Here is an example: [Qwen-VL model_adapter.py](https://github.com/flageval-baai/FlagEvalMM/blob/main/model_zoo/vlm/qwen_vl/model_adapter.py)

-#### 1. Create Preprocess Custom Dataset Class
+#### 1. Create a Preprocessing Custom Dataset Class
+
+Inherit from `ServerDataset` to handle data loading:

-Inherit from `ServerDataset` to handle data loading:
```python
# model_adapter.py
class CustomDataset(ServerDataset):

@@ -411,8 +420,9 @@ class CustomDataset(ServerDataset):
        return question_id, img_path_idx, qs
```

-The function `get_data` returns a structure like this:
-
+The function `get_data` returns a structure like this:
+
+```json
{
    "img_path": "A list where each element is an absolute path to an image that can be read directly using PIL, cv2, etc.",
    "question": "A string containing the question, where image positions are marked with <image1> <image2>",

@@ -421,11 +431,15 @@ The function `get_data` returns a structure like this:
}
```

-
-
-
-
-
+---
+
+#### 2. Implement Model Adapter
+
+Inherit from `BaseModelAdapter` and implement the required methods:
+
+- `model_init`: responsible for model initialization; the entry point for model loading and setup.
+- `run_one_task`: implements the inference pipeline, handling data processing and result generation for a single evaluation task.
+
```python
# model_adapter.py
class ModelAdapter(BaseModelAdapter):

@@ -461,30 +475,35 @@ class ModelAdapter(BaseModelAdapter):
    Use the provided meta_info and rank parameters to manage result storage as needed.
    '''
```
-
-
-
-
-
-
+
+**Note:**
+
+`results` is a list of dictionaries.
+Each dictionary must contain two keys:
+
+```json
+{
+    "question_id": "identifies the specific question",
+    "answer": "contains the model's prediction/output"
+}
```
-After collecting all results, they are saved using `save_result()`

-
-run.sh is the entry script for launching model evaluation, used to set environment variables and start the evaluation program.
+After collecting all results, they are saved using `save_result()`.

-
+---
+
+#### 3. Launch Script (`run.sh`)
+
+`run.sh` is the entry script for launching model evaluation; it sets environment variables and starts the evaluation program.
+
+```bash
#!/bin/bash
current_file="$0"
current_dir="$(dirname "$current_file")"
SERVER_IP=$1
SERVER_PORT=$2
-PYTHONPATH=$current_dir:$PYTHONPATH python $current_dir/model_adapter.py
-    --server_ip $SERVER_IP \
-    --server_port $SERVER_PORT \
-    "${@:3}"
+PYTHONPATH=$current_dir:$PYTHONPATH python $current_dir/model_adapter.py --server_ip $SERVER_IP --server_port $SERVER_PORT "${@:3}"
```
-

"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
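For context, here is a minimal sketch of how the three fields introduced in the documentation above (`model_name`, `api_key`, `api_base`) are typically consumed. This example is not part of the commit; it assumes an OpenAI-compatible chat-completions endpoint and the official `openai` Python package, and all values shown are placeholders:

```python
# Illustrative only: how model_name / api_key / api_base usually fit together
# for an API-based evaluation. Assumes the `openai` package (>= 1.0).
from openai import OpenAI

# The three fields requested on the submission form (placeholder values).
model_name = "gpt-4o-mini"              # Name of the model to use
api_key = "sk-..."                      # API access key
api_base = "https://api.openai.com/v1"  # Base URL for the API service

client = OpenAI(api_key=api_key, base_url=api_base)

# One round trip against the configured endpoint.
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Describe the image in one sentence."}],
)
print(response.choices[0].message.content)
```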
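To make the three documented steps concrete end to end, here is a runnable toy that mirrors the protocol described above: a `ServerDataset` subclass whose `__getitem__` uses `get_data`, a `BaseModelAdapter` subclass implementing `model_init` and `run_one_task`, and a `results` list of `{"question_id", "answer"}` dicts handed to `save_result()`. The base classes here are minimal stand-ins written for this illustration, not the actual FlagEvalMM classes, and every constructor or signature beyond the documented names is an assumption:

```python
# Schematic, self-contained sketch of the adapter protocol; NOT the real
# FlagEvalMM code. Only the names documented above are taken from the source.
import json

class ServerDataset:
    """Stand-in: the real class fetches samples from the evaluation server;
    this one just wraps a local list."""

    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def get_data(self, index):
        # Returns the documented structure: img_path / question / question_id.
        return self.samples[index]

class BaseModelAdapter:
    """Stand-in: the real class wires the adapter to the eval server."""

    def save_result(self, results, meta_info=None, rank=0):
        # The real implementation persists results; printing stands in here.
        print(json.dumps(results, indent=2))

class CustomDataset(ServerDataset):
    def __getitem__(self, index):
        data = self.get_data(index)
        return data["question_id"], data["img_path"], data["question"]

class ModelAdapter(BaseModelAdapter):
    def model_init(self):
        # Entry point for model loading and setup; a trivial echo model here.
        self.model = lambda img_paths, question: f"stub answer to: {question}"

    def run_one_task(self, dataset):
        results = []
        for i in range(len(dataset)):
            question_id, img_paths, question = dataset[i]
            answer = self.model(img_paths, question)
            # Each result dict must contain exactly these two keys.
            results.append({"question_id": question_id, "answer": answer})
        self.save_result(results)  # save once, after all results are collected

if __name__ == "__main__":
    samples = [{
        "question_id": "q1",
        "img_path": ["/abs/path/cat.jpg"],
        "question": "<image1> What animal is shown?",
    }]
    adapter = ModelAdapter()
    adapter.model_init()
    adapter.run_one_task(CustomDataset(samples))
```

In the real setup, the `run.sh` shown in the diff would launch the adapter against the evaluation server, along the lines of `bash run.sh 127.0.0.1 5000 --some-flag`, with `"${@:3}"` forwarding any extra arguments to `model_adapter.py`.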