Update app.py
app.py CHANGED
@@ -82,7 +82,11 @@ def infer(image, labels):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Compare Multilingual Zero-shot Image Classification")
-    gr.Markdown("Compare the performance of SigLIP and
+    gr.Markdown("Compare the performance of SigLIP and other models on zero-shot classification in this Space.")
+    gr.Markdown("Three models are compared: CLIP-ViT, NLLB-CLIP and SigLIP. Note that SigLIP outputs are normalized for visualization purposes.")
+    gr.Markdown("NLLB-CLIP is a multilingual vision-language model that combines [NLLB](https://ai.meta.com/research/no-language-left-behind/) with [CLIP](https://openai.com/research/clip) to extend CLIP to 200+ languages.")
+    gr.Markdown("CLIP-ViT is CLIP model extended to other languages using [multilingual knowledge distillation](https://arxiv.org/abs/2004.09813).")
+    gr.Markdown("Finally, SigLIP is the state-of-the-art vision-language model released by Google. Multilingual checkpoint is pre-trained by Google.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")
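For context, a minimal sketch of what the SigLIP branch of `infer(image, labels)` could look like, assuming the Hugging Face `transformers` API and the `google/siglip-base-patch16-256-multilingual` checkpoint; the checkpoint id, function name, and per-label rescaling are illustrative assumptions, not taken from the Space's actual code:

```python
# Illustrative sketch only; the Space's real infer() may differ.
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

CKPT = "google/siglip-base-patch16-256-multilingual"  # assumed multilingual SigLIP checkpoint
model = AutoModel.from_pretrained(CKPT)
processor = AutoProcessor.from_pretrained(CKPT)

def siglip_scores(image: Image.Image, labels: list[str]) -> dict[str, float]:
    # SigLIP was trained with "max_length" text padding, so the processor is called the same way.
    inputs = processor(text=labels, images=image, padding="max_length", return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # SigLIP uses a sigmoid (not softmax) objective, so each label gets an independent probability.
    probs = torch.sigmoid(outputs.logits_per_image)[0]
    # Rescale so the scores sum to 1, purely for visualization (as the added Markdown notes).
    probs = probs / probs.sum()
    return {label: float(score) for label, score in zip(labels, probs)}
```

A label-to-score mapping like this can be passed directly to a `gr.Label` output component in the Blocks layout shown in the diff.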