Update app.py
app.py CHANGED
@@ -82,7 +82,11 @@ def infer(image, labels):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Compare Multilingual Zero-shot Image Classification")
-    gr.Markdown("Compare the performance of SigLIP and
+    gr.Markdown("Compare the performance of SigLIP and other models on zero-shot classification in this Space.")
+    gr.Markdown("Three models are compared: CLIP-ViT, NLLB-CLIP and SigLIP. Note that SigLIP outputs are normalized for visualization purposes.")
+    gr.Markdown("NLLB-CLIP is a multilingual vision-language model that combines [NLLB](https://ai.meta.com/research/no-language-left-behind/) with [CLIP](https://openai.com/research/clip) to extend CLIP to 200+ languages.")
+    gr.Markdown("CLIP-ViT is CLIP model extended to other languages using [multilingual knowledge distillation](https://arxiv.org/abs/2004.09813).")
+    gr.Markdown("Finally, SigLIP is the state-of-the-art vision-language model released by Google. Multilingual checkpoint is pre-trained by Google.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")
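For context, a minimal sketch of what the SigLIP branch of `infer(image, labels)` could look like, assuming the Hugging Face `transformers` API and the `google/siglip-base-patch16-256-multilingual` checkpoint; the checkpoint id, function name, and per-label rescaling are illustrative assumptions, not taken from the Space's actual code:

```python
# Illustrative sketch only; the Space's real infer() may differ.
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

CKPT = "google/siglip-base-patch16-256-multilingual"  # assumed multilingual SigLIP checkpoint
model = AutoModel.from_pretrained(CKPT)
processor = AutoProcessor.from_pretrained(CKPT)

def siglip_scores(image: Image.Image, labels: list[str]) -> dict[str, float]:
    # SigLIP was trained with "max_length" text padding, so the processor is called the same way.
    inputs = processor(text=labels, images=image, padding="max_length", return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # SigLIP uses a sigmoid (not softmax) objective, so each label gets an independent probability.
    probs = torch.sigmoid(outputs.logits_per_image)[0]
    # Rescale so the scores sum to 1, purely for visualization (as the added Markdown notes).
    probs = probs / probs.sum()
    return {label: float(score) for label, score in zip(labels, probs)}
```

A label-to-score mapping like this can be passed directly to a `gr.Label` output component in the Blocks layout shown in the diff.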