merve HF Staff committed on
Commit
7972af2
·
verified ·
1 Parent(s): 1de1eb1

Upload Vision_Releases_transformers_4_56.ipynb

Browse files
Vision_Releases_transformers_4_56.ipynb CHANGED
@@ -1737,7 +1737,7 @@
1737
  "\n",
1738
  "New transformers release comes with amazing vision/multimodal models: Florence-2 by MSFT, SAM-2 by Meta, KOSMOS-2.5 by MSFT, MetaCLIP2 by Meta, all runnable in Colab free tier. This notebook enables you to try them all!\n",
1739
  "\n",
1740
- "Note: This notebook has a lot of image outputs, so you need to run the notebook to see them."
1741
  ],
1742
  "metadata": {
1743
  "id": "zwCKwR_TkwLy"
@@ -1809,7 +1809,7 @@
1809
  "metadata": {
1810
  "id": "JudF0LvsgInQ"
1811
  },
1812
- "execution_count": 12,
1813
  "outputs": []
1814
  },
1815
  {
@@ -1824,7 +1824,7 @@
1824
  "metadata": {
1825
  "id": "s3oe_JZ0hiY5"
1826
  },
1827
- "execution_count": 13,
1828
  "outputs": []
1829
  },
1830
  {
@@ -1841,7 +1841,7 @@
1841
  },
1842
  "outputId": "750f0535-6804-403e-98d7-da390d4a1be3"
1843
  },
1844
- "execution_count": 14,
1845
  "outputs": [
1846
  {
1847
  "output_type": "stream",
@@ -1877,7 +1877,7 @@
1877
  "metadata": {
1878
  "id": "FndxHptOinaC"
1879
  },
1880
- "execution_count": 15,
1881
  "outputs": []
1882
  },
1883
  {
@@ -1894,7 +1894,7 @@
1894
  },
1895
  "outputId": "de6c2001-caef-4184-8430-448815eabbb3"
1896
  },
1897
- "execution_count": 16,
1898
  "outputs": [
1899
  {
1900
  "output_type": "stream",
@@ -1978,7 +1978,7 @@
1978
  "source": [
1979
  "## Kosmos 2.5\n",
1980
  "\n",
1981
- "Kosmos 2.5 by Microsoft is a great document model that can not only convert documents to markdown, it also can locate meaningful structures on documents.\n",
1982
  "It has a \"normal\" checkpoint and a \"chat\" checkpoint which can be used for VQA tasks. Let's see how to use it."
1983
  ],
1984
  "metadata": {
@@ -2012,7 +2012,7 @@
2012
  "metadata": {
2013
  "id": "tPSqn-POl4up"
2014
  },
2015
- "execution_count": 3,
2016
  "outputs": []
2017
  },
2018
  {
@@ -2054,7 +2054,7 @@
2054
  },
2055
  "outputId": "0bdc32d0-3897-46ef-f897-b2208e1cc28d"
2056
  },
2057
- "execution_count": 4,
2058
  "outputs": [
2059
  {
2060
  "output_type": "stream",
@@ -2157,7 +2157,7 @@
2157
  },
2158
  "outputId": "58d36158-c08c-4faf-897c-f9a83ce02760"
2159
  },
2160
- "execution_count": 6,
2161
  "outputs": [
2162
  {
2163
  "output_type": "stream",
@@ -2208,7 +2208,7 @@
2208
  "metadata": {
2209
  "id": "VbHH5RQ2qzlo"
2210
  },
2211
- "execution_count": 8,
2212
  "outputs": []
2213
  },
2214
  {
@@ -2221,7 +2221,7 @@
2221
  "metadata": {
2222
  "id": "XjCTXKWCmWdI"
2223
  },
2224
- "execution_count": 9,
2225
  "outputs": []
2226
  },
2227
  {
@@ -2249,7 +2249,7 @@
2249
  },
2250
  "outputId": "bab3453d-5846-4518-be23-92cdc0d9e5a1"
2251
  },
2252
- "execution_count": 10,
2253
  "outputs": [
2254
  {
2255
  "output_type": "stream",
@@ -2345,7 +2345,7 @@
2345
  },
2346
  "outputId": "3f012250-0a09-492e-d675-3950ff447e5f"
2347
  },
2348
- "execution_count": 1,
2349
  "outputs": [
2350
  {
2351
  "output_type": "display_data",
@@ -2452,7 +2452,7 @@
2452
  "metadata": {
2453
  "id": "kxbraMeuvMni"
2454
  },
2455
- "execution_count": 2,
2456
  "outputs": []
2457
  },
2458
  {
@@ -2482,7 +2482,7 @@
2482
  "metadata": {
2483
  "id": "LZ1fD_VTvLdV"
2484
  },
2485
- "execution_count": 8,
2486
  "outputs": []
2487
  },
2488
  {
@@ -2506,7 +2506,7 @@
2506
  "id": "0mRS1uGWPieQ",
2507
  "outputId": "abb87538-5a2f-4be8-9a7e-fa860b872321"
2508
  },
2509
- "execution_count": 10,
2510
  "outputs": [
2511
  {
2512
  "output_type": "execute_result",
@@ -2606,7 +2606,7 @@
2606
  "id": "n1FrIXFWRi_d",
2607
  "outputId": "d3b3bc0c-058d-46bd-a1fb-df128f81acef"
2608
  },
2609
- "execution_count": 13,
2610
  "outputs": [
2611
  {
2612
  "output_type": "stream",
@@ -2648,7 +2648,7 @@
2648
  "metadata": {
2649
  "id": "ZF-ANJiLRTjE"
2650
  },
2651
- "execution_count": 2,
2652
  "outputs": []
2653
  },
2654
  {
@@ -2683,7 +2683,7 @@
2683
  "metadata": {
2684
  "id": "6HZUQjytSBfd"
2685
  },
2686
- "execution_count": 3,
2687
  "outputs": []
2688
  },
2689
  {
@@ -2706,7 +2706,7 @@
2706
  "metadata": {
2707
  "id": "RdAy8fwQSSSF"
2708
  },
2709
- "execution_count": 7,
2710
  "outputs": []
2711
  },
2712
  {
@@ -2717,7 +2717,7 @@
2717
  "metadata": {
2718
  "id": "132QpoZDScgw"
2719
  },
2720
- "execution_count": 10,
2721
  "outputs": []
2722
  },
2723
  {
@@ -2750,7 +2750,7 @@
2750
  "metadata": {
2751
  "id": "6V5JQ7pqS4GG"
2752
  },
2753
- "execution_count": 18,
2754
  "outputs": []
2755
  },
2756
  {
@@ -2772,7 +2772,7 @@
2772
  "id": "fhhtCRo2S6hR",
2773
  "outputId": "070f7b3e-48c0-46a1-f3cb-115a9e77fa97"
2774
  },
2775
- "execution_count": 24,
2776
  "outputs": [
2777
  {
2778
  "output_type": "stream",
@@ -2855,9 +2855,9 @@
2855
  "base_uri": "https://localhost:8080/"
2856
  },
2857
  "id": "eX-_Cl9jVMQA",
2858
- "outputId": "78284741-50e9-40ac-8fe3-bee5b453f70f"
2859
  },
2860
- "execution_count": 38,
2861
  "outputs": [
2862
  {
2863
  "output_type": "execute_result",
@@ -2873,7 +2873,7 @@
2873
  ]
2874
  },
2875
  "metadata": {},
2876
- "execution_count": 38
2877
  }
2878
  ]
2879
  },
@@ -2902,6 +2902,21 @@
2902
  },
2903
  "execution_count": null,
2904
  "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2905
  }
2906
  ]
2907
  }
 
1737
  "\n",
1738
  "New transformers release comes with amazing vision/multimodal models: Florence-2 by MSFT, SAM-2 by Meta, KOSMOS-2.5 by MSFT, MetaCLIP2 by Meta, all runnable in Colab free tier. This notebook enables you to try them all!\n",
1739
  "\n",
1740
+ "Note: This notebook has a lot of image outputs, so you need to run the notebook to see them. There are links at the end to each model's documentation; check them out for more inference options and info!"
1741
  ],
1742
  "metadata": {
1743
  "id": "zwCKwR_TkwLy"
 
1809
  "metadata": {
1810
  "id": "JudF0LvsgInQ"
1811
  },
1812
+ "execution_count": null,
1813
  "outputs": []
1814
  },
1815
  {
 
1824
  "metadata": {
1825
  "id": "s3oe_JZ0hiY5"
1826
  },
1827
+ "execution_count": null,
1828
  "outputs": []
1829
  },
1830
  {
 
1841
  },
1842
  "outputId": "750f0535-6804-403e-98d7-da390d4a1be3"
1843
  },
1844
+ "execution_count": null,
1845
  "outputs": [
1846
  {
1847
  "output_type": "stream",
 
1877
  "metadata": {
1878
  "id": "FndxHptOinaC"
1879
  },
1880
+ "execution_count": null,
1881
  "outputs": []
1882
  },
1883
  {
 
1894
  },
1895
  "outputId": "de6c2001-caef-4184-8430-448815eabbb3"
1896
  },
1897
+ "execution_count": null,
1898
  "outputs": [
1899
  {
1900
  "output_type": "stream",
 
1978
  "source": [
1979
  "## Kosmos 2.5\n",
1980
  "\n",
1981
+ "Kosmos 2.5 by Microsoft is a great document model that can not only convert documents to markdown but also locate meaningful structures in documents and indicate parts of documents with bounding boxes. You can try [this demo](https://huggingface.co/spaces/nielsr/kosmos-2.5-demo) to see what it can do.\n",
1982
  "It has a \"normal\" checkpoint and a \"chat\" checkpoint which can be used for VQA tasks. Let's see how to use it."
1983
  ],
1984
  "metadata": {
 
2012
  "metadata": {
2013
  "id": "tPSqn-POl4up"
2014
  },
2015
+ "execution_count": null,
2016
  "outputs": []
2017
  },
2018
  {
 
2054
  },
2055
  "outputId": "0bdc32d0-3897-46ef-f897-b2208e1cc28d"
2056
  },
2057
+ "execution_count": null,
2058
  "outputs": [
2059
  {
2060
  "output_type": "stream",
 
2157
  },
2158
  "outputId": "58d36158-c08c-4faf-897c-f9a83ce02760"
2159
  },
2160
+ "execution_count": null,
2161
  "outputs": [
2162
  {
2163
  "output_type": "stream",
 
2208
  "metadata": {
2209
  "id": "VbHH5RQ2qzlo"
2210
  },
2211
+ "execution_count": null,
2212
  "outputs": []
2213
  },
2214
  {
 
2221
  "metadata": {
2222
  "id": "XjCTXKWCmWdI"
2223
  },
2224
+ "execution_count": null,
2225
  "outputs": []
2226
  },
2227
  {
 
2249
  },
2250
  "outputId": "bab3453d-5846-4518-be23-92cdc0d9e5a1"
2251
  },
2252
+ "execution_count": null,
2253
  "outputs": [
2254
  {
2255
  "output_type": "stream",
 
2345
  },
2346
  "outputId": "3f012250-0a09-492e-d675-3950ff447e5f"
2347
  },
2348
+ "execution_count": null,
2349
  "outputs": [
2350
  {
2351
  "output_type": "display_data",
 
2452
  "metadata": {
2453
  "id": "kxbraMeuvMni"
2454
  },
2455
+ "execution_count": null,
2456
  "outputs": []
2457
  },
2458
  {
 
2482
  "metadata": {
2483
  "id": "LZ1fD_VTvLdV"
2484
  },
2485
+ "execution_count": null,
2486
  "outputs": []
2487
  },
2488
  {
 
2506
  "id": "0mRS1uGWPieQ",
2507
  "outputId": "abb87538-5a2f-4be8-9a7e-fa860b872321"
2508
  },
2509
+ "execution_count": null,
2510
  "outputs": [
2511
  {
2512
  "output_type": "execute_result",
 
2606
  "id": "n1FrIXFWRi_d",
2607
  "outputId": "d3b3bc0c-058d-46bd-a1fb-df128f81acef"
2608
  },
2609
+ "execution_count": null,
2610
  "outputs": [
2611
  {
2612
  "output_type": "stream",
 
2648
  "metadata": {
2649
  "id": "ZF-ANJiLRTjE"
2650
  },
2651
+ "execution_count": null,
2652
  "outputs": []
2653
  },
2654
  {
 
2683
  "metadata": {
2684
  "id": "6HZUQjytSBfd"
2685
  },
2686
+ "execution_count": null,
2687
  "outputs": []
2688
  },
2689
  {
 
2706
  "metadata": {
2707
  "id": "RdAy8fwQSSSF"
2708
  },
2709
+ "execution_count": null,
2710
  "outputs": []
2711
  },
2712
  {
 
2717
  "metadata": {
2718
  "id": "132QpoZDScgw"
2719
  },
2720
+ "execution_count": null,
2721
  "outputs": []
2722
  },
2723
  {
 
2750
  "metadata": {
2751
  "id": "6V5JQ7pqS4GG"
2752
  },
2753
+ "execution_count": null,
2754
  "outputs": []
2755
  },
2756
  {
 
2772
  "id": "fhhtCRo2S6hR",
2773
  "outputId": "070f7b3e-48c0-46a1-f3cb-115a9e77fa97"
2774
  },
2775
+ "execution_count": null,
2776
  "outputs": [
2777
  {
2778
  "output_type": "stream",
 
2855
  "base_uri": "https://localhost:8080/"
2856
  },
2857
  "id": "eX-_Cl9jVMQA",
2858
+ "outputId": "2ab6a027-9467-4183-875b-a55cd874d862"
2859
  },
2860
+ "execution_count": null,
2861
  "outputs": [
2862
  {
2863
  "output_type": "execute_result",
 
2873
  ]
2874
  },
2875
  "metadata": {},
2876
+ "execution_count": 41
2877
  }
2878
  ]
2879
  },
 
2902
  },
2903
  "execution_count": null,
2904
  "outputs": []
2905
+ },
2906
+ {
2907
+ "cell_type": "markdown",
2908
+ "source": [
2909
+ "## Docs\n",
2910
+ "Get more info in the links below!\n",
2911
+ "- [SAM2 docs](https://huggingface.co/docs/transformers/main/en/model_doc/sam2)\n",
2912
+ "- [KOSMOS2.5 docs](https://huggingface.co/docs/transformers/main/en/model_doc/kosmos2_5)\n",
2913
+ "- [Florence-2 docs](https://huggingface.co/docs/transformers/main/en/model_doc/florence2)\n",
2914
+ "- [DINOv3 docs](https://huggingface.co/docs/transformers/main/en/model_doc/dinov3)\n",
2915
+ "- [MetaCLIP2 docs](https://huggingface.co/docs/transformers/main/en/model_doc/metaclip_2)"
2916
+ ],
2917
+ "metadata": {
2918
+ "id": "N3Yp8BZNd7aM"
2919
+ }
2920
  }
2921
  ]
2922
  }