{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "machine_shape": "hm", "gpuType": "A100" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "2daf4bced26548a6bf8319f5f7e274c4": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_c0df28e87bb148a3bb0e86be0782411b", "IPY_MODEL_fa7f5d29f6af43c6a53ed241befb9c2c", "IPY_MODEL_b9d0b58707e84e9e9f096441f067f544", "IPY_MODEL_129eb9f545314ca3ac474462e331e22e", "IPY_MODEL_4f8c02cb22df4f8999efa2f17ae0ebed" ], "layout": "IPY_MODEL_91846047b296465fb9291e579a131a87" } }, "c0df28e87bb148a3bb0e86be0782411b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ffaebd71bf87461693949cdc6e5c1d24", "placeholder": "​", "style": "IPY_MODEL_7c5f4bb5dc494b2db921b022a0183554", "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "fa7f5d29f6af43c6a53ed241befb9c2c": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_8d1df099c7db44d39958df9143d2e744", "placeholder": "​", "style": "IPY_MODEL_8373cf9e3b3b4dfb919767cb7749c489", "value": "" } }, "b9d0b58707e84e9e9f096441f067f544": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_ed38dac26c7646c3bd2047fd3566d941", "style": "IPY_MODEL_6e1a3cfac9d843ae9c73384ad08c9ad3", "value": true } }, "129eb9f545314ca3ac474462e331e22e": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_a8b986bdbb7246debd38e5666cc395c1", "style": "IPY_MODEL_f1a9d1b4db7941c7837a09df72d51db8", "tooltip": "" } }, "4f8c02cb22df4f8999efa2f17ae0ebed": { 
"model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b0bacd899325495db2e85006ba4a3073", "placeholder": "​", "style": "IPY_MODEL_eee5cdde27df400192191c0947701928", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, "91846047b296465fb9291e579a131a87": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "ffaebd71bf87461693949cdc6e5c1d24": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", 
"model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7c5f4bb5dc494b2db921b022a0183554": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8d1df099c7db44d39958df9143d2e744": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, 
"grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8373cf9e3b3b4dfb919767cb7749c489": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ed38dac26c7646c3bd2047fd3566d941": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, 
"object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6e1a3cfac9d843ae9c73384ad08c9ad3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a8b986bdbb7246debd38e5666cc395c1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f1a9d1b4db7941c7837a09df72d51db8": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "b0bacd899325495db2e85006ba4a3073": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eee5cdde27df400192191c0947701928": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a4746bf217474a0d9affb86db5c1cce8": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c3d17ba4ef004b6faeaaabb669c6f0d8", "IPY_MODEL_8e083ebf8cde4eb88189d84c877dd900", "IPY_MODEL_f203df875acc417f8a6a9be74c4065b6" ], "layout": "IPY_MODEL_91b290c2575f4a33824a76df9b298a0f" } }, "c3d17ba4ef004b6faeaaabb669c6f0d8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d8062daa055e425b9af71107e9eed4bc", "placeholder": "​", "style": "IPY_MODEL_ab64a446d53d4ac283d4a7cf0113d4c3", "value": "config.json: 100%" } }, "8e083ebf8cde4eb88189d84c877dd900": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25a36d81be1c43048bae15fdc26775fb", "max": 772, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1a9a2f568f294260aa50d4606d4c8d64", "value": 772 } }, "f203df875acc417f8a6a9be74c4065b6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_71ad36ca6f134432820b8e7965c45180", "placeholder": "​", "style": "IPY_MODEL_65001e19aac24e57a0fe608f17c91d5b", "value": " 772/772 [00:00<00:00, 74.3kB/s]" } }, "91b290c2575f4a33824a76df9b298a0f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d8062daa055e425b9af71107e9eed4bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, 
"border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ab64a446d53d4ac283d4a7cf0113d4c3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "25a36d81be1c43048bae15fdc26775fb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1a9a2f568f294260aa50d4606d4c8d64": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "71ad36ca6f134432820b8e7965c45180": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "65001e19aac24e57a0fe608f17c91d5b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", 
"model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2424af04b4c24c7c8652cdcb8c3be0bc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f4c021aa8af3454eba56d764e214931a", "IPY_MODEL_9e0fcbfda4404309bbc30c89f0f6adb0", "IPY_MODEL_6046f23955f34909b43f5b8dcb16d36f" ], "layout": "IPY_MODEL_07f14799c15e4822837eda4ef042baf0" } }, "f4c021aa8af3454eba56d764e214931a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b0df7bd9b6f14c36bc9b07113a1abfb6", "placeholder": "​", "style": "IPY_MODEL_097ffa8e16124025afa058555b858f0e", "value": "tf_model.h5: 100%" } }, "9e0fcbfda4404309bbc30c89f0f6adb0": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2684b7ef46374256a23f301b191838b8", "max": 1246320936, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_4ff1c0d810c244529bd3f2a55d367dd3", "value": 1246320936 } }, "6046f23955f34909b43f5b8dcb16d36f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_76c2512a164845e8948d397265600fbb", "placeholder": "​", "style": "IPY_MODEL_b1797d7a7d3d449cb1215aab02239b23", "value": " 1.25G/1.25G [00:05<00:00, 248MB/s]" } }, "07f14799c15e4822837eda4ef042baf0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, 
"right": null, "top": null, "visibility": null, "width": null } }, "b0df7bd9b6f14c36bc9b07113a1abfb6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "097ffa8e16124025afa058555b858f0e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2684b7ef46374256a23f301b191838b8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4ff1c0d810c244529bd3f2a55d367dd3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "76c2512a164845e8948d397265600fbb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": 
null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b1797d7a7d3d449cb1215aab02239b23": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2e74d76f1a494339ab7252a581074c15": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_47114f8f796940deaba6ea8d628fe066", "IPY_MODEL_81aa0311a21f4400b02c2d0aa3cb8893", "IPY_MODEL_d370846c316d46bbbeca4d870f2bdd19" ], "layout": "IPY_MODEL_ff5a2933b1ac447a91fbe72632c5a2cd" } }, "47114f8f796940deaba6ea8d628fe066": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9c9cedb6648e47f397487b3591735549", "placeholder": "​", "style": 
"IPY_MODEL_888ac39fb29448cca1f4c5a6139d37ba", "value": "tokenizer_config.json: 100%" } }, "81aa0311a21f4400b02c2d0aa3cb8893": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_24ebc1b975b642808a5f0c902008d4d3", "max": 453, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_4c4723c6bc574c36ab82e6447f0b8d58", "value": 453 } }, "d370846c316d46bbbeca4d870f2bdd19": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4b65f625884144c8a4d1d8a377754237", "placeholder": "​", "style": "IPY_MODEL_19f41ddf72a14677a0a164b92646f4dd", "value": " 453/453 [00:00<00:00, 44.8kB/s]" } }, "ff5a2933b1ac447a91fbe72632c5a2cd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": 
null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9c9cedb6648e47f397487b3591735549": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "888ac39fb29448cca1f4c5a6139d37ba": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "24ebc1b975b642808a5f0c902008d4d3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4c4723c6bc574c36ab82e6447f0b8d58": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4b65f625884144c8a4d1d8a377754237": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "19f41ddf72a14677a0a164b92646f4dd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "574cdc3a03c647c8a076285ae7cbdfe7": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0a8f717b462a42169899372361a1d428", "IPY_MODEL_fb367c86f4cf4abe9e49cc586ad08fbc", "IPY_MODEL_a053ce96d4f14082977826930c11ef6d" ], "layout": "IPY_MODEL_37ba28a10cca4285b842f1fbf46ae202" } }, "0a8f717b462a42169899372361a1d428": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1496bd2b8eb0479387ff949ea2d17727", "placeholder": "​", "style": "IPY_MODEL_afdedee168ba4642bb259763bf57b468", "value": "tokenizer.json: 100%" } }, "fb367c86f4cf4abe9e49cc586ad08fbc": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9cacf93a6086448da1209d644a41d94e", "max": 17082660, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_72bbe035096645838ecca74350f6f020", "value": 17082660 } }, "a053ce96d4f14082977826930c11ef6d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d05c9890fb364ad7b72add139aaddc53", "placeholder": "​", "style": "IPY_MODEL_75c75b37e91446909e0d1b7837454541", "value": " 17.1M/17.1M [00:00<00:00, 274MB/s]" } }, "37ba28a10cca4285b842f1fbf46ae202": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1496bd2b8eb0479387ff949ea2d17727": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": 
null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "afdedee168ba4642bb259763bf57b468": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9cacf93a6086448da1209d644a41d94e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "72bbe035096645838ecca74350f6f020": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d05c9890fb364ad7b72add139aaddc53": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "75c75b37e91446909e0d1b7837454541": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e585791e09314c4b8ef6a34e3924d4f2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2a6b145637a74b319adb37a942ebaad6", "IPY_MODEL_c857032102bd4a1aae12ce8469e51e83", "IPY_MODEL_9905bae17058451791dd519c41a80ccc" ], "layout": "IPY_MODEL_5630b74679344224804e529205c5ccb6" } }, "2a6b145637a74b319adb37a942ebaad6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5406db678eaa4b878a06ebf9eea18d7a", "placeholder": "​", "style": "IPY_MODEL_97b658602ec84f61814ff7de958428e3", "value": "special_tokens_map.json: 100%" } }, "c857032102bd4a1aae12ce8469e51e83": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_129bd5ec1cd0426d92a867981c6ddc38", "max": 280, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_31089732637a4bf4a43c74e7655ef561", "value": 280 } }, "9905bae17058451791dd519c41a80ccc": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_15ceb638dd5d49fcb43c45588221d4d8", "placeholder": "​", "style": "IPY_MODEL_20fc1f68ddc24acb87bbb1ed7bc0590e", "value": " 280/280 [00:00<00:00, 24.5kB/s]" } }, "5630b74679344224804e529205c5ccb6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5406db678eaa4b878a06ebf9eea18d7a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, 
"border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "97b658602ec84f61814ff7de958428e3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "129bd5ec1cd0426d92a867981c6ddc38": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "31089732637a4bf4a43c74e7655ef561": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "15ceb638dd5d49fcb43c45588221d4d8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "20fc1f68ddc24acb87bbb1ed7bc0590e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", 
"model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "source": [ "from huggingface_hub import notebook_login\n", "notebook_login()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 331, "referenced_widgets": [ "2daf4bced26548a6bf8319f5f7e274c4", "c0df28e87bb148a3bb0e86be0782411b", "fa7f5d29f6af43c6a53ed241befb9c2c", "b9d0b58707e84e9e9f096441f067f544", "129eb9f545314ca3ac474462e331e22e", "4f8c02cb22df4f8999efa2f17ae0ebed", "91846047b296465fb9291e579a131a87", "ffaebd71bf87461693949cdc6e5c1d24", "7c5f4bb5dc494b2db921b022a0183554", "8d1df099c7db44d39958df9143d2e744", "8373cf9e3b3b4dfb919767cb7749c489", "ed38dac26c7646c3bd2047fd3566d941", "6e1a3cfac9d843ae9c73384ad08c9ad3", "a8b986bdbb7246debd38e5666cc395c1", "f1a9d1b4db7941c7837a09df72d51db8", "b0bacd899325495db2e85006ba4a3073", "eee5cdde27df400192191c0947701928" ] }, "id": "yrM6ZzXldMLo", "outputId": "90c19250-ebde-4caa-aece-2415e6999239" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word CountNew Char Count
0UKRGemini-Pro-1.5PartialTest'Цукор - біла смерть' Дебати щодо шкоди цукру ...8816094597'Цукор - біла смерть' Дебати щодо шкоди цукру ...6614530
1UKRAya-23PartialTestЗ цього парадоксу випадають екзит-поли, адже з...11175668З цього парадоксу випадають екзит-поли, адже з...86594
2UKRGPT-4oPartialTrainВивезені лордом Елгіним скульптури Парфенону в...4303128276Вивезені лордом Елгіним скульптури Парфенону в...3122270
3UKRGPT-4oPartialTest- Я уважно слідкую за цими подіями за пресою т...7342635- Я уважно слідкую за цими подіями за пресою т...73471
4UKRMistral-Large-2411UnchangedTest- Наш виторг цього року навіть трохи менший, а...5031950- Наш виторг цього року навіть трохи менший, а...50319
....................................
99920UKRMistral-Large-2411PartialTrain— Перше і найголовніше — це 220 робочих місць,...10875532— Перше і найголовніше — це 220 робочих місць,...76545
99921UKRAmazon-Nova-Lite-1.0PartialTrainТа це лише офіційні цифри, багато хто мовчить ...6642546Та це лише офіційні цифри, багато хто мовчить ...124847
99922UKRGPT-o1PartialTrain1. На українські землі масонство із Західної Є...32208171 . На українські землі масонство із Західної ...1591134
99923UKRGPT-o1PartialTest'Уряд намагався не помічати кризи до того часу...6849352'Уряд намагався не помічати кризи до того часу...1511045
99924UKRAmazon-Nova-Lite-1.0PartialTestПроводять вигонку в дерев’яних ящиках чи спеці...5637138Проводять вигонку в дерев’яних ящиках чи спеці...111739
\n", "

99925 rows × 11 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", " \n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 99925,\n \"fields\": [\n {\n \"column\": \"ISO\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"UKR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LLM used\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"GPT-o1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Partial\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Data Split\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Test\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 99892,\n \"samples\": [\n \"'\\u042f \\u043f\\u0440\\u0438\\u0439\\u0448\\u043b\\u0430 \\u0437 \\u0432\\u0435\\u043b\\u0438\\u043a\\u0438\\u043c \\u0435\\u043d\\u0442\\u0443\\u0437\\u0456\\u0430\\u0437\\u043c\\u043e\\u043c, \\u043f\\u0440\\u0438\\u0439\\u0448\\u043b\\u0430 \\u0437\\u0430 \\u0440\\u0435\\u0432\\u043e\\u043b\\u044e\\u0446\\u0456\\u044e \\u0433\\u043e\\u043b\\u043e\\u0441\\u0443\\u0432\\u0430\\u0442\\u0438 \\u0442\\u0430 \\u0433\\u043e\\u043b\\u043e\\u0441\\u0443\\u0432\\u0430\\u043b\\u0430 \\u0437\\u0430 \\u041d\\u0456\\u043a\\u043e\\u043b\\u0443 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430, \\u0449\\u043e\\u0431 \\u043d\\u0430\\u0440\\u0435\\u0448\\u0442\\u0456 \\u0437\\u0430 20 \\u0440\\u043e\\u043a\\u0456\\u0432 \\u0437\\u043c\\u0456\\u043d\\u0438 \\u0431\\u0443\\u043b\\u0438 \\u0434\\u043b\\u044f \\u043d\\u0430\\u0448\\u0438\\u0445 \\u0434\\u0456\\u0442\\u0435\\u0439, 
\\u043e\\u043d\\u0443\\u043a\\u0456\\u0432', - \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c \\u043c\\u0435\\u0448\\u043a\\u0430\\u043d\\u043a\\u0430 \\u0404\\u0440\\u0435\\u0432\\u0430\\u043d\\u0430, \\u043f\\u0435\\u043d\\u0441\\u0456\\u043e\\u043d\\u0435\\u0440\\u043a\\u0430 \\u041d\\u0456\\u043d\\u0435\\u043b\\u044c \\u0410\\u0442\\u0430\\u0431\\u0435\\u043a\\u044f\\u043d. - \\u0412\\u0438\\u0431\\u043e\\u0440\\u0438 \\u0432\\u0430\\u0436\\u043b\\u0438\\u0432\\u0456 \\u0434\\u043b\\u044f \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457, \\u0449\\u043e\\u0431 \\u0437\\u0430\\u0442\\u0432\\u0435\\u0440\\u0434\\u0438\\u043b\\u0430\\u0441\\u044f \\u0432\\u043b\\u0430\\u0434\\u0430 \\u043d\\u0430\\u0440\\u0435\\u0448\\u0442\\u0456, \\u0430 \\u0442\\u043e \\u0432\\u043e\\u043d\\u0438 \\u0441\\u0438\\u0434\\u044f\\u0442\\u044c \\u0432 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0456 \\u0442\\u0430 \\u0437\\u0430\\u0432\\u0430\\u0436\\u0430\\u044e\\u0442\\u044c, \\u043e\\u0441\\u044c \\u0446\\u044f \\u0441\\u0442\\u0430\\u0440\\u0430 \\u0433\\u0432\\u0430\\u0440\\u0434\\u0456\\u044f \\u0432 55 \\u0447\\u043e\\u043b\\u043e\\u0432\\u0456\\u043a \\u0437\\u0430\\u0432\\u0430\\u0436\\u0430\\u044e\\u0442\\u044c \\u043d\\u043e\\u0432\\u043e\\u043c\\u0443 \\u0436\\u0438\\u0442\\u0442\\u044e'. \\u0423 \\u0442\\u043e\\u043c\\u0443, \\u0449\\u043e \\u0446\\u0456 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0438 \\u043f\\u0440\\u043e\\u0445\\u043e\\u0434\\u0438\\u043b\\u0438 \\u0447\\u0435\\u0441\\u043d\\u043e, \\u0432\\u043e\\u043d\\u0430 \\u043d\\u0435 \\u0441\\u0443\\u043c\\u043d\\u0456\\u0432\\u0430\\u0454\\u0442\\u044c\\u0441\\u044f. 
\\u0417\\u0430 \\u0457\\u0457 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438, \\u0432 \\u0446\\u044c\\u043e\\u043c\\u0443 \\u0457\\u0445\\u043d\\u044f \\u0432\\u0456\\u0434\\u043c\\u0456\\u043d\\u043d\\u0456\\u0441\\u0442\\u044c \\u0432\\u0456\\u0434 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445. '\\u0423 \\u043c\\u0438\\u043d\\u0443\\u043b\\u0438\\u0445 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445 \\u0432\\u0441\\u0456\\u043c \\u0440\\u043e\\u0437\\u0434\\u0430\\u0432\\u0430\\u043b\\u0438 \\u0445\\u0430\\u0431\\u0430\\u0440\\u0456, \\u0432 \\u043d\\u0430\\u0448\\u043e\\u043c\\u0443 \\u0431\\u0443\\u0434\\u0438\\u043d\\u043a\\u0443, \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u043b\\u0438, 3-4 \\u0440\\u043e\\u0434\\u0438\\u043d\\u0438 \\u043d\\u0435 \\u043e\\u0442\\u0440\\u0438\\u043c\\u0430\\u043b\\u0438 ... \\u0410 \\u0441\\u044c\\u043e\\u0433\\u043e\\u0434\\u043d\\u0456 \\u043d\\u0456\\u0445\\u0442\\u043e \\u043d\\u0456\\u0447\\u043e\\u0433\\u043e \\u043d\\u0435 \\u0434\\u0430\\u0454, \\u0432\\u043e\\u043b\\u0435\\u0432\\u0438\\u044f\\u0432\\u043b\\u0435\\u043d\\u043d\\u044f \\u043d\\u0430\\u0440\\u043e\\u0434\\u0443', - \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c \\u0432\\u043e\\u043d\\u0430. 
\\u0413\\u043e\\u043b\\u043e\\u0432\\u0430 \\u0426\\u0435\\u043d\\u0442\\u0440\\u0430\\u043b\\u044c\\u043d\\u043e\\u0457 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0447\\u043e\\u0457 \\u043a\\u043e\\u043c\\u0456\\u0441\\u0456\\u0457 \\u0422\\u0438\\u0433\\u0440\\u0430\\u043d \\u041c\\u0443\\u043a\\u0443\\u0447\\u044f\\u043d \\u0432 \\u0456\\u043d\\u0442\\u0435\\u0440\\u0432'\\u044e \\u0420\\u0430\\u0434\\u0456\\u043e \\u0410\\u0437\\u0430\\u0442\\u0443\\u0442\\u044e\\u043d \\u0432\\u0438\\u0441\\u043b\\u043e\\u0432\\u0438\\u0432 \\u0443\\u043f\\u0435\\u0432\\u043d\\u0435\\u043d\\u0456\\u0441\\u0442\\u044c, \\u0449\\u043e \\u0432\\u0438\\u0431\\u043e\\u0440\\u0438, \\u044f\\u043a\\u0456 \\u043f\\u0440\\u043e\\u0439\\u0448\\u043b\\u0438 \\u0432 \\u043d\\u0435\\u0434\\u0456\\u043b\\u044e, \\u0431\\u0443\\u043b\\u0438 '\\u0432\\u0456\\u043b\\u044c\\u043d\\u0438\\u043c\\u0438, \\u0447\\u0435\\u0441\\u043d\\u0438\\u043c\\u0438 \\u0442\\u0430 \\u043f\\u0440\\u043e\\u0437\\u043e\\u0440\\u0438\\u043c\\u0438'. \\u041f\\u043e\\u0437\\u0438\\u0442\\u0438\\u0432\\u043d\\u043e \\u043e\\u0446\\u0456\\u043d\\u0438\\u043b\\u0438 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0438 \\u0432 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457 \\u0442\\u0430 \\u043c\\u0456\\u0436\\u043d\\u0430\\u0440\\u043e\\u0434\\u043d\\u0456 \\u0441\\u043f\\u043e\\u0441\\u0442\\u0435\\u0440\\u0456\\u0433\\u0430\\u0447\\u0456. \\u0411\\u0435\\u0437 \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0457? 
'\\u0420\\u0435\\u0432\\u043e\\u043b\\u044e\\u0446\\u0456\\u044f \\u0431\\u0443\\u043b\\u0430 \\u043e\\u0444\\u043e\\u0440\\u043c\\u043b\\u0435\\u043d\\u0430 \\u0446\\u0438\\u043c\\u0438 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u043c\\u0438, \\u0456 \\u043c\\u043e\\u0436\\u043d\\u0430 \\u0432\\u0432\\u0430\\u0436\\u0430\\u0442\\u0438 \\u0457\\u0457 \\u0432 \\u044f\\u043a\\u043e\\u043c\\u0443\\u0441\\u044c \\u0441\\u0435\\u043d\\u0441\\u0456 \\u0437\\u0430\\u0432\\u0435\\u0440\\u0448\\u0435\\u043d\\u043e\\u044e. \\u0424\\u0430\\u043a\\u0442\\u0438\\u0447\\u043d\\u043e \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d \\u0432\\u0436\\u0435 \\u043f\\u043e\\u0432\\u0438\\u043d\\u0435\\u043d \\u0441\\u0442\\u0430\\u0442\\u0438 \\u043b\\u0456\\u0434\\u0435\\u0440\\u043e\\u043c, \\u044f\\u043a\\u0438\\u0439 \\u043f\\u0435\\u0440\\u0435\\u0441\\u0442\\u0430\\u0432 \\u0432\\u043e\\u044e\\u0432\\u0430\\u0442\\u0438 \\u0437 \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u044e \\u0432\\u043b\\u0430\\u0434\\u043e\\u044e', - \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c \\u0430\\u043d\\u0430\\u043b\\u0456\\u0442\\u0438\\u043a \\u0421\\u0430\\u043c\\u0432\\u0435\\u043b \\u041c\\u0430\\u0440\\u0442\\u0438\\u0440\\u043e\\u0441\\u044f\\u043d. \\u0420\\u0435\\u0441\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u0430\\u043d\\u0441\\u044c\\u043a\\u0430 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f - \\u0432 \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443 \\u043f\\u0440\\u0430\\u0432\\u043b\\u044f\\u0447\\u0430 - \\u043d\\u0435 \\u0437\\u043c\\u043e\\u0433\\u043b\\u0430 \\u043f\\u043e\\u0434\\u043e\\u043b\\u0430\\u0442\\u0438 5% \\u0431\\u0430\\u0440'\\u0454\\u0440, \\u043d\\u0435\\u043e\\u0431\\u0445\\u0456\\u0434\\u043d\\u0438\\u0439 \\u0434\\u043b\\u044f \\u043f\\u0440\\u043e\\u0445\\u043e\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f \\u0432 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442. 
\\u041a\\u0440\\u0456\\u043c '\\u041c\\u043e\\u0433\\u043e \\u043a\\u0440\\u043e\\u043a\\u0443' \\u0432 \\u041d\\u0430\\u0446\\u0456\\u043e\\u043d\\u0430\\u043b\\u044c\\u043d\\u0456 \\u0437\\u0431\\u043e\\u0440\\u0438 \\u043f\\u043e\\u0442\\u0440\\u0430\\u043f\\u0438\\u043b\\u0438 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f \\u0432\\u0435\\u043b\\u0438\\u043a\\u043e\\u0433\\u043e \\u0431\\u0456\\u0437\\u043d\\u0435\\u0441\\u043c\\u0435\\u043d\\u0430 \\u0413\\u0430\\u0433\\u0456\\u043a\\u0430 \\u0426\\u0430\\u0440\\u0443\\u043a\\u044f\\u043d\\u0430 '\\u041a\\u0432\\u0456\\u0442\\u0443\\u0447\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f' (8,27% \\u0433\\u043e\\u043b\\u043e\\u0441\\u0456\\u0432) \\u0442\\u0430 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f '\\u041e\\u0441\\u0432\\u0456\\u0447\\u0435\\u043d\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f' (6,37%), \\u044f\\u043a\\u0430 \\u0432 \\u0430\\u043b\\u044c\\u044f\\u043d\\u0441\\u0456 \\u0437 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0454\\u044e \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430 '\\u0426\\u0438\\u0432\\u0456\\u043b\\u044c\\u043d\\u0438\\u0439 \\u0434\\u043e\\u0433\\u043e\\u0432\\u0456\\u0440' \\u0431\\u0440\\u0430\\u043b\\u0430 \\u0443\\u0447\\u0430\\u0441\\u0442\\u044c \\u0432 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0441\\u044c\\u043a\\u0438\\u0445 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445. \\u041f\\u0430\\u0440\\u0442\\u0456\\u044e \\u0413\\u0430\\u0433\\u0456\\u043a\\u0430 \\u0426\\u0430\\u0440\\u0443\\u043a\\u044f\\u043d\\u0430 \\u043f\\u0456\\u0434\\u0442\\u0440\\u0438\\u043c\\u0443\\u0454 \\u0432\\u0435\\u043b\\u0438\\u043a\\u0438\\u0439 \\u0431\\u0456\\u0437\\u043d\\u0435\\u0441. 
\\u0415\\u043a\\u0441\\u043f\\u0435\\u0440\\u0442\\u0438 \\u0432\\u0432\\u0430\\u0436\\u0430\\u044e\\u0442\\u044c, \\u0449\\u043e \\u043d\\u0430\\u0432\\u0440\\u044f\\u0434 \\u0447\\u0438 \\u0432\\u043e\\u043d\\u0430 \\u0431\\u0443\\u0434\\u0435 \\u0436\\u043e\\u0440\\u0441\\u0442\\u043a\\u043e\\u044e \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0454\\u044e \\u0434\\u043e \\u0443\\u0440\\u044f\\u0434\\u0443 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430 \\u0424\\u043e\\u0440\\u043c\\u0430\\u043b\\u044c\\u043d\\u043e \\u0432 \\u043d\\u043e\\u0432\\u043e\\u043c\\u0443 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0456 \\u043d\\u0435 \\u043c\\u0435\\u043d\\u0448\\u0435 \\u0442\\u0440\\u0435\\u0442\\u0438\\u043d\\u0438 \\u043c\\u0430\\u043d\\u0434\\u0430\\u0442\\u0456\\u0432 \\u0431\\u0443\\u0434\\u0435 \\u0443 \\u0441\\u0443\\u043f\\u0435\\u0440\\u043d\\u0438\\u043a\\u0456\\u0432 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0457 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430, \\u043f\\u0440\\u043e\\u0442\\u0435 \\u0441\\u0435\\u0440\\u0439\\u043e\\u0437\\u043d\\u043e\\u044e \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0454\\u044e \\u0446\\u0435 \\u043d\\u0430\\u0437\\u0432\\u0430\\u0442\\u0438 \\u043d\\u0435 \\u043c\\u043e\\u0436\\u043d\\u0430, \\u0437\\u0430\\u0437\\u043d\\u0430\\u0447\\u0430\\u044e\\u0442\\u044c \\u0430\\u043d\\u0430\\u043b\\u0456\\u0442\\u0438\\u043a\\u0438. 
\\u0412 '\\u041a\\u0432\\u0456\\u0442\\u0443\\u0447\\u0456\\u0439 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457' \\u0431\\u0430\\u0433\\u0430\\u0442\\u043e \\u043f\\u0440\\u0435\\u0434\\u0441\\u0442\\u0430\\u0432\\u043d\\u0438\\u043a\\u0456\\u0432 \\u0432\\u0435\\u043b\\u0438\\u043a\\u043e\\u0433\\u043e \\u0431\\u0456\\u0437\\u043d\\u0435\\u0441\\u0443, \\u0456 \\u0432\\u043e\\u043d\\u0438 \\u0443\\u043d\\u0438\\u043a\\u0430\\u044e\\u0442\\u044c \\u0432\\u0456\\u0434\\u043a\\u0440\\u0438\\u0442\\u043e\\u0433\\u043e \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u044f\\u043d\\u043d\\u044f, \\u043f\\u0440\\u0438\\u0447\\u043e\\u043c\\u0443 \\u0431\\u0443\\u0434\\u044c-\\u044f\\u043a\\u0456\\u0439 \\u0432\\u043b\\u0430\\u0434\\u0456', - \\u0437\\u0430\\u0437\\u043d\\u0430\\u0447\\u0430\\u0454 \\u043f\\u043e\\u043b\\u0456\\u0442\\u043e\\u043b\\u043e\\u0433 \\u0442\\u0430 \\u043d\\u0430\\u0443\\u043a\\u043e\\u0432\\u0438\\u0439 \\u0441\\u043f\\u0456\\u0432\\u0440\\u043e\\u0431\\u0456\\u0442\\u043d\\u0438\\u043a \\u0406\\u043d\\u0441\\u0442\\u0438\\u0442\\u0443\\u0442\\u0443 \\u041a\\u0430\\u0432\\u043a\\u0430\\u0437\\u0443 \\u0413\\u0440\\u0430\\u043d\\u0442 \\u041c\\u0456\\u043a\\u0430\\u0454\\u043b\\u044f\\u043d. '\\u041a\\u0432\\u0456\\u0442\\u0443\\u0447\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f' - \\u0446\\u0435 \\u0441\\u043a\\u043e\\u0440\\u0456\\u0448\\u0435 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f, \\u044f\\u043a\\u0430 \\u043b\\u043e\\u0431\\u0456\\u044e\\u0454 \\u0456\\u043d\\u0442\\u0435\\u0440\\u0435\\u0441\\u0438 \\u043f\\u0435\\u0432\\u043d\\u0438\\u0445 \\u0435\\u043a\\u043e\\u043d\\u043e\\u043c\\u0456\\u0447\\u043d\\u0438\\u0445 \\u043a\\u0456\\u043b, - \\u0437\\u0433\\u043e\\u0434\\u0435\\u043d \\u041c\\u0430\\u0440\\u0442\\u0438\\u0440\\u043e\\u0441\\u044f\\u043d. 
- \\u0410 '\\u041e\\u0441\\u0432\\u0456\\u0447\\u0435\\u043d\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f' (6,37% \\u0433\\u043e\\u043b\\u043e\\u0441\\u0456\\u0432) \\u043e\\u0441\\u043e\\u0431\\u043b\\u0438\\u0432\\u0438\\u0445 \\u0440\\u043e\\u0437\\u0431\\u0456\\u0436\\u043d\\u043e\\u0441\\u0442\\u0435\\u0439 \\u0437 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u043e\\u043c \\u043d\\u0435 \\u043c\\u0430\\u0454. \\u0411\\u0443\\u0434\\u0435 \\u0456\\u043c\\u0456\\u0442\\u0430\\u0446\\u0456\\u044f \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0439\\u043d\\u043e\\u0457 \\u0430\\u043a\\u0442\\u0438\\u0432\\u043d\\u043e\\u0441\\u0442\\u0456, \\u0430\\u043b\\u0435 \\u0432 \\u0446\\u0456\\u043b\\u043e\\u043c\\u0443 \\u0457\\u0457 \\u0432\\u0430\\u0440\\u0442\\u043e \\u0447\\u0435\\u043a\\u0430\\u0442\\u0438 \\u0441\\u043a\\u043e\\u0440\\u0456\\u0448\\u0435 \\u0432\\u0456\\u0434 \\u043f\\u043e\\u0437\\u0430\\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0441\\u044c\\u043a\\u0438\\u0445 \\u0441\\u0438\\u043b'. \\u041a\\u0430\\u043d\\u0434\\u0438\\u0434\\u0430\\u0442 \\u0432 \\u0434\\u0435\\u043f\\u0443\\u0442\\u0430\\u0442\\u0438 \\u0432\\u0456\\u0434 '\\u041a\\u0432\\u0456\\u0442\\u0443\\u0447\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f' \\u0410\\u0440\\u043c\\u0430\\u043d \\u0410\\u0431\\u043e\\u0432\\u044f\\u043d, \\u043e\\u0434\\u043d\\u0430\\u043a, \\u0437 \\u0446\\u0438\\u043c \\u043d\\u0435 \\u0437\\u0433\\u043e\\u0434\\u0435\\u043d; \\u0437\\u0430 \\u0439\\u043e\\u0433\\u043e \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438, \\u0439\\u043e\\u0433\\u043e \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f \\u0437\\u0430\\u0439\\u043c\\u0430\\u0454 '\\u043f\\u0440\\u0430\\u0432\\u0438\\u043b\\u044c\\u043d\\u0443 \\u0446\\u0435\\u043d\\u0442\\u0440\\u0438\\u0441\\u0442\\u0441\\u044c\\u043a\\u0443 \\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u044e'. 
'\\u041c\\u0438 \\u0441\\u0435\\u0431\\u0435 \\u0432\\u0432\\u0430\\u0436\\u0430\\u0454\\u043c\\u043e \\u0441\\u0438\\u043b\\u043e\\u044e, \\u044f\\u043a\\u0430 \\u043f\\u0456\\u0441\\u043b\\u044f \\u0432\\u0438\\u0431\\u043e\\u0440\\u0456\\u0432 \\u0431\\u0443\\u0434\\u0435 \\u043d\\u0430\\u043c\\u0430\\u0433\\u0430\\u0442\\u0438\\u0441\\u044f \\u0437\\u0440\\u043e\\u0431\\u0438\\u0442\\u0438 \\u0432\\u0441\\u0435, \\u0449\\u043e\\u0431 \\u0445\\u043e\\u0440\\u043e\\u0448\\u0435 \\u0431\\u0443\\u043b\\u043e \\u0445\\u043e\\u0440\\u043e\\u0448\\u0438\\u043c, \\u0430 \\u044f\\u043a\\u0449\\u043e \\u0449\\u043e\\u0441\\u044c \\u043f\\u043e\\u0433\\u0430\\u043d\\u0435 \\u0431\\u0443\\u0434\\u0435, \\u0442\\u043e \\u043f\\u0440\\u043e \\u0446\\u0435 \\u043c\\u043e\\u0432\\u0447\\u0430\\u0442\\u0438 \\u043d\\u0435 \\u0437\\u0431\\u0438\\u0440\\u0430\\u0454\\u043c\\u043e\\u0441\\u044f, - \\u043a\\u0430\\u0436\\u0435 \\u0432\\u0456\\u043d. - \\u0422\\u0456, \\u0445\\u0442\\u043e \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c, \\u0449\\u043e \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442 \\u0431\\u0443\\u0434\\u0435 \\u0431\\u0435\\u0437 \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0457, - \\u0446\\u0435 \\u0442\\u0456 \\u043b\\u044e\\u0434\\u0438, \\u044f\\u043a\\u0456 \\u0437\\u0432\\u0438\\u043a\\u043b\\u0438 \\u0431\\u0430\\u0447\\u0438\\u0442\\u0438 \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u044e \\u0432 \\u0436\\u043e\\u0440\\u0441\\u0442\\u043a\\u043e\\u043c\\u0443 \\u043a\\u043b\\u0456\\u043d\\u0447\\u0456 \\u0437 \\u0432\\u043b\\u0430\\u0434\\u043e\\u044e. 
\\u041d\\u0456\\u0445\\u0442\\u043e \\u043d\\u0435 \\u0437\\u0431\\u0438\\u0440\\u0430\\u0454\\u0442\\u044c\\u0441\\u044f \\u043e\\u043f\\u043e\\u043d\\u0443\\u0432\\u0430\\u0442\\u0438, \\u0442\\u0456\\u043b\\u044c\\u043a\\u0438 \\u0449\\u043e\\u0431 \\u043e\\u043f\\u043e\\u043d\\u0443\\u0432\\u0430\\u0442\\u0438, \\u0456 \\u043d\\u0456\\u0445\\u0442\\u043e \\u043d\\u0435 \\u0437\\u0431\\u0438\\u0440\\u0430\\u0454\\u0442\\u044c\\u0441\\u044f \\u0442\\u0438\\u0441\\u043d\\u0443\\u0442\\u0438 \\u043d\\u0430 \\u043a\\u043d\\u043e\\u043f\\u043a\\u0438, \\u043d\\u0435 \\u0440\\u043e\\u0437\\u0433\\u043b\\u044f\\u043d\\u0443\\u0432\\u0448\\u0438 \\u043f\\u0438\\u0442\\u0430\\u043d\\u043d\\u044f\\u00bb. \\u041f\\u0430\\u0440\\u0442\\u0456\\u044f '\\u041e\\u0441\\u0432\\u0456\\u0447\\u0435\\u043d\\u0430 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f', \\u0443 \\u0441\\u0432\\u043e\\u044e \\u0447\\u0435\\u0440\\u0433\\u0443, \\u043e\\u0431\\u0456\\u0446\\u044f\\u0454 \\u0441\\u0444\\u043e\\u0440\\u043c\\u0443\\u0432\\u0430\\u0442\\u0438 '\\u0456\\u0434\\u0435\\u0439\\u043d\\u0443 \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u044e' \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0441\\u044c\\u043a\\u0456\\u0439 \\u0431\\u0456\\u043b\\u044c\\u0448\\u043e\\u0441\\u0442\\u0456 \\u0442\\u0430 \\u043d\\u0430\\u0437\\u0438\\u0432\\u0430\\u0454 \\u0441\\u0435\\u0431\\u0435 \\u0454\\u0434\\u0438\\u043d\\u043e\\u044e \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0454\\u044e \\u0432 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0456 \\u043f\\u0456\\u0441\\u043b\\u044f \\u0446\\u0438\\u0445 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0456\\u0432. 
\\u0420\\u0435\\u0441\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u0430\\u043d\\u0441\\u044c\\u043a\\u0430 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f \\u0437\\u0430\\u044f\\u0432\\u043b\\u044f\\u043b\\u0430 \\u043f\\u0440\\u043e \\u043f\\u043e\\u0440\\u0443\\u0448\\u0435\\u043d\\u043d\\u044f \\u0432 \\u0445\\u043e\\u0434\\u0456 \\u0433\\u043e\\u043b\\u043e\\u0441\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u0456 \\u0437\\u0432\\u0438\\u043d\\u0443\\u0432\\u0430\\u0447\\u0443\\u0432\\u0430\\u043b\\u0430 \\u0443\\u0440\\u044f\\u0434 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430 \\u0432 \\u0442\\u0438\\u0441\\u043a\\u0443 \\u043d\\u0430 \\u043e\\u043f\\u043e\\u043d\\u0435\\u043d\\u0442\\u0456\\u0432 \\u0456 \\u043d\\u0430\\u043c\\u0456\\u0440 '\\u0437\\u043c\\u0456\\u0446\\u043d\\u0438\\u0442\\u0438 \\u0430\\u0442\\u043c\\u043e\\u0441\\u0444\\u0435\\u0440\\u0443 \\u0441\\u0442\\u0440\\u0430\\u0445\\u0443' \\u0432 \\u043a\\u0440\\u0430\\u0457\\u043d\\u0456 \\u043d\\u0430\\u043f\\u0435\\u0440\\u0435\\u0434\\u043e\\u0434\\u043d\\u0456 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0456\\u0432. \\u041f\\u0456\\u0441\\u043b\\u044f \\u0442\\u043e\\u0433\\u043e \\u044f\\u043a \\u0432\\u0456\\u043d \\u043f\\u0440\\u0438\\u0439\\u0448\\u043e\\u0432 \\u0434\\u043e \\u0432\\u043b\\u0430\\u0434\\u0438, \\u0431\\u0430\\u0433\\u0430\\u0442\\u044c\\u043e\\u043c \\u043a\\u043e\\u043b\\u0438\\u0448\\u043d\\u0456\\u043c \\u0447\\u0438\\u043d\\u043e\\u0432\\u043d\\u0438\\u043a\\u0430\\u043c \\u0442\\u0430 \\u0457\\u0445\\u043d\\u0456\\u043c \\u0440\\u043e\\u0434\\u0438\\u0447\\u0430\\u043c \\u0431\\u0443\\u043b\\u0438 \\u043f\\u0440\\u0435\\u0434'\\u044f\\u0432\\u043b\\u0435\\u043d\\u0456 \\u0437\\u0432\\u0438\\u043d\\u0443\\u0432\\u0430\\u0447\\u0435\\u043d\\u043d\\u044f. 
\\u0421\\u0430\\u043c \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d \\u0437\\u0430\\u043f\\u0435\\u0440\\u0435\\u0447\\u0443\\u0454 \\u0437\\u0432'\\u044f\\u0437\\u043e\\u043a \\u043c\\u0456\\u0436 \\u043a\\u0440\\u0438\\u043c\\u0456\\u043d\\u0430\\u043b\\u044c\\u043d\\u0438\\u043c\\u0438 \\u0441\\u043f\\u0440\\u0430\\u0432\\u0430\\u043c\\u0438 \\u0442\\u0430 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u043c\\u0438. \\u041e\\u0447\\u0456\\u043a\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u0442\\u0430 \\u043d\\u0430\\u0434\\u0456\\u0457 \\u0417\\u0430 \\u0434\\u0430\\u043d\\u0438\\u043c\\u0438 \\u0426\\u0435\\u043d\\u0442\\u0440\\u0430\\u043b\\u044c\\u043d\\u043e\\u0457 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0447\\u043e\\u0457 \\u043a\\u043e\\u043c\\u0456\\u0441\\u0456\\u0457, \\u043d\\u0430 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445 \\u043f\\u0440\\u043e\\u0433\\u043e\\u043b\\u043e\\u0441\\u0443\\u0432\\u0430\\u043b\\u043e \\u043c\\u0435\\u043d\\u0448\\u0435 \\u043f\\u043e\\u043b\\u043e\\u0432\\u0438\\u043d\\u0438 - 48,63% \\u0432\\u0438\\u0431\\u043e\\u0440\\u0446\\u0456\\u0432. \\u0414\\u0435\\u044f\\u043a\\u0456 \\u0430\\u043d\\u0430\\u043b\\u0456\\u0442\\u0438\\u043a\\u0438 \\u0432\\u0432\\u0430\\u0436\\u0430\\u044e\\u0442\\u044c, \\u0449\\u043e \\u043f\\u0440\\u0438\\u0447\\u0438\\u043d\\u0430 \\u0442\\u0430\\u043a\\u043e\\u0457 \\u043f\\u0430\\u0441\\u0438\\u0432\\u043d\\u043e\\u0441\\u0442\\u0456 - \\u043d\\u0438\\u0437\\u044c\\u043a\\u0430 \\u0434\\u043e\\u0432\\u0456\\u0440\\u0430 \\u0434\\u043e \\u043f\\u043e\\u043b\\u0456\\u0442\\u0438\\u0447\\u043d\\u0438\\u0445 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0439 \\u0432 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457. 
\\u0414\\u043b\\u044f \\u043f\\u043e\\u0440\\u0456\\u0432\\u043d\\u044f\\u043d\\u043d\\u044f - \\u043d\\u0430 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445 \\u0443 \\u043a\\u0432\\u0456\\u0442\\u043d\\u0456 2017 \\u0440\\u043e\\u043a\\u0443 \\u043f\\u0440\\u043e\\u0433\\u043e\\u043b\\u043e\\u0441\\u0443\\u0432\\u0430\\u043b\\u0438 60,86%. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u041c\\u0430\\u0440\\u0442\\u0438\\u0440\\u043e\\u0441\\u044f\\u043d\\u0430, \\u0446\\u0435 \\u043c\\u043e\\u0436\\u0435 \\u0431\\u0443\\u0442\\u0438 \\u043f\\u043e\\u0432'\\u044f\\u0437\\u0430\\u043d\\u043e \\u0437 \\u0442\\u0438\\u043c, \\u0449\\u043e \\u0431\\u0430\\u0433\\u0430\\u0442\\u043e \\u0437 \\u043d\\u0438\\u0445 \\u043f\\u0435\\u0440\\u0435\\u0431\\u0443\\u0432\\u0430\\u044e\\u0442\\u044c \\u0437\\u0430 \\u043a\\u043e\\u0440\\u0434\\u043e\\u043d\\u043e\\u043c \\u043d\\u0430 \\u0437\\u0430\\u0440\\u043e\\u0431\\u0456\\u0442\\u043a\\u0430\\u0445, \\u0456 \\u0432\\u0456\\u0434\\u0441\\u0443\\u0442\\u043d\\u0456\\u0441\\u0442\\u044e \\u0444\\u0430\\u043a\\u0442\\u043e\\u0440\\u0443 \\u0430\\u0434\\u043c\\u0456\\u043d\\u0456\\u0441\\u0442\\u0440\\u0430\\u0442\\u0438\\u0432\\u043d\\u043e\\u0433\\u043e \\u0440\\u0435\\u0441\\u0443\\u0440\\u0441\\u0443. '\\u0420\\u0430\\u043d\\u0456\\u0448\\u0435 \\u043b\\u044e\\u0434\\u0435\\u0439 \\u0432\\u0440\\u0430\\u043d\\u0446\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0431\\u0443\\u0441\\u0430\\u043c\\u0438 \\u043f\\u0440\\u0438\\u0432\\u043e\\u0437\\u0438\\u043b\\u0438 \\u043d\\u0430 \\u0434\\u0456\\u043b\\u044f\\u043d\\u043a\\u0438' - \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c \\u0432\\u0456\\u043d. 
\\u0414\\u0435\\u044f\\u043a\\u0456 \\u0436\\u0438\\u0442\\u0435\\u043b\\u0456 \\u0404\\u0440\\u0435\\u0432\\u0430\\u043d\\u0430 \\u0437\\u0456\\u0437\\u043d\\u0430\\u044e\\u0442\\u044c\\u0441\\u044f, \\u0449\\u043e \\u043f\\u043e\\u043f\\u0440\\u0438 \\u0442\\u0435, \\u0449\\u043e \\u0432\\u043e\\u043d\\u0438 \\u043f\\u0456\\u0434\\u0442\\u0440\\u0438\\u043c\\u0443\\u0432\\u0430\\u043b\\u0438 \\u043c\\u0438\\u0440\\u043d\\u0443 \\u0440\\u0435\\u0432\\u043e\\u043b\\u044e\\u0446\\u0456\\u044e \\u0456 \\u043f\\u0440\\u043e\\u0442\\u0435\\u0441\\u0442\\u043d\\u0438\\u0439 \\u0440\\u0443\\u0445 \\u043d\\u0430\\u0432\\u0435\\u0441\\u043d\\u0456 \\u0446\\u044c\\u043e\\u0433\\u043e \\u0440\\u043e\\u043a\\u0443, \\u0457\\u0445 \\u043e\\u0447\\u0456\\u043a\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u043d\\u0435 \\u0432\\u0438\\u043f\\u0440\\u0430\\u0432\\u0434\\u0430\\u043b\\u0438\\u0441\\u044f. '\\u0423 \\u043f\\u0440\\u043e\\u0441\\u0442\\u043e\\u0457 \\u043b\\u044e\\u0434\\u0438\\u043d\\u0438 \\u043d\\u0456\\u0447\\u043e\\u0433\\u043e \\u043d\\u0435 \\u0437\\u043c\\u0456\\u043d\\u0438\\u043b\\u043e\\u0441\\u044f. 
\\u041c\\u0438 \\u0445\\u043e\\u0442\\u0456\\u043b\\u0438 \\u043f\\u043e\\u0437\\u0431\\u0443\\u0442\\u0438\\u0441\\u044f \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u0447\\u0438\\u043d\\u043e\\u0432\\u043d\\u0438\\u043a\\u0456\\u0432, \\u0430\\u043b\\u0435 \\u0443 \\u0446\\u0438\\u0445 \\u0434\\u043e\\u0441\\u0432\\u0456\\u0434\\u0443 \\u043d\\u0435\\u043c\\u0430\\u0454 \\u0432\\u0437\\u0430\\u0433\\u0430\\u043b\\u0456', - \\u0441\\u043a\\u0430\\u0440\\u0436\\u0438\\u0442\\u044c\\u0441\\u044f \\u043e\\u0434\\u0438\\u043d \\u0437 \\u0432\\u043b\\u0430\\u0441\\u043d\\u0438\\u043a\\u0456\\u0432 \\u043d\\u0435\\u0432\\u0435\\u043b\\u0438\\u043a\\u043e\\u0433\\u043e \\u0431\\u0456\\u0437\\u043d\\u0435\\u0441\\u0443 \\u0432 \\u0404\\u0440\\u0435\\u0432\\u0430\\u043d\\u0456, \\u044f\\u043a\\u0438\\u0439 \\u0432\\u0432\\u0430\\u0436\\u0430\\u0454 \\u0437\\u0430 \\u043a\\u0440\\u0430\\u0449\\u0435 \\u043d\\u0435 \\u043d\\u0430\\u0437\\u0438\\u0432\\u0430\\u0442\\u0438 \\u0441\\u0432\\u043e\\u0433\\u043e \\u0456\\u043c\\u0435\\u043d\\u0456. \\u042f\\u0432\\u043a\\u0430 \\u043d\\u0430 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445 \\u0431\\u0443\\u043b\\u0430 \\u043d\\u0438\\u0437\\u044c\\u043a\\u043e\\u044e - \\u043f\\u043e\\u043d\\u0430\\u0434 48%. 
\\u041e\\u0433\\u043b\\u044f\\u0434\\u0430\\u0447\\u0456 \\u043f\\u0440\\u0438\\u043f\\u0443\\u0441\\u043a\\u0430\\u044e\\u0442\\u044c, \\u0449\\u043e \\u043d\\u0430 \\u043d\\u0435\\u0457 \\u0432\\u043f\\u043b\\u0438\\u043d\\u0443\\u043b\\u0430 \\u0432\\u0438\\u0441\\u043e\\u043a\\u0430 \\u0442\\u0440\\u0443\\u0434\\u043e\\u0432\\u0430 \\u043c\\u0456\\u0433\\u0440\\u0430\\u0446\\u0456\\u044f \\u0442\\u0430 \\u0432\\u0456\\u0434\\u0441\\u0443\\u0442\\u043d\\u0456\\u0441\\u0442\\u044c \\u0432\\u0438\\u043a\\u043e\\u0440\\u0438\\u0441\\u0442\\u0430\\u043d\\u043d\\u044f \\u0430\\u0434\\u043c\\u0456\\u043d\\u0440\\u0435\\u0441\\u0443\\u0440\\u0441\\u0443 \\u043d\\u0430 \\u0446\\u0438\\u0445 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0430\\u0445 \\u041f\\u0435\\u0432\\u043d\\u0435 \\u0440\\u043e\\u0437\\u0447\\u0430\\u0440\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u0440\\u0435\\u0432\\u043e\\u043b\\u044e\\u0446\\u0456\\u0454\\u044e \\u0432 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457 \\u0434\\u0456\\u0439\\u0441\\u043d\\u043e \\u0454, \\u0456 \\u0446\\u0435 \\u0432\\u0438\\u043a\\u043b\\u0438\\u043a\\u0430\\u043d\\u043e \\u043d\\u0435 \\u0441\\u0442\\u0456\\u043b\\u044c\\u043a\\u0438 \\u043e\\u0431\\u0456\\u0446\\u044f\\u043d\\u043a\\u0430\\u043c\\u0438 \\u0441\\u0430\\u043c\\u043e\\u0433\\u043e \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430, \\u0441\\u043a\\u0456\\u043b\\u044c\\u043a\\u0438 \\u0442\\u0438\\u043c\\u0438 \\u043e\\u0447\\u0456\\u043a\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f\\u043c\\u0438, \\u044f\\u043a\\u0456 \\u0431\\u0443\\u043b\\u0438 \\u0432 \\u043f\\u0435\\u0440\\u0435\\u0434\\u0440\\u0435\\u0432\\u043e\\u043b\\u044e\\u0446\\u0456\\u0439\\u043d\\u0438\\u0439 \\u043f\\u0435\\u0440\\u0456\\u043e\\u0434, \\u0432\\u0432\\u0430\\u0436\\u0430\\u0454 \\u043f\\u043e\\u043b\\u0456\\u0442\\u043e\\u043b\\u043e\\u0433 \\u0413\\u0440\\u0430\\u043d\\u0442 \\u041c\\u0456\\u043a\\u0430\\u0454\\u043b\\u044f\\u043d. 
'\\u0412\\u0456\\u043d \\u043d\\u0435 \\u043e\\u0431\\u0456\\u0446\\u044f\\u0432, \\u0449\\u043e \\u0434\\u043e\\u0445\\u043e\\u0434\\u0438 \\u0437\\u0440\\u043e\\u0441\\u0442\\u0443\\u0442\\u044c \\u0443 \\u0434\\u0432\\u0430 \\u0440\\u0430\\u0437\\u0438, \\u0430\\u043b\\u0435 \\u0442\\u0430\\u043a\\u0456 \\u043e\\u0447\\u0456\\u043a\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u0431\\u0443\\u043b\\u0438 \\u0443 \\u0441\\u0443\\u0441\\u043f\\u0456\\u043b\\u044c\\u0441\\u0442\\u0432\\u0430, - \\u0433\\u043e\\u0432\\u043e\\u0440\\u0438\\u0442\\u044c \\u0432\\u0456\\u043d. - \\u041b\\u044e\\u0434\\u0438 \\u0432\\u0432\\u0430\\u0436\\u0430\\u043b\\u0438, \\u0449\\u043e \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u044f \\u043c\\u0430\\u0454 \\u0436\\u0438\\u0442\\u0438 \\u044f\\u043a \\u0424\\u0440\\u0430\\u043d\\u0446\\u0456\\u044f, \\u0437\\u0430\\u0440\\u043f\\u043b\\u0430\\u0442\\u0438 \\u043c\\u0430\\u044e\\u0442\\u044c \\u0431\\u0443\\u0442\\u0438 3000 \\u0434\\u043e\\u043b\\u0430\\u0440\\u0456\\u0432, \\u0430 \\u044f\\u043a\\u0449\\u043e \\u0437\\u0430\\u0440\\u043f\\u043b\\u0430\\u0442\\u0438 3000 \\u0434\\u043e\\u043b\\u0430\\u0440\\u0456\\u0432, \\u0442\\u043e 2700 \\u0434\\u043e\\u043b\\u0430\\u0440\\u0456\\u0432 \\u0437'\\u0457\\u0434\\u0430\\u0454 \\u0420\\u0435\\u0441\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u0430\\u043d\\u0441\\u044c\\u043a\\u0430 \\u043f\\u0430\\u0440\\u0442\\u0456\\u044f \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457 (\\u044f\\u043a\\u0430 \\u043f\\u0440\\u0430\\u0432\\u0438\\u043b\\u0430 \\u0443 1999-2018 \\u0440\\u043e\\u043a\\u0430\\u0445. - \\u0420\\u0435\\u0434.). \\u0412\\u0456\\u0434\\u043f\\u043e\\u0432\\u0456\\u0434\\u043d\\u043e, \\u044f\\u043a\\u0449\\u043e \\u0457\\u0457 \\u043f\\u0440\\u0438\\u0431\\u0440\\u0430\\u0442\\u0438 \\u0437 \\u0432\\u043b\\u0430\\u0434\\u0438, \\u0442\\u043e \\u0446\\u0456 2700 \\u0432\\u043e\\u043d\\u0438 \\u043e\\u0442\\u0440\\u0438\\u043c\\u0430\\u044e\\u0442\\u044c. 
\\u0420\\u0435\\u0441\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u0430\\u043d\\u0441\\u044c\\u043a\\u043e\\u0457 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0457 \\u043f\\u0440\\u0438 \\u0432\\u043b\\u0430\\u0434\\u0456 \\u043d\\u0435\\u043c\\u0430\\u0454, \\u0430\\u043b\\u0435 \\u0437 \\u0446\\u0438\\u0445 2700 \\u0434\\u043e\\u043b\\u0430\\u0440\\u0456\\u0432 \\u0432\\u043e\\u043d\\u0438 \\u043f\\u043e\\u043a\\u0438 \\u043d\\u0456\\u0447\\u043e\\u0433\\u043e \\u043d\\u0435 \\u043e\\u0442\\u0440\\u0438\\u043c\\u0430\\u043b\\u0438'. '\\u0427\\u0438\\u043c \\u0431\\u0456\\u043b\\u044c\\u0448\\u0435 \\u043b\\u044e\\u0434\\u0438 \\u0431\\u0443\\u0434\\u0443\\u0442\\u044c \\u0431\\u0430\\u0447\\u0438\\u0442\\u0438, \\u0449\\u043e \\u0440\\u0430\\u0434\\u0438\\u043a\\u0430\\u043b\\u044c\\u043d\\u043e\\u0433\\u043e \\u0456 \\u043f\\u0440\\u0438\\u043d\\u0446\\u0438\\u043f\\u043e\\u0432\\u043e\\u0433\\u043e \\u043f\\u043e\\u043b\\u0456\\u043f\\u0448\\u0435\\u043d\\u043d\\u044f \\u0440\\u0456\\u0432\\u043d\\u044f \\u0436\\u0438\\u0442\\u0442\\u044f \\u043d\\u0435\\u043c\\u0430\\u0454, \\u0442\\u0438\\u043c \\u0431\\u0456\\u043b\\u044c\\u0448\\u0435 \\u0446\\u0435 \\u0440\\u043e\\u0437\\u0447\\u0430\\u0440\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u0431\\u0443\\u0434\\u0435 \\u043d\\u0430\\u0440\\u043e\\u0441\\u0442\\u0430\\u0442\\u0438', - \\u0434\\u043e\\u0434\\u0430\\u0454 \\u041c\\u0456\\u043a\\u0430\\u0454\\u043b\\u044f\\u043d. 
\\u0410\\u043b\\u0435 \\u041d\\u0456\\u043d\\u0435\\u043b\\u044c \\u0410\\u0442\\u0430\\u0431\\u0435\\u043a\\u044f\\u043d, \\u044f\\u043a \\u0456 \\u0431\\u0430\\u0433\\u0430\\u0442\\u043e \\u0436\\u0438\\u0442\\u0435\\u043b\\u0456\\u0432 \\u0432\\u0456\\u0440\\u043c\\u0435\\u043d\\u0441\\u044c\\u043a\\u043e\\u0457 \\u0441\\u0442\\u043e\\u043b\\u0438\\u0446\\u0456, \\u0432\\u0432\\u0430\\u0436\\u0430\\u0454, \\u0449\\u043e \\u0443 \\u043d\\u043e\\u0432\\u043e\\u0457 \\u0432\\u043b\\u0430\\u0434\\u0438 \\u0431\\u0443\\u043b\\u043e \\u0434\\u0443\\u0436\\u0435 \\u043c\\u0430\\u043b\\u043e \\u0447\\u0430\\u0441\\u0443 \\u0434\\u043b\\u044f \\u043a\\u0430\\u0440\\u0434\\u0438\\u043d\\u0430\\u043b\\u044c\\u043d\\u0438\\u0445 \\u043f\\u0435\\u0440\\u0435\\u0442\\u0432\\u043e\\u0440\\u0435\\u043d\\u044c. '\\u0417\\u0430 20 \\u0440\\u043e\\u043a\\u0456\\u0432 \\u0432\\u043e\\u043d\\u0438 [\\u043a\\u043e\\u043b\\u0438\\u0448\\u043d\\u044f \\u0432\\u043b\\u0430\\u0434\\u0430] \\u0442\\u0430\\u043a\\u0438\\u0439 \\u0445\\u0430\\u043e\\u0441 \\u0432\\u043b\\u0430\\u0448\\u0442\\u0443\\u0432\\u0430\\u043b\\u0438, \\u0449\\u043e \\u0437\\u0430 \\u0448\\u0456\\u0441\\u0442\\u044c \\u043c\\u0456\\u0441\\u044f\\u0446\\u0456\\u0432 \\u043d\\u0435\\u043c\\u043e\\u0436\\u043b\\u0438\\u0432\\u043e \\u0432\\u0438\\u043f\\u0440\\u0430\\u0432\\u0438\\u0442\\u0438. \\u0426\\u0435 \\u0442\\u0440\\u0435\\u0431\\u0430 \\u0437\\u0440\\u043e\\u0437\\u0443\\u043c\\u0456\\u0442\\u0438. \\u0425\\u0442\\u043e \\u043d\\u0435\\u0437\\u0430\\u0434\\u043e\\u0432\\u043e\\u043b\\u0435\\u043d\\u0438\\u0439? \\u0422\\u0456, \\u0449\\u043e \\u0431\\u0443\\u043b\\u0438 \\u043f\\u0440\\u0438 \\u0432\\u043b\\u0430\\u0434\\u0456, \\u0442\\u0456, \\u0449\\u043e \\u0433\\u043e\\u0434\\u0443\\u0432\\u0430\\u043b\\u0438\\u0441\\u044f \\u0437 \\u0433\\u043e\\u0434\\u0456\\u0432\\u043d\\u0438\\u0446\\u0456 \\u043f\\u0440\\u0438 \\u0432\\u043b\\u0430\\u0434\\u0456. 
\\u041f\\u0440\\u043e\\u0441\\u0442\\u0456 \\u043b\\u044e\\u0434\\u0438 \\u0437\\u0430\\u0434\\u043e\\u0432\\u043e\\u043b\\u0435\\u043d\\u0456', - \\u0432\\u043f\\u0435\\u0432\\u043d\\u0435\\u043d\\u0430 \\u0432\\u043e\\u043d\\u0430. \\u0417 \\u0447\\u043e\\u0433\\u043e \\u0432\\u0441\\u0435 \\u043f\\u043e\\u0447\\u0438\\u043d\\u0430\\u043b\\u043e\\u0441\\u044f? \\u0423 \\u043a\\u0432\\u0456\\u0442\\u043d\\u0456 \\u0446\\u044c\\u043e\\u0433\\u043e \\u0440\\u043e\\u043a\\u0443 \\u0443 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457 \\u043f\\u043e\\u0447\\u0430\\u043b\\u0438\\u0441\\u044f \\u043c\\u0430\\u0441\\u043e\\u0432\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0435\\u0441\\u0442\\u0438, \\u0432 \\u0440\\u0435\\u0437\\u0443\\u043b\\u044c\\u0442\\u0430\\u0442\\u0456 \\u044f\\u043a\\u0438\\u0445 \\u043f\\u0440\\u0435\\u043c'\\u0454\\u0440-\\u043c\\u0456\\u043d\\u0456\\u0441\\u0442\\u0440 \\u0421\\u0435\\u0440\\u0436 \\u0421\\u0430\\u0440\\u0433\\u0441\\u044f\\u043d \\u043f\\u0456\\u0448\\u043e\\u0432 \\u0443 \\u0432\\u0456\\u0434\\u0441\\u0442\\u0430\\u0432\\u043a\\u0443. 8 \\u0442\\u0440\\u0430\\u0432\\u043d\\u044f \\u043d\\u0430 \\u0442\\u043b\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0435\\u0441\\u0442\\u0456\\u0432 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442 \\u0437\\u0430\\u0442\\u0432\\u0435\\u0440\\u0434\\u0438\\u0432 \\u043d\\u0430 \\u043f\\u043e\\u0441\\u0430\\u0434\\u0456 \\u0433\\u043b\\u0430\\u0432\\u0438 \\u0443\\u0440\\u044f\\u0434\\u0443 \\u043e\\u043f\\u043e\\u0437\\u0438\\u0446\\u0456\\u0439\\u043d\\u043e\\u0433\\u043e \\u043b\\u0456\\u0434\\u0435\\u0440\\u0430 \\u041d\\u0456\\u043a\\u043e\\u043b\\u0430 \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d\\u0430. 
\\u0417\\u043c\\u0456\\u043d\\u0430 \\u0432\\u043b\\u0430\\u0434\\u0438 \\u0443 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457 \\u0441\\u0442\\u0430\\u043b\\u0430 \\u043c\\u043e\\u0436\\u043b\\u0438\\u0432\\u043e\\u044e \\u0437\\u0430\\u0432\\u0434\\u044f\\u043a\\u0438 \\u043c\\u0430\\u0441\\u043e\\u0432\\u0438\\u043c \\u043f\\u0440\\u043e\\u0442\\u0435\\u0441\\u0442\\u0430\\u043c \\u043d\\u0430\\u0432\\u0435\\u0441\\u043d\\u0456, \\u0432 \\u044f\\u043a\\u0438\\u0445 \\u0430\\u043a\\u0442\\u0438\\u0432\\u043d\\u0443 \\u0443\\u0447\\u0430\\u0441\\u0442\\u044c \\u0432\\u0437\\u044f\\u043b\\u0430 \\u043c\\u043e\\u043b\\u043e\\u0434\\u044c \\u0410\\u043b\\u0435 \\u0432 \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0456 \\u0443 \\u0420\\u0435\\u0441\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u0430\\u043d\\u0441\\u044c\\u043a\\u043e\\u0457 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0457, \\u043a\\u043e\\u043b\\u0438\\u0448\\u043d\\u044c\\u043e\\u0457 \\u043f\\u0440\\u0430\\u0432\\u043b\\u044f\\u0447\\u043e\\u0457 \\u043f\\u0430\\u0440\\u0442\\u0456\\u0457 \\u0412\\u0456\\u0440\\u043c\\u0435\\u043d\\u0456\\u0457, \\u044f\\u043a \\u0456 \\u0440\\u0430\\u043d\\u0456\\u0448\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u043b\\u0430\\u0441\\u044f \\u043c\\u0430\\u0439\\u0436\\u0435 \\u043f\\u043e\\u043b\\u043e\\u0432\\u0438\\u043d\\u0430 \\u0434\\u0435\\u043f\\u0443\\u0442\\u0430\\u0442\\u0456\\u0432 \\u0456 \\u043d\\u0430\\u0439\\u0431\\u0456\\u043b\\u044c\\u0448\\u0430 \\u0444\\u0440\\u0430\\u043a\\u0446\\u0456\\u044f. 
\\u0423 \\u0436\\u043e\\u0432\\u0442\\u043d\\u0456 \\u041d\\u0456\\u043a\\u043e\\u043b \\u041f\\u0430\\u0448\\u0438\\u043d\\u044f\\u043d \\u043e\\u0433\\u043e\\u043b\\u043e\\u0441\\u0438\\u0432 \\u043f\\u0440\\u043e \\u0432\\u0456\\u0434\\u0441\\u0442\\u0430\\u0432\\u043a\\u0443 \\u0456 \\u0440\\u043e\\u0437\\u043f\\u0443\\u0441\\u043a \\u043f\\u0430\\u0440\\u043b\\u0430\\u043c\\u0435\\u043d\\u0442\\u0443, \\u0449\\u043e\\u0431 \\u043f\\u0440\\u043e\\u0432\\u0435\\u0441\\u0442\\u0438 \\u0434\\u043e\\u0441\\u0442\\u0440\\u043e\\u043a\\u043e\\u0432\\u0456 \\u0432\\u0438\\u0431\\u043e\\u0440\\u0438.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 316,\n \"min\": 3,\n \"max\": 12134,\n \"num_unique_values\": 2103,\n \"samples\": [\n 1296\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2172,\n \"min\": 22,\n \"max\": 82575,\n \"num_unique_values\": 8661,\n \"samples\": [\n 1284\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Split Location\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 11508,\n \"num_unique_values\": 1475,\n \"samples\": [\n 907\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Modified text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 99905,\n \"samples\": [\n \"\\u0410\\u0434\\u0432\\u043e\\u043a\\u0430\\u0442 \\u0421\\u0432\\u0435\\u043d \\u041c\\u0430\\u0440\\u0456 \\u043d\\u0430\\u0432\\u0456\\u0432 \\u0443 \\u044f\\u043a\\u043e\\u0441\\u0442\\u0456 \\u043f\\u0440\\u0438\\u043a\\u043b\\u0430\\u0434\\u0443 \\u0444\\u0440\\u0430\\u0437\\u0443 \\u043f\\u0440\\u043e\\u043a\\u0443\\u0440\\u043e\\u0440\\u0430, \\u043f\\u0440\\u043e \\u0442\\u0435, \\u0449\\u043e 
\\u0410\\u0431\\u0434\\u0435\\u0441\\u043b\\u0430\\u043c \\u0445\\u043e\\u0442\\u0456\\u0432 \\u043f\\u0456\\u0434\\u0456\\u0440\\u0432\\u0430\\u0442\\u0438 \\u0441\\u0435\\u0431\\u0435 \\u0431\\u0456\\u043b\\u044f \\u0441\\u0442\\u0430\\u0434\\u0456\\u043e\\u043d\\u0443, \\u0430\\u043b\\u0435 \\u043f\\u0435\\u0440\\u0435\\u0434\\u0443\\u043c\\u0430\\u0432 \\u0432 \\u043e\\u0441\\u0442\\u0430\\u043d\\u043d\\u044e \\u0445\\u0432\\u0438\\u043b\\u0438\\u043d\\u0443. \\u0412 \\u0456\\u043d\\u0442\\u0435\\u0440\\u0432'\\u044e \\u0442\\u0435\\u043b\\u0435\\u043a\\u043e\\u043c\\u043f\\u0430\\u043d\\u0456\\u0457 RTFB \\u0430\\u0434\\u0432\\u043e\\u043a\\u0430\\u0442 \\u043d\\u0430\\u0437\\u0432\\u0430\\u0432 \\u0441\\u043b\\u043e\\u0432\\u0430 \\u043f\\u0440\\u043e\\u043a\\u0443\\u0440\\u043e\\u0440\\u0430 '\\u043f\\u043e\\u0440\\u0443\\u0448\\u0435\\u043d\\u043d\\u044f\\u043c' \\u0456, \\u0449\\u043e \\u0446\\u044e '\\u043f\\u043e\\u043c\\u0438\\u043b\\u043a\\u0443' \\u0432\\u0456\\u043d \\u043d\\u0435 \\u043c\\u043e\\u0436\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0438\\u0442\\u0438 \\u0431\\u0435\\u0437 \\u043d\\u0430\\u0441\\u043b\\u0456\\u0434\\u043a\\u0456\\u0432. \\u0424\\u0440\\u0430\\u043d\\u0446\\u0456\\u044f \\u0440\\u043e\\u0437\\u0440\\u0430\\u0445\\u043e\\u0432\\u0443\\u0454, \\u0449\\u043e \\u0410\\u0431\\u0434\\u0435\\u0441\\u043b\\u0430\\u043c\\u0430, \\u044f\\u043a\\u043e\\u0433\\u043e \\u0437\\u0430\\u0442\\u0440\\u0438\\u043c\\u0430\\u043b\\u0438 \\u0443 \\u0411\\u0440\\u044e\\u0441\\u0441\\u0435\\u043b\\u0456, \\u0432\\u0438\\u043d\\u0435\\u0441\\u0443\\u0442\\u044c \\u0441\\u0443\\u0434\\u0443 \\u0437\\u0430 \\u043f\\u0440\\u0438\\u0447\\u0435\\u0442\\u043d\\u0456\\u0441\\u0442\\u044c \\u0434\\u043e \\u0442\\u0435\\u0440\\u043e\\u0440\\u0438\\u0441\\u0442\\u0438\\u0447\\u043d\\u0438\\u0445 \\u0430\\u0442\\u0430\\u043a \\u0443 \\u041f\\u0430\\u0440\\u0438\\u0436\\u0456. 
import pandas as pd

# Relative CSV path of every language inside the 1024m/mMGTD-Corpus dataset repo.
splits = {
    'Arabic': 'Data-v3.1/ARA-v3-1.csv',
    'Chinese': 'Data-v3.1/ZHO-v3-1.csv',
    'Czech': 'Data-v3.1/CES-v3-1.csv',
    'Dutch': 'Data-v3.1/NLD-v3-1.csv',
    'English': 'Data-v3.1/ENG-v3-1.csv',
    'French': 'Data-v3.1/FRA-v3-1.csv',
    'German': 'Data-v3.1/DEU-v3-1.csv',
    'Greek': 'Data-v3.1/ELL-v3-1.csv',
    'Hebrew': 'Data-v3.1/HEB-v3-1.csv',
    'Hindi': 'Data-v3.1/HIN-v3-1.csv',
    'Indonesian': 'Data-v3.1/IND-v3-1.csv',
    'Italian': 'Data-v3.1/ITA-v3-1.csv',
    'Japanese': 'Data-v3.1/JPN-v3-1.csv',
    'Korean': 'Data-v3.1/KOR-v3-1.csv',
    'Persian': 'Data-v3.1/PES-v3-1.csv',
    'Polish': 'Data-v3.1/POL-v3-1.csv',
    'Portuguese': 'Data-v3.1/POR-v3-1.csv',
    'Romanian': 'Data-v3.1/RON-v3-1.csv',
    'Russian': 'Data-v3.1/RUS-v3-1.csv',
    'Spanish': 'Data-v3.1/SPA-v3-1.csv',
    'Turkish': 'Data-v3.1/TUR-v3-1.csv',
    'Vietnamese': 'Data-v3.1/VIE-v3-1.csv',
    'Ukrainian': 'Data-v3.1/UKR-v3-1.csv',
}

LANGUAGE = "Ukrainian"   # key into `splits`
LANG_CODE = "UKR"        # prefix of the generated sample ids and of the output files
KEPT_TYPES = ['Partial', 'Rewritten', 'Unchanged']
SHUFFLE_SEED = 1024      # fixed so ids and JSONL row order are reproducible across runs

df_raw = pd.read_csv("hf://datasets/1024m/mMGTD-Corpus/" + splits[LANGUAGE])
df_raw.head()

# Shuffle from the untouched raw frame so re-running this cell is idempotent.
df = df_raw.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)


def select_split(frame, split_name):
    """Return an independent copy of the rows of `frame` that belong to `split_name`.

    Only rows whose 'Type' is one of KEPT_TYPES are kept. The explicit `.copy()`
    is what prevents the SettingWithCopyWarning the original cells emitted when
    a column was later assigned on the filtered slice.
    """
    keep = frame['Type'].isin(KEPT_TYPES) & (frame['Data Split'] == split_name)
    return frame[keep].copy()


def to_model_format(frame):
    """Return `frame` with an 'id' column and the column names the dataset class reads.

    'id' is LANG_CODE followed by the row's index label; 'Modified text' becomes
    'text' and 'Split Location' becomes 'label'. The input frame is not modified.
    """
    return frame.assign(id=LANG_CODE + frame.index.astype(str)).rename(
        columns={'Modified text': 'text', 'Split Location': 'label'}
    )


# Untouched per-split copies (original column names, no 'id' column).
UKR_train = select_split(df, 'Train')
UKR_dev = select_split(df, 'Dev')
UKR_test = select_split(df, 'Test')
print(len(UKR_train))
print(len(UKR_dev))
print(len(UKR_test))

df_dev = to_model_format(UKR_dev)
df_test = to_model_format(UKR_test)
# The dev rows are folded into the training set; `df_dev` is kept for reference.
# Building `df_train` from UKR_train (instead of `df_train = concat([df_train, ...])`)
# means re-running this cell cannot append the dev rows a second time.
df_train = pd.concat([to_model_format(UKR_train), df_dev], ignore_index=True)
assert len(df_train) == len(UKR_train) + len(UKR_dev)

print(len(df_train))
print(len(df_dev))
print(len(df_test))

df_train.to_json('UKR_train.jsonl', orient='records', lines=True)
df_test.to_json('UKR_test.jsonl', orient='records', lines=True)

# `%pip` installs into the environment of the running kernel (a bare `!pip` may
# resolve to a different interpreter); `-q` keeps the install log out of the notebook.
# The recorded run resolved to: torch 2.5.1+cu124, transformers 4.48.2,
# accelerate 1.3.0, tqdm 4.67.1, pytorch-crf 0.7.2, sentencepiece 0.2.0.
for requirement in ("torch", "transformers", "accelerate -U", "tqdm", "pytorch-crf", "sentencepiece"):
    get_ipython().run_line_magic("pip", f"install -q {requirement}")
nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", "Downloading 
nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m77.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m95.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", " Attempting uninstall: nvidia-cuda-runtime-cu12\n", " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", " Uninstalling 
nvidia-cuda-nvrtc-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-cupti-cu12\n", " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.48.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.17.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.28.1)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from 
transformers) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (2024.10.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (4.12.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n", "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (1.3.0)\n", "Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.11/dist-packages (from accelerate) (1.26.4)\n", "Requirement 
already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from accelerate) (24.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from accelerate) (6.0.2)\n", "Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from accelerate) (2.5.1+cu124)\n", "Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.11/dist-packages (from accelerate) (0.28.1)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from accelerate) (0.5.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.0->accelerate) (3.17.0)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.0->accelerate) (2024.10.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.0->accelerate) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.0->accelerate) (4.67.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.21.0->accelerate) (4.12.2)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.1.5)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) 
(12.4.127)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (9.1.0.70)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.5.8)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (11.2.1.3)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (10.3.5.147)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (11.6.1.9)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.3.1.170)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.0.0->accelerate) (1.3.0)\n", "Requirement already satisfied: 
import glob
import json
import logging
import os
from dataclasses import dataclass, field

import numpy as np
import torch
import transformers
from torch import nn
from torch.cuda.amp import autocast
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from torchcrf import CRF
from tqdm import tqdm
from transformers import AutoConfig, AutoModel, AutoModelForTokenClassification, AutoTokenizer, Trainer
from transformers.trainer_callback import TrainerState

# Output directories used by the run.
for run_dir in ("./runs/exp_seed", "./runs/exp_seed/logs", "./runs/exp_seed/xlmlongformerbase"):
    os.makedirs(run_dir, exist_ok=True)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


# The three config classes carry type annotations so that @dataclass actually
# registers the attributes as fields: without annotations the decorator creates
# no fields, keyword overrides such as ModelConfig(model_path=...) raise
# TypeError, and list defaults are shared between all instances.
@dataclass
class ModelConfig:
    """Which pretrained encoder to load and where checkpoints are written."""
    model_path: str = "hyperonym/xlm-roberta-longformer-base-16384"
    model_checkpoint_dir: str = "./runs"


@dataclass
class DatasetConfig:
    """Locations of the JSONL files read by Semeval_Data."""
    train_file: str = "/content/UKR_train.jsonl"
    # default_factory gives every instance its own list
    test_files: list = field(default_factory=lambda: ["/content/UKR_test.jsonl"])


@dataclass
class TrainingArgsConfig:
    """Run switches and hyper-parameters read by the __main__ section."""
    do_train: bool = False
    do_predict: bool = False
    seed: int = 1024
    output_dir: str = "./runs/exp_seed"
    logging_steps: int = 160
    logging_dir: str = "./runs/exp_seed"
    num_train_epochs: int = 30
    per_device_train_batch_size: int = 12
    per_device_eval_batch_size: int = 12
    max_length: int = 2048


model_args = ModelConfig()
data_args = DatasetConfig()
training_args = TrainingArgsConfig()


class CRFTrainer(Trainer):
    """Trainer whose loss is the CRF negative log-likelihood returned by AutoModelCRF.

    The previous implementation called the model without labels (which makes
    AutoModelCRF return decoded tags, not emissions) and then fed that result to
    the CRF, and its `training_step` returned a dict instead of a loss tensor.
    `training_step` is therefore no longer overridden; the inherited one is used.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the CRF loss for one batch.

        Args:
            model: an AutoModelCRF (forward returns the loss when labels are given).
            inputs: mapping with "input_ids", "attention_mask" and "labels".
            return_outputs: when True, return ``(loss, outputs)``.
            num_items_in_batch: accepted and ignored; newer transformers releases
                pass it to compute_loss.
        """
        loss = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            labels=inputs["labels"],
        )
        # NOTE(review): the model exposes no logits in training mode, so the
        # "outputs" handed back to Trainer only carry the loss - confirm this is
        # sufficient if Trainer.evaluate/predict is ever used.
        return (loss, {"loss": loss}) if return_outputs else loss


class AutoModelCRF(nn.Module):
    """Pretrained encoder -> dropout -> linear emissions -> 2-label CRF."""

    def __init__(self, model_name_or_path, dropout=0.075):
        """Load config and encoder from `model_name_or_path` and build the CRF head."""
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name_or_path)
        self.num_labels = 2
        # NOTE(review): from_tf=True asks transformers to load TensorFlow weights;
        # confirm the checkpoint actually needs this before relying on it.
        self.encoder = AutoModel.from_pretrained(
            model_name_or_path, trust_remote_code=True, config=self.config, from_tf=True
        )
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(self.config.hidden_size, self.num_labels)
        self.crf = CRF(self.num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return the CRF loss when `labels` is given, otherwise decoded tags.

        With labels: the negated value of ``self.crf(...)`` (a tensor).
        Without labels: a numpy array holding one decoded tag sequence per batch
        row, each right-padded with its own last tag up to the padded input width.
        """
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        emission = self.linear(self.dropout(encoded[0]))
        # Boolean mask: a uint8 condition is deprecated in torch.where, which the
        # CRF implementation uses internally.
        mask = attention_mask.bool()
        if labels is not None:
            return -self.crf(emission, labels, mask=mask)
        width = attention_mask.shape[1]
        decoded = self.crf.decode(emission, mask)
        # decode() returns only the unmasked positions of each sequence
        padded = [seq + [seq[-1]] * (width - len(seq)) for seq in decoded]
        return np.array(padded)


def evaluate_position_difference(actual_position, predicted_position):
    """Absolute distance between the actual and the predicted start position."""
    return abs(actual_position - predicted_position)


def get_start_position(sequence, mapping=None, token_level=True):
    """Locate where the label sequence switches from 0 to 1.

    Args:
        sequence: 1-D numpy array of 0/1 labels.
        mapping: optional array of the same length giving the word index of each
            token; entries equal to -100 (and the matching labels) are dropped.
        token_level: when False and `mapping` is given, the position is
            translated to a word index through `mapping`.

    Returns:
        The index right after the first 0->1 step. Without such a step: 0 when
        the sequence starts with 1, otherwise the last index. 0 for an empty
        sequence (previously an IndexError).
    """
    if mapping is not None:
        keep = mapping != -100
        sequence = sequence[keep]
        mapping = mapping[keep]
    if len(sequence) == 0:
        return 0
    rises = np.where(np.diff(sequence) == 1)[0]
    if len(rises) > 0:
        value = rises[0] + 1
    elif sequence[0] == 1:
        value = 0
    else:
        value = len(sequence) - 1
    if not token_level and mapping is not None:
        value = mapping[value]
    return value


def evaluate_machine_start_position(labels, predictions, idx2word=None, token_level=False):
    """Mean absolute difference between actual and predicted start positions.

    The first position of every row is skipped and each row is cut to the
    length of its label row. `idx2word` is required for word-level evaluation.

    Raises:
        ValueError: if ``token_level`` is False and ``idx2word`` is None.
    """
    if not token_level and idx2word is None:
        raise ValueError("idx2word must be provided if evaluation is at word level (token_level=False)")
    actual_starts = []
    predicted_starts = []
    for idx in range(labels.shape[0]):
        stop = len(labels[idx])
        mapping = None if token_level else idx2word[idx][1:stop]
        predicted_starts.append(get_start_position(predictions[idx][1:stop], mapping, token_level))
        actual_starts.append(get_start_position(labels[idx][1:stop], mapping, token_level))
    differences = [
        evaluate_position_difference(actual, predicted)
        for actual, predicted in zip(actual_starts, predicted_starts)
    ]
    return np.mean(differences)


def compute_metrics(p):
    """Token-level mean absolute start-position error for a (predictions, labels) pair."""
    pred, labels = p
    mean_absolute_diff = evaluate_machine_start_position(labels, pred, token_level=True)
    return {"mean_absolute_diff": mean_absolute_diff,}


def training_loop(model, optimizer, train_dataloader, device):
    """Run one epoch over `train_dataloader`; log each step and print the mean loss.

    Returns the mean loss (0.0 for an empty dataloader, which previously raised
    ZeroDivisionError). Earlier versions returned None; callers that ignore the
    return value are unaffected.
    """
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        optimizer.zero_grad()
        loss = model(input_ids, attention_mask, labels=labels)
        loss.backward()
        optimizer.step()
        logger.info(f"Step {step}: {loss.item():.4f}")
        total_loss += loss.item()
    n_batches = len(train_dataloader)
    avg_loss = total_loss / n_batches if n_batches else 0.0
    print(f"Training loss: {avg_loss:.4f}")
    return avg_loss


def predict(model, test_dataloader, device):
    """Decode every batch of `test_dataloader` and return the stacked tag array."""
    # Switch dropout off: training_loop leaves the model in train mode, which
    # would make the decoded tags non-deterministic.
    model.eval()
    all_preds = []
    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            all_preds.extend(model(input_ids, attention_mask))
    out = np.array(all_preds)
    print(out.shape)
    return out


def save_model(model_name, model, optimizer, epoch, output_dir):  # train_mae, val_mae,
    """Write model and optimizer state dicts to ``<output_dir>/<model_name>-epoch-<epoch>.pt``.

    Slashes in `model_name` are replaced by dashes so a hub id yields a flat file name.
    """
    os.makedirs(output_dir, exist_ok=True)  # no exists()/makedirs() race
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }  # 'train_mae': train_mae,'val_mae': val_mae,
    model_name = model_name.replace("/", "-")
    file_path = os.path.join(output_dir, f"{model_name}-epoch-{epoch}.pt")
    print(file_path)
    torch.save(checkpoint, file_path)
    logger.info(f"Model has been saved successfully to {file_path}")


class Semeval_Data(torch.utils.data.Dataset):
    """JSONL dataset producing fixed-length token-level inputs for AutoModelCRF.

    Every line of the file is a JSON object with "text", "id" and optionally
    "label" (the index of the first whitespace-separated word labelled 1).
    """

    def __init__(self, data_path, model_name, max_length=512, inference=False, debug=False):
        """Read all records from `data_path` and load the tokenizer of `model_name`."""
        # Explicit encoding so reading does not depend on the platform default.
        with open(data_path, "r", encoding="utf-8") as f:
            self.data = [json.loads(line) for line in f]
        self.inference = inference
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.max_length = max_length
        self.debug = debug

    def __len__(self):
        """Number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode record `idx` to exactly `max_length` positions.

        Returns a dict with tensors "input_ids" and "attention_mask", plus
        "labels" when the record has a label. With `inference=True` it also
        carries "text", "id" and "corresponding_word" (a list giving the word
        index of every position, -100 for the added start/end/padding slots).
        With `debug=True` (and not inference) "partial_human_review" holds the
        words before the label position.
        """
        record = self.data[idx]
        text = record["text"]
        sample_id = record["id"]
        labels_available = "label" in record
        label = record["label"] if labels_available else None

        labels = []
        corresponding_word = []
        input_ids = []
        attention_mask = []
        # Tokenize word by word so each sub-token inherits its word's label.
        for jdx, word in enumerate(text.split(" ")):
            pieces = self.tokenizer.tokenize(word)
            n_pieces = len(pieces)
            if labels_available:
                labels.extend([1 if jdx >= label else 0] * n_pieces)
            corresponding_word.extend([jdx] * n_pieces)
            input_ids.extend(self.tokenizer.convert_tokens_to_ids(pieces))
            attention_mask.extend([1] * n_pieces)

        # Ids 0 / 2 / 1 are hard-coded as start / end / padding ids
        # (presumably the XLM-R special tokens - confirm if the model changes).
        body = self.max_length - 2  # room left after the start and end slots
        if len(input_ids) < body:
            pad = body - len(input_ids)
            if labels_available:
                # The end slot and padding repeat the last real label; fall back
                # to 0 when the text produced no tokens (previously IndexError).
                tail = labels[-1] if labels else 0
                labels = [0] + labels + [tail] * (pad + 1)
            corresponding_word = [-100] + corresponding_word + [-100] * (pad + 1)
            attention_mask = [1] + attention_mask + [1] + [0] * pad
            input_ids = [0] + input_ids + [2] + [1] * pad
        else:
            if labels_available:
                labels = [0] + labels[:body] + [labels[body - 1]]
            corresponding_word = [-100] + corresponding_word[:body] + [-100]
            attention_mask = [1] + attention_mask[:body] + [1]
            input_ids = [0] + input_ids[:body] + [2]

        encoded = {}
        if labels_available:
            encoded["labels"] = torch.tensor(labels)
        encoded["input_ids"] = torch.tensor(input_ids)
        encoded["attention_mask"] = torch.tensor(attention_mask)
        if labels_available:
            assert encoded["input_ids"].shape == encoded["labels"].shape
        if self.debug and not self.inference:
            encoded["partial_human_review"] = " ".join(text.split(" ")[:label])
        if self.inference:
            encoded["text"] = text
            encoded["id"] = sample_id
            encoded["corresponding_word"] = corresponding_word
        return encoded


if __name__ == "__main__":
    model_args = ModelConfig()
    data_args = DatasetConfig()
    training_args = TrainingArgsConfig()
    transformers.set_seed(training_args.seed)
    model_path = model_args.model_path
    model_checkpoint_dir = model_args.model_checkpoint_dir
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelCRF(model_path).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
    train_set = Semeval_Data(data_args.train_file, model_path, max_length=training_args.max_length)
    train_dataloader = DataLoader(train_set, batch_size=training_args.per_device_train_batch_size, shuffle=True)
    train_eval_dataloader = DataLoader(train_set, batch_size=training_args.per_device_eval_batch_size, shuffle=False)
    # Training only runs when TrainingArgsConfig.do_train is switched on.
    if training_args.do_train:
        logger.info("Training...")
        logger.info("*** Train Dataset ***")
        logger.info(f"Number of samples: {len(train_set)}")
        num_train_epochs = training_args.num_train_epochs
        for epoch in tqdm(range(num_train_epochs)):
            training_loop(model, optimizer, train_dataloader, device)
            save_model(model_path, model, optimizer, epoch, model_checkpoint_dir)  # ,train_mse ,val_mse
"a053ce96d4f14082977826930c11ef6d", "37ba28a10cca4285b842f1fbf46ae202", "1496bd2b8eb0479387ff949ea2d17727", "afdedee168ba4642bb259763bf57b468", "9cacf93a6086448da1209d644a41d94e", "72bbe035096645838ecca74350f6f020", "d05c9890fb364ad7b72add139aaddc53", "75c75b37e91446909e0d1b7837454541", "e585791e09314c4b8ef6a34e3924d4f2", "2a6b145637a74b319adb37a942ebaad6", "c857032102bd4a1aae12ce8469e51e83", "9905bae17058451791dd519c41a80ccc", "5630b74679344224804e529205c5ccb6", "5406db678eaa4b878a06ebf9eea18d7a", "97b658602ec84f61814ff7de958428e3", "129bd5ec1cd0426d92a867981c6ddc38", "31089732637a4bf4a43c74e7655ef561", "15ceb638dd5d49fcb43c45588221d4d8", "20fc1f68ddc24acb87bbb1ed7bc0590e" ] }, "outputId": "f12d7982-609b-4792-f031-8751dad1460b" }, "execution_count": 13, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "config.json: 0%| | 0.00/772 [00:00] 3.32G 23.7MB/s in 2m 25s \n", "\n", "2025-02-12 01:11:47 (23.4 MB/s) - ‘UKR-xlm-longformer’ saved [3563459222/3563459222]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "model = AutoModelCRF(model_args.model_path).to(device)\n", "tokenizer = AutoTokenizer.from_pretrained(model_args.model_path)\n", "checkpoint = torch.load('UKR-xlm-longformer')\n", "model.load_state_dict(checkpoint['model_state_dict'])\n", "model.eval()\n", "test_sets = []\n", "for test_file in data_args.test_files:\n", " test_set = Semeval_Data(test_file, model_args.model_path, max_length=training_args.max_length, inference=True)\n", " test_dataloader = DataLoader(test_set, batch_size=training_args.per_device_eval_batch_size, shuffle=False)\n", " test_sets.append(test_dataloader)\n", "logger.info(\"Predicting...\")\n", "logger.info(\"*** Test Datasets ***\")\n", "logger.info(f\"Number of sets: {len(test_sets)}\")\n", "for idx, test_set in enumerate(test_sets):\n", " logger.info(f\"Test Dataset {idx + 1}\")\n", " logger.info(f\"Number of samples: {len(test_set)}\")\n", " predictions = predict(model, test_set, device)\n", 
" corresponding_words = []\n", " ids = []\n", " for batch in test_set:\n", " corr_word_tensors = [torch.tensor(cw) for cw in batch['corresponding_word']]\n", " corr_word_padded = torch.nn.utils.rnn.pad_sequence(corr_word_tensors, batch_first=True, padding_value=-100)\n", " corr_word = np.transpose(corr_word_padded.numpy(), (1, 0))\n", " ids.extend(batch[\"id\"])\n", " corresponding_words.extend(corr_word)\n", " corresponding_words = np.array(corresponding_words)\n", " logger.info(\"Predictions completed!\")\n", " df_ids = []\n", " df_labels = []\n", " for id, pred, corr_word in zip(ids, predictions, corresponding_words):\n", " df_ids.append(id)\n", " df_labels.append(get_start_position(pred, corr_word, token_level=False))\n", " df = pd.DataFrame({\"id\": df_ids, \"label\": df_labels})\n", " file_name = os.path.basename(test_file)\n", " file_dirs = os.path.join(training_args.output_dir, \"predictions\")\n", " os.makedirs(file_dirs, exist_ok=True)\n", " file_path = os.path.join(file_dirs, file_name)\n", " records = df.to_dict(\"records\")\n", " with open(file_path, \"w\") as f:\n", " for record in records:\n", " f.write(json.dumps(record) + \"\\n\")" ], "metadata": { "id": "fIMLFzDxrVSA", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "1a623810-82c0-418f-9539-b90e14d0d7ff" }, "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "All TF 2.0 model weights were used when initializing LongformerModel.\n", "\n", "All the weights of LongformerModel were initialized from the TF 2.0 model.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use LongformerModel for predictions without further training.\n", ":4: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " checkpoint = torch.load('UKR-xlm-longformer')\n", " 0%| | 0/4163 [00:00:22: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " corr_word_tensors = [torch.tensor(cw) for cw in batch['corresponding_word']]\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install jsonlines\n", "import pandas as pd\n", "import jsonlines\n", "jsonl_file_path = '/content/runs/exp_seed/predictions/UKR_test.jsonl'\n", "def display_jsonl_as_dataframe(file_path):\n", " data = []\n", " with jsonlines.open(file_path) as reader:\n", " for obj in reader:\n", " data.append(obj)\n", " df = pd.DataFrame(data)\n", " return df\n", "jsonl_df = display_jsonl_as_dataframe(jsonl_file_path)\n", "jsonl_df" ], "metadata": { "id": "yutpCG-Drcjn", "colab": { "base_uri": "https://localhost:8080/", "height": 527 }, "outputId": "48bd3744-bfd4-4c79-bdfc-4b71678f9bdd" }, "execution_count": 16, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting jsonlines\n", " Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)\n", "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages 
(from jsonlines) (25.1.0)\n", "Downloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)\n", "Installing collected packages: jsonlines\n", "Successfully installed jsonlines-4.0.0\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " id label\n", "0 UKR2 81\n", "1 UKR3 0\n", "2 UKR4 322\n", "3 UKR7 24\n", "4 UKR8 42\n", "... ... ...\n", "49951 UKR99919 26\n", "49952 UKR99921 55\n", "49953 UKR99922 75\n", "49954 UKR99923 31\n", "49955 UKR99924 18\n", "\n", "[49956 rows x 2 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabel
0UKR281
1UKR30
2UKR4322
3UKR724
4UKR842
.........
49951UKR9991926
49952UKR9992155
49953UKR9992275
49954UKR9992331
49955UKR9992418
\n", "

49956 rows × 2 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "jsonl_df", "summary": "{\n \"name\": \"jsonl_df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49956,\n \"samples\": [\n \"UKR15758\",\n \"UKR4821\",\n \"UKR2116\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467,\n 261,\n 258\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "code", "source": [ "jsonl_file_path = '/content/UKR_test.jsonl'\n", "def display_jsonl_as_dataframe(file_path):\n", " data = []\n", " with jsonlines.open(file_path) as reader:\n", " for obj in reader:\n", " data.append(obj)\n", " df = pd.DataFrame(data)\n", " return df\n", "jsonl_df_gold = display_jsonl_as_dataframe(jsonl_file_path)\n", "jsonl_df_gold" ], "metadata": { "id": "nLm2KGliriEN", "colab": { "base_uri": "https://localhost:8080/", "height": 614 }, "outputId": "38b457ab-82ef-45b8-aad2-747c8a2f6f20" }, "execution_count": 17, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " ISO LLM used Type Data Split \\\n", "0 UKR GPT-o1 Partial Test \n", "1 UKR Gemini-Pro-1.5 Rewritten Test \n", "2 UKR Claude-Haiku-3.5 Partial Test \n", "3 UKR GPT-4o Partial Test \n", "4 UKR Amazon-Nova-Lite-1.0 Partial Test \n", "... ... ... ... ... \n", "49951 UKR Gemini-Pro-1.5 Partial Test \n", "49952 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49953 UKR Gemini-Flash-1.5 Unchanged Test \n", "49954 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49955 UKR Mistral-Large-2411 Partial Test \n", "\n", " Original text Original Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 
136 \n", "1 “Поїду, а коли настануть кращі часи, повернусь... 106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count label \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " text New Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 \n", "1 Наступного дня дискусія щодо заробітків продов... 94 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 \n", "49952 У верхній частині турнірної таблиці нині переб... 115 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 \n", "\n", " New Char Count id \n", "0 1248 UKR2 \n", "1 555 UKR3 \n", "2 2535 UKR4 \n", "3 3541 UKR7 \n", "4 737 UKR8 \n", "... ... ... \n", "49951 574 UKR99919 \n", "49952 797 UKR99921 \n", "49953 595 UKR99922 \n", "49954 623 UKR99923 \n", "49955 523 UKR99924 \n", "\n", "[49956 rows x 12 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountlabeltextNew Word CountNew Char Countid
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...1711248UKR2
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...94555UKR3
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...3682535UKR4
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...5863541UKR7
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118737UKR8
.......................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...93574UKR99919
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...115797UKR99921
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...76595UKR99922
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...88623UKR99923
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...72523UKR99924
\n", "

49956 rows × 12 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "jsonl_df_gold", "summary": "{\n \"name\": \"jsonl_df_gold\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"ISO\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"UKR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LLM used\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Gemini-Flash-1.5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Partial\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Data Split\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Test\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49943,\n \"samples\": [\n \"\\u0410 \\u043d\\u0430\\u0439\\u0431\\u0456\\u043b\\u044c\\u0448\\u043e\\u044e \\u0432\\u0442\\u0440\\u0430\\u0442\\u043e\\u044e \\u0437\\u0430 \\u0440\\u043e\\u043a\\u0438 \\u043d\\u0435\\u0437\\u0430\\u043b\\u0435\\u0436\\u043d\\u043e\\u0441\\u0442\\u0456 \\u0454 \\u043a\\u0443\\u043b\\u044c\\u0442\\u0443\\u0440\\u043d\\u0430 \\u0456\\u043d\\u0444\\u0440\\u0430\\u0441\\u0442\\u0440\\u0443\\u043a\\u0442\\u0443\\u0440\\u0430. \\u041f\\u0435\\u0440\\u0435\\u0434\\u043e\\u0432\\u0441\\u0456\\u043c, \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u044c\\u043d\\u0430. 
\\u0411\\u043e \\u0442\\u0430\\u043b\\u0430\\u043d\\u0442\\u0438 \\u0432 \\u043d\\u0430\\u0441 \\u044f\\u043a \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u043b\\u0438\\u0441\\u044f, \\u0442\\u0430\\u043a \\u0456 \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u0442\\u0438\\u043c\\u0443\\u0442\\u044c\\u0441\\u044f.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 320,\n \"min\": 6,\n \"max\": 12134,\n \"num_unique_values\": 1767,\n \"samples\": [\n 1251\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2198,\n \"min\": 22,\n \"max\": 82575,\n \"num_unique_values\": 6706,\n \"samples\": [\n 716\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 197,\n \"min\": 1,\n \"max\": 6951,\n \"num_unique_values\": 1310,\n \"samples\": [\n 620\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1338,\n \"min\": 6,\n \"max\": 47900,\n \"num_unique_values\": 5067,\n \"samples\": [\n 2371\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49956,\n \"samples\": [\n \"UKR15758\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "code", "source": [ "jsonl_df = jsonl_df.rename(columns={'label': 'label_pred'})\n", "jsonl_df_gold = jsonl_df_gold.rename(columns={'label': 'label_gold'})\n", "merged_df = pd.merge(jsonl_df[['id', 'label_pred']], jsonl_df_gold[['id','text','label_gold']], on='id')\n", "merged_df" ], "metadata": { "id": "wFmwSZsirsFY", "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "outputId": 
"b2dec8e6-f3d5-46f3-9aee-e5c8074b26d1" }, "execution_count": 18, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id label_pred \\\n", "0 UKR2 81 \n", "1 UKR3 0 \n", "2 UKR4 322 \n", "3 UKR7 24 \n", "4 UKR8 42 \n", "... ... ... \n", "49951 UKR99919 26 \n", "49952 UKR99921 55 \n", "49953 UKR99922 75 \n", "49954 UKR99923 31 \n", "49955 UKR99924 18 \n", "\n", " text label_gold \n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 \n", "1 Наступного дня дискусія щодо заробітків продов... 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 \n", "\n", "[49956 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabel_predtextlabel_gold
0UKR281Про це заявив заступник начальника Генштабу ЗС...80
1UKR30Наступного дня дискусія щодо заробітків продов...0
2UKR4322Ху Цзіньтао виступає перед делегатами з'їзду, ...322
3UKR724У мене є таємниця. Від людини, якій я розповід...541
4UKR842– Цього року у нас були не тільки жителі Вінни...39
...............
49951UKR9991926Вільне життя, на превеликий подив мурчика, на ...26
49952UKR9992155У верхній частині турнірної таблиці нині переб...56
49953UKR9992275Саме тому австрійське командування й намагалос...76
49954UKR9992331У штабі наголосили, що бойовики посилили обстр...24
49955UKR9992418Чим же ж має бути такий громадянський іспит? П...20
\n", "

49956 rows × 4 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "merged_df", "summary": "{\n \"name\": \"merged_df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49956,\n \"samples\": [\n \"UKR15758\",\n \"UKR4821\",\n \"UKR2116\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467,\n 261,\n 258\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\",\n \"\\u0415\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432 \\u0412\\u0438\\u0440\\u043e\\u0449\\u0435\\u043d\\u0456 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u043c\\u0435\\u043d\\u0448 \\u043d\\u0456\\u0436 \\u043d\\u0430 0,001% \\u0441\\u043a\\u043b\\u0430\\u0434\\u0430\\u044e\\u0442\\u044c\\u0441\\u044f \\u0456\\u0437 \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d, \\u0440\\u0435\\u0448\\u0442\\u0430 - \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u200e\\u0441\\u0432\\u0438\\u043d\\u0456. 
\\u041f\\u0440\\u0438 \\u0446\\u044c\\u043e\\u043c\\u0443 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432.\\u200e \\u0426\\u0435 \\u043f\\u0435\\u0440\\u0448\\u0438\\u0439 \\u0434\\u043e\\u043a\\u0430\\u0437 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0445\\u0438\\u043c\\u0435\\u0440, \\u043d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0445 \\u0442\\u0430\\u043a \\u043d\\u0430 \\u0447\\u0435\\u0441\\u0442\\u044c \\u0430\\u043d\\u0442\\u0438\\u0447\\u043d\\u043e\\u0457 \\u0456\\u0441\\u0442\\u043e\\u0442\\u0438 \\u0437 \\u0433\\u043e\\u043b\\u043e\\u0432\\u043e\\u044e \\u0439 \\u200e\\u0448\\u0438\\u0454\\u044e \\u043b\\u0435\\u0432\\u0430, \\u0437 \\u0442\\u0443\\u043b\\u0443\\u0431\\u043e\\u043c \\u043a\\u043e\\u0437\\u0438 \\u0442\\u0430 \\u0437 \\u0445\\u0432\\u043e\\u0441\\u0442\\u043e\\u043c \\u0434\\u0440\\u0430\\u043a\\u043e\\u043d\\u0430, \\u043c\\u043e\\u0436\\u043d\\u0430 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0448\\u043b\\u044f\\u0445\\u043e\\u043c \\u043f\\u043e\\u0454\\u0434\\u043d\\u0430\\u043d\\u043d\\u044f \\u200e\\u0433\\u0435\\u043d\\u0435\\u0442\\u0438\\u0447\\u043d\\u043e\\u0433\\u043e \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u0443 \\u043b\\u044e\\u0434\\u0438\\u043d\\u0438 \\u0456 \\u0441\\u0432\\u0438\\u043d\\u0456.\\u200e \\u041a\\u0456\\u043d\\u0446\\u0435\\u0432\\u0430 \\u043c\\u0435\\u0442\\u0430 \\u0442\\u0430\\u043a\\u0438\\u0445 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u044c - \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044e \\u0456\\u0437 \\u0432\\u0438\\u0440\\u043e\\u0449\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 
\\u043e\\u0440\\u0433\\u0430\\u043d\\u0456\\u0432 \\u200e\\u0432\\u0441\\u0435\\u0440\\u0435\\u0434\\u0438\\u043d\\u0456 \\u0442\\u0432\\u0430\\u0440\\u0438\\u043d, \\u0445\\u043e\\u0447 \\u0434\\u043e \\u0457\\u0457 \\u0432\\u0442\\u0456\\u043b\\u0435\\u043d\\u043d\\u044f \\u0449\\u0435 \\u0434\\u0443\\u0436\\u0435 \\u0434\\u0430\\u043b\\u0435\\u043a\\u043e.\\u200e \\u0406\\u043d\\u0448\\u0456 \\u0432\\u0447\\u0435\\u043d\\u0456 \\u043d\\u0430\\u0437\\u0432\\u0430\\u043b\\u0438 \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0435 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f '\\u0437\\u0430\\u0445\\u043e\\u043f\\u043b\\u044e\\u044e\\u0447\\u0438\\u043c'. \\u200e \\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u0414\\u043b\\u044f \\u0442\\u043e\\u0433\\u043e, \\u0430\\u0431\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0445\\u0438\\u043c\\u0435\\u0440\\u0443, \\u0434\\u043b\\u044f \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f \\u0432\\u0438\\u043a\\u043e\\u0440\\u0438\\u0441\\u0442\\u043e\\u0432\\u0443\\u0432\\u0430\\u043b\\u0438 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u043d\\u0430 \\u0440\\u0430\\u043d\\u043d\\u0456\\u0439 \\u0441\\u0442\\u0430\\u0434\\u0456\\u0457 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u043a\\u0443. 
\\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456, \\u0456 \\u0432\\u0447\\u0435\\u043d\\u0438\\u043c \\u0432\\u0434\\u0430\\u043b\\u043e\\u0441\\u044f \\u0434\\u043e\\u0441\\u044f\\u0433\\u0442\\u0438 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0432\\u0447\\u0435\\u043d\\u0456 \\u0440\\u0430\\u043d\\u0456\\u0448\\u0435 \\u0432\\u0432\\u0430\\u0436\\u0430\\u043b\\u0438 \\u043d\\u0435\\u043c\\u043e\\u0436\\u043b\\u0438\\u0432\\u0438\\u043c.\\u200e\\n\\n\\u041e\\u0434\\u043d\\u0430\\u043a \\u0446\\u044f \\u0456\\u043d\\u043d\\u043e\\u0432\\u0430\\u0446\\u0456\\u0439\\u043d\\u0430 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044f \\u043f\\u0456\\u0434\\u043d\\u0456\\u043c\\u0430\\u0454 \\u0447\\u0438\\u043c\\u0430\\u043b\\u043e \\u0435\\u0442\\u0438\\u0447\\u043d\\u0438\\u0445 \\u043f\\u0438\\u0442\\u0430\\u043d\\u044c. 
\\u041d\\u0430\\u043f\\u0440\\u0438\\u043a\\u043b\\u0430\\u0434, \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u043d\\u0438\\u043a\\u0438 \\u043f\\u043e\\u0432\\u0438\\u043d\\u043d\\u0456 \\u0443\\u0432\\u0430\\u0436\\u043d\\u043e \\u043f\\u043e\\u0434\\u0431\\u0430\\u0442\\u0438 \\u043f\\u0440\\u043e \\u0442\\u0435, \\u044f\\u043a \\u0434\\u043e\\u0432\\u0433\\u043e \\u0446\\u0456 \\u0433\\u0456\\u0431\\u0440\\u0438\\u0434\\u0438 \\u043c\\u043e\\u0436\\u0443\\u0442\\u044c \\u0440\\u043e\\u0437\\u0432\\u0438\\u0432\\u0430\\u0442\\u0438\\u0441\\u044f \\u0456 \\u044f\\u043a \\u0457\\u0445 \\u0441\\u043b\\u0456\\u0434 \\u0442\\u0440\\u0438\\u043c\\u0430\\u0442\\u0438 \\u043f\\u0456\\u0434 \\u043a\\u043e\\u043d\\u0442\\u0440\\u043e\\u043b\\u0435\\u043c, \\u0449\\u043e\\u0431 \\u0443\\u043d\\u0438\\u043a\\u043d\\u0443\\u0442\\u0438 \\u043d\\u0435\\u0431\\u0430\\u0436\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430\\u0441\\u043b\\u0456\\u0434\\u043a\\u0456\\u0432.\\u200e\",\n \"\\u041a\\u043e\\u043b\\u0438 \\u0410\\u043b\\u044c\\u0431\\u0435\\u0440\\u0442, \\u043f\\u0456\\u0434\\u043c\\u043e\\u0440\\u0433\\u043d\\u0443\\u0432\\u0448\\u0438, \\u043e\\u0434\\u044f\\u0433\\u043d\\u0443\\u0432 \\u043e\\u0431\\u0440\\u0443\\u0447\\u043a\\u0443 \\u043d\\u0430 \\u043f\\u0430\\u043b\\u0435\\u0446\\u044c \\u0427\\u0430\\u0440\\u043b\\u0456\\u043d, \\u043d\\u0430\\u0440\\u0435\\u0447\\u0435\\u043d\\u0430 \\u0440\\u043e\\u0437\\u0441\\u043b\\u0430\\u0431\\u043b\\u0435\\u043d\\u043e \\u0443\\u0441\\u043c\\u0456\\u0445\\u043d\\u0443\\u043b\\u0430\\u0441\\u044f. \\u0417\\u0430\\u043a\\u043e\\u0445\\u0430\\u043d\\u0456 \\u043f\\u0430\\u0440\\u0430 \\u043f\\u043e\\u0433\\u043b\\u044f\\u043d\\u0443\\u043b\\u0430 \\u043e\\u0434\\u043d\\u0435 \\u043d\\u0430 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0437 \\u043b\\u0430\\u0433\\u0456\\u0434\\u043d\\u0456\\u0441\\u0442\\u044e \\u0432 \\u043e\\u0447\\u0430\\u0445. 
\\u0411\\u0456\\u043b\\u044f \\u043d\\u0438\\u0445 \\u0441\\u0442\\u043e\\u044f\\u043b\\u0438 \\u0457\\u0445\\u043d\\u0456 \\u0431\\u043b\\u0438\\u0437\\u044c\\u043a\\u0456 \\u0434\\u0440\\u0443\\u0437\\u0456 \\u0456 \\u0440\\u043e\\u0434\\u0438\\u0447\\u0456, \\u044f\\u043a\\u0456 \\u0440\\u0430\\u0434\\u0456\\u0441\\u043d\\u043e \\u0430\\u043f\\u043b\\u043e\\u0434\\u0443\\u0432\\u0430\\u043b\\u0438. \\u0426\\u0435\\u0439 \\u043c\\u043e\\u043c\\u0435\\u043d\\u0442 \\u0431\\u0443\\u0432 \\u043d\\u0430\\u0439\\u0449\\u0430\\u0441\\u043b\\u0438\\u0432\\u0456\\u0448\\u0438\\u043c \\u0443 \\u0457\\u0445\\u043d\\u044c\\u043e\\u043c\\u0443 \\u0436\\u0438\\u0442\\u0442\\u0456. \\u0412\\u043e\\u043d\\u0438 \\u0437\\u043d\\u0430\\u043b\\u0438, \\u0449\\u043e \\u0432\\u043e\\u043d\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0435\\u043d\\u0456 \\u043e\\u0434\\u043d\\u0435 \\u0434\\u043b\\u044f \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0456 \\u0440\\u0430\\u0437\\u043e\\u043c \\u043f\\u0440\\u043e\\u0439\\u0434\\u0443\\u0442\\u044c \\u0443\\u0432\\u0435\\u0441\\u044c \\u0448\\u043b\\u044f\\u0445 \\u0436\\u0438\\u0442\\u0442\\u044f, \\u0442\\u0440\\u0438\\u043c\\u0430\\u044e\\u0447\\u0438\\u0441\\u044c \\u0437\\u0430\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195,\n 714,\n 525\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "code", "source": [ "merged_df['diff'] = (merged_df['label_pred'] - merged_df['label_gold']).abs()\n", "merged_df" ], "metadata": { "id": "Lh8HQBtIrvFx", "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "outputId": "095aea4e-3893-4194-8d2c-328d62dfbf53" }, "execution_count": 19, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ 
" id label_pred \\\n", "0 UKR2 81 \n", "1 UKR3 0 \n", "2 UKR4 322 \n", "3 UKR7 24 \n", "4 UKR8 42 \n", "... ... ... \n", "49951 UKR99919 26 \n", "49952 UKR99921 55 \n", "49953 UKR99922 75 \n", "49954 UKR99923 31 \n", "49955 UKR99924 18 \n", "\n", " text label_gold diff \n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 1 \n", "1 Наступного дня дискусія щодо заробітків продов... 0 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 0 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 517 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 3 \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 0 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 1 \n", "49953 Саме тому австрійське командування й намагалос... 76 1 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 7 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 2 \n", "\n", "[49956 rows x 5 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabel_predtextlabel_golddiff
0UKR281Про це заявив заступник начальника Генштабу ЗС...801
1UKR30Наступного дня дискусія щодо заробітків продов...00
2UKR4322Ху Цзіньтао виступає перед делегатами з'їзду, ...3220
3UKR724У мене є таємниця. Від людини, якій я розповід...541517
4UKR842– Цього року у нас були не тільки жителі Вінни...393
..................
49951UKR9991926Вільне життя, на превеликий подив мурчика, на ...260
49952UKR9992155У верхній частині турнірної таблиці нині переб...561
49953UKR9992275Саме тому австрійське командування й намагалос...761
49954UKR9992331У штабі наголосили, що бойовики посилили обстр...247
49955UKR9992418Чим же ж має бути такий громадянський іспит? П...202
\n", "

49956 rows × 5 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "merged_df", "summary": "{\n \"name\": \"merged_df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49956,\n \"samples\": [\n \"UKR15758\",\n \"UKR4821\",\n \"UKR2116\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467,\n 261,\n 258\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\",\n \"\\u0415\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432 \\u0412\\u0438\\u0440\\u043e\\u0449\\u0435\\u043d\\u0456 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u043c\\u0435\\u043d\\u0448 \\u043d\\u0456\\u0436 \\u043d\\u0430 0,001% \\u0441\\u043a\\u043b\\u0430\\u0434\\u0430\\u044e\\u0442\\u044c\\u0441\\u044f \\u0456\\u0437 \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d, \\u0440\\u0435\\u0448\\u0442\\u0430 - \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u200e\\u0441\\u0432\\u0438\\u043d\\u0456. 
\\u041f\\u0440\\u0438 \\u0446\\u044c\\u043e\\u043c\\u0443 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432.\\u200e \\u0426\\u0435 \\u043f\\u0435\\u0440\\u0448\\u0438\\u0439 \\u0434\\u043e\\u043a\\u0430\\u0437 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0445\\u0438\\u043c\\u0435\\u0440, \\u043d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0445 \\u0442\\u0430\\u043a \\u043d\\u0430 \\u0447\\u0435\\u0441\\u0442\\u044c \\u0430\\u043d\\u0442\\u0438\\u0447\\u043d\\u043e\\u0457 \\u0456\\u0441\\u0442\\u043e\\u0442\\u0438 \\u0437 \\u0433\\u043e\\u043b\\u043e\\u0432\\u043e\\u044e \\u0439 \\u200e\\u0448\\u0438\\u0454\\u044e \\u043b\\u0435\\u0432\\u0430, \\u0437 \\u0442\\u0443\\u043b\\u0443\\u0431\\u043e\\u043c \\u043a\\u043e\\u0437\\u0438 \\u0442\\u0430 \\u0437 \\u0445\\u0432\\u043e\\u0441\\u0442\\u043e\\u043c \\u0434\\u0440\\u0430\\u043a\\u043e\\u043d\\u0430, \\u043c\\u043e\\u0436\\u043d\\u0430 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0448\\u043b\\u044f\\u0445\\u043e\\u043c \\u043f\\u043e\\u0454\\u0434\\u043d\\u0430\\u043d\\u043d\\u044f \\u200e\\u0433\\u0435\\u043d\\u0435\\u0442\\u0438\\u0447\\u043d\\u043e\\u0433\\u043e \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u0443 \\u043b\\u044e\\u0434\\u0438\\u043d\\u0438 \\u0456 \\u0441\\u0432\\u0438\\u043d\\u0456.\\u200e \\u041a\\u0456\\u043d\\u0446\\u0435\\u0432\\u0430 \\u043c\\u0435\\u0442\\u0430 \\u0442\\u0430\\u043a\\u0438\\u0445 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u044c - \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044e \\u0456\\u0437 \\u0432\\u0438\\u0440\\u043e\\u0449\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 
\\u043e\\u0440\\u0433\\u0430\\u043d\\u0456\\u0432 \\u200e\\u0432\\u0441\\u0435\\u0440\\u0435\\u0434\\u0438\\u043d\\u0456 \\u0442\\u0432\\u0430\\u0440\\u0438\\u043d, \\u0445\\u043e\\u0447 \\u0434\\u043e \\u0457\\u0457 \\u0432\\u0442\\u0456\\u043b\\u0435\\u043d\\u043d\\u044f \\u0449\\u0435 \\u0434\\u0443\\u0436\\u0435 \\u0434\\u0430\\u043b\\u0435\\u043a\\u043e.\\u200e \\u0406\\u043d\\u0448\\u0456 \\u0432\\u0447\\u0435\\u043d\\u0456 \\u043d\\u0430\\u0437\\u0432\\u0430\\u043b\\u0438 \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0435 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f '\\u0437\\u0430\\u0445\\u043e\\u043f\\u043b\\u044e\\u044e\\u0447\\u0438\\u043c'. \\u200e \\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u0414\\u043b\\u044f \\u0442\\u043e\\u0433\\u043e, \\u0430\\u0431\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0445\\u0438\\u043c\\u0435\\u0440\\u0443, \\u0434\\u043b\\u044f \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f \\u0432\\u0438\\u043a\\u043e\\u0440\\u0438\\u0441\\u0442\\u043e\\u0432\\u0443\\u0432\\u0430\\u043b\\u0438 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u043d\\u0430 \\u0440\\u0430\\u043d\\u043d\\u0456\\u0439 \\u0441\\u0442\\u0430\\u0434\\u0456\\u0457 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u043a\\u0443. 
\\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456, \\u0456 \\u0432\\u0447\\u0435\\u043d\\u0438\\u043c \\u0432\\u0434\\u0430\\u043b\\u043e\\u0441\\u044f \\u0434\\u043e\\u0441\\u044f\\u0433\\u0442\\u0438 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0432\\u0447\\u0435\\u043d\\u0456 \\u0440\\u0430\\u043d\\u0456\\u0448\\u0435 \\u0432\\u0432\\u0430\\u0436\\u0430\\u043b\\u0438 \\u043d\\u0435\\u043c\\u043e\\u0436\\u043b\\u0438\\u0432\\u0438\\u043c.\\u200e\\n\\n\\u041e\\u0434\\u043d\\u0430\\u043a \\u0446\\u044f \\u0456\\u043d\\u043d\\u043e\\u0432\\u0430\\u0446\\u0456\\u0439\\u043d\\u0430 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044f \\u043f\\u0456\\u0434\\u043d\\u0456\\u043c\\u0430\\u0454 \\u0447\\u0438\\u043c\\u0430\\u043b\\u043e \\u0435\\u0442\\u0438\\u0447\\u043d\\u0438\\u0445 \\u043f\\u0438\\u0442\\u0430\\u043d\\u044c. 
\\u041d\\u0430\\u043f\\u0440\\u0438\\u043a\\u043b\\u0430\\u0434, \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u043d\\u0438\\u043a\\u0438 \\u043f\\u043e\\u0432\\u0438\\u043d\\u043d\\u0456 \\u0443\\u0432\\u0430\\u0436\\u043d\\u043e \\u043f\\u043e\\u0434\\u0431\\u0430\\u0442\\u0438 \\u043f\\u0440\\u043e \\u0442\\u0435, \\u044f\\u043a \\u0434\\u043e\\u0432\\u0433\\u043e \\u0446\\u0456 \\u0433\\u0456\\u0431\\u0440\\u0438\\u0434\\u0438 \\u043c\\u043e\\u0436\\u0443\\u0442\\u044c \\u0440\\u043e\\u0437\\u0432\\u0438\\u0432\\u0430\\u0442\\u0438\\u0441\\u044f \\u0456 \\u044f\\u043a \\u0457\\u0445 \\u0441\\u043b\\u0456\\u0434 \\u0442\\u0440\\u0438\\u043c\\u0430\\u0442\\u0438 \\u043f\\u0456\\u0434 \\u043a\\u043e\\u043d\\u0442\\u0440\\u043e\\u043b\\u0435\\u043c, \\u0449\\u043e\\u0431 \\u0443\\u043d\\u0438\\u043a\\u043d\\u0443\\u0442\\u0438 \\u043d\\u0435\\u0431\\u0430\\u0436\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430\\u0441\\u043b\\u0456\\u0434\\u043a\\u0456\\u0432.\\u200e\",\n \"\\u041a\\u043e\\u043b\\u0438 \\u0410\\u043b\\u044c\\u0431\\u0435\\u0440\\u0442, \\u043f\\u0456\\u0434\\u043c\\u043e\\u0440\\u0433\\u043d\\u0443\\u0432\\u0448\\u0438, \\u043e\\u0434\\u044f\\u0433\\u043d\\u0443\\u0432 \\u043e\\u0431\\u0440\\u0443\\u0447\\u043a\\u0443 \\u043d\\u0430 \\u043f\\u0430\\u043b\\u0435\\u0446\\u044c \\u0427\\u0430\\u0440\\u043b\\u0456\\u043d, \\u043d\\u0430\\u0440\\u0435\\u0447\\u0435\\u043d\\u0430 \\u0440\\u043e\\u0437\\u0441\\u043b\\u0430\\u0431\\u043b\\u0435\\u043d\\u043e \\u0443\\u0441\\u043c\\u0456\\u0445\\u043d\\u0443\\u043b\\u0430\\u0441\\u044f. \\u0417\\u0430\\u043a\\u043e\\u0445\\u0430\\u043d\\u0456 \\u043f\\u0430\\u0440\\u0430 \\u043f\\u043e\\u0433\\u043b\\u044f\\u043d\\u0443\\u043b\\u0430 \\u043e\\u0434\\u043d\\u0435 \\u043d\\u0430 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0437 \\u043b\\u0430\\u0433\\u0456\\u0434\\u043d\\u0456\\u0441\\u0442\\u044e \\u0432 \\u043e\\u0447\\u0430\\u0445. 
\\u0411\\u0456\\u043b\\u044f \\u043d\\u0438\\u0445 \\u0441\\u0442\\u043e\\u044f\\u043b\\u0438 \\u0457\\u0445\\u043d\\u0456 \\u0431\\u043b\\u0438\\u0437\\u044c\\u043a\\u0456 \\u0434\\u0440\\u0443\\u0437\\u0456 \\u0456 \\u0440\\u043e\\u0434\\u0438\\u0447\\u0456, \\u044f\\u043a\\u0456 \\u0440\\u0430\\u0434\\u0456\\u0441\\u043d\\u043e \\u0430\\u043f\\u043b\\u043e\\u0434\\u0443\\u0432\\u0430\\u043b\\u0438. \\u0426\\u0435\\u0439 \\u043c\\u043e\\u043c\\u0435\\u043d\\u0442 \\u0431\\u0443\\u0432 \\u043d\\u0430\\u0439\\u0449\\u0430\\u0441\\u043b\\u0438\\u0432\\u0456\\u0448\\u0438\\u043c \\u0443 \\u0457\\u0445\\u043d\\u044c\\u043e\\u043c\\u0443 \\u0436\\u0438\\u0442\\u0442\\u0456. \\u0412\\u043e\\u043d\\u0438 \\u0437\\u043d\\u0430\\u043b\\u0438, \\u0449\\u043e \\u0432\\u043e\\u043d\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0435\\u043d\\u0456 \\u043e\\u0434\\u043d\\u0435 \\u0434\\u043b\\u044f \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0456 \\u0440\\u0430\\u0437\\u043e\\u043c \\u043f\\u0440\\u043e\\u0439\\u0434\\u0443\\u0442\\u044c \\u0443\\u0432\\u0435\\u0441\\u044c \\u0448\\u043b\\u044f\\u0445 \\u0436\\u0438\\u0442\\u0442\\u044f, \\u0442\\u0440\\u0438\\u043c\\u0430\\u044e\\u0447\\u0438\\u0441\\u044c \\u0437\\u0430\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195,\n 714,\n 525\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diff\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 89,\n \"min\": 0,\n \"max\": 5950,\n \"num_unique_values\": 599,\n \"samples\": [\n 471,\n 220,\n 530\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 19 } ] }, { "cell_type": "code", "source": [ "merged_df['id'] = merged_df['id'].str[3:].astype(int)\n", "merged_df" ], "metadata": { "id": 
"zZf3ctI2rwvS", "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "outputId": "c07f1976-7d89-438d-dafb-8cccdd7f8a3a" }, "execution_count": 20, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id label_pred text \\\n", "0 2 81 Про це заявив заступник начальника Генштабу ЗС... \n", "1 3 0 Наступного дня дискусія щодо заробітків продов... \n", "2 4 322 Ху Цзіньтао виступає перед делегатами з'їзду, ... \n", "3 7 24 У мене є таємниця. Від людини, якій я розповід... \n", "4 8 42 – Цього року у нас були не тільки жителі Вінни... \n", "... ... ... ... \n", "49951 99919 26 Вільне життя, на превеликий подив мурчика, на ... \n", "49952 99921 55 У верхній частині турнірної таблиці нині переб... \n", "49953 99922 75 Саме тому австрійське командування й намагалос... \n", "49954 99923 31 У штабі наголосили, що бойовики посилили обстр... \n", "49955 99924 18 Чим же ж має бути такий громадянський іспит? П... \n", "\n", " label_gold diff \n", "0 80 1 \n", "1 0 0 \n", "2 322 0 \n", "3 541 517 \n", "4 39 3 \n", "... ... ... \n", "49951 26 0 \n", "49952 56 1 \n", "49953 76 1 \n", "49954 24 7 \n", "49955 20 2 \n", "\n", "[49956 rows x 5 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabel_predtextlabel_golddiff
0281Про це заявив заступник начальника Генштабу ЗС...801
130Наступного дня дискусія щодо заробітків продов...00
24322Ху Цзіньтао виступає перед делегатами з'їзду, ...3220
3724У мене є таємниця. Від людини, якій я розповід...541517
4842– Цього року у нас були не тільки жителі Вінни...393
..................
499519991926Вільне життя, на превеликий подив мурчика, на ...260
499529992155У верхній частині турнірної таблиці нині переб...561
499539992275Саме тому австрійське командування й намагалос...761
499549992331У штабі наголосили, що бойовики посилили обстр...247
499559992418Чим же ж має бути такий громадянський іспит? П...202
\n", "

49956 rows × 5 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "merged_df", "summary": "{\n \"name\": \"merged_df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28821,\n \"min\": 2,\n \"max\": 99924,\n \"num_unique_values\": 49956,\n \"samples\": [\n 15758,\n 4821,\n 2116\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467,\n 261,\n 258\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\",\n \"\\u0415\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432 \\u0412\\u0438\\u0440\\u043e\\u0449\\u0435\\u043d\\u0456 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u043c\\u0435\\u043d\\u0448 \\u043d\\u0456\\u0436 \\u043d\\u0430 0,001% \\u0441\\u043a\\u043b\\u0430\\u0434\\u0430\\u044e\\u0442\\u044c\\u0441\\u044f \\u0456\\u0437 \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d, \\u0440\\u0435\\u0448\\u0442\\u0430 - \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u200e\\u0441\\u0432\\u0438\\u043d\\u0456. 
\\u041f\\u0440\\u0438 \\u0446\\u044c\\u043e\\u043c\\u0443 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430\\u043c \\u0434\\u0430\\u043b\\u0438 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u0438\\u0441\\u044f \\u043b\\u0438\\u0448\\u0435 \\u0434\\u043e 28 \\u0434\\u043d\\u0456\\u0432.\\u200e \\u0426\\u0435 \\u043f\\u0435\\u0440\\u0448\\u0438\\u0439 \\u0434\\u043e\\u043a\\u0430\\u0437 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0445\\u0438\\u043c\\u0435\\u0440, \\u043d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0445 \\u0442\\u0430\\u043a \\u043d\\u0430 \\u0447\\u0435\\u0441\\u0442\\u044c \\u0430\\u043d\\u0442\\u0438\\u0447\\u043d\\u043e\\u0457 \\u0456\\u0441\\u0442\\u043e\\u0442\\u0438 \\u0437 \\u0433\\u043e\\u043b\\u043e\\u0432\\u043e\\u044e \\u0439 \\u200e\\u0448\\u0438\\u0454\\u044e \\u043b\\u0435\\u0432\\u0430, \\u0437 \\u0442\\u0443\\u043b\\u0443\\u0431\\u043e\\u043c \\u043a\\u043e\\u0437\\u0438 \\u0442\\u0430 \\u0437 \\u0445\\u0432\\u043e\\u0441\\u0442\\u043e\\u043c \\u0434\\u0440\\u0430\\u043a\\u043e\\u043d\\u0430, \\u043c\\u043e\\u0436\\u043d\\u0430 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0448\\u043b\\u044f\\u0445\\u043e\\u043c \\u043f\\u043e\\u0454\\u0434\\u043d\\u0430\\u043d\\u043d\\u044f \\u200e\\u0433\\u0435\\u043d\\u0435\\u0442\\u0438\\u0447\\u043d\\u043e\\u0433\\u043e \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u0443 \\u043b\\u044e\\u0434\\u0438\\u043d\\u0438 \\u0456 \\u0441\\u0432\\u0438\\u043d\\u0456.\\u200e \\u041a\\u0456\\u043d\\u0446\\u0435\\u0432\\u0430 \\u043c\\u0435\\u0442\\u0430 \\u0442\\u0430\\u043a\\u0438\\u0445 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u044c - \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044e \\u0456\\u0437 \\u0432\\u0438\\u0440\\u043e\\u0449\\u0443\\u0432\\u0430\\u043d\\u043d\\u044f \\u043b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0438\\u0445 
\\u043e\\u0440\\u0433\\u0430\\u043d\\u0456\\u0432 \\u200e\\u0432\\u0441\\u0435\\u0440\\u0435\\u0434\\u0438\\u043d\\u0456 \\u0442\\u0432\\u0430\\u0440\\u0438\\u043d, \\u0445\\u043e\\u0447 \\u0434\\u043e \\u0457\\u0457 \\u0432\\u0442\\u0456\\u043b\\u0435\\u043d\\u043d\\u044f \\u0449\\u0435 \\u0434\\u0443\\u0436\\u0435 \\u0434\\u0430\\u043b\\u0435\\u043a\\u043e.\\u200e \\u0406\\u043d\\u0448\\u0456 \\u0432\\u0447\\u0435\\u043d\\u0456 \\u043d\\u0430\\u0437\\u0432\\u0430\\u043b\\u0438 \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0435 \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f '\\u0437\\u0430\\u0445\\u043e\\u043f\\u043b\\u044e\\u044e\\u0447\\u0438\\u043c'. \\u200e \\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u0414\\u043b\\u044f \\u0442\\u043e\\u0433\\u043e, \\u0430\\u0431\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0438\\u0442\\u0438 \\u0445\\u0438\\u043c\\u0435\\u0440\\u0443, \\u0434\\u043b\\u044f \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u0436\\u0435\\u043d\\u043d\\u044f \\u0432\\u0438\\u043a\\u043e\\u0440\\u0438\\u0441\\u0442\\u043e\\u0432\\u0443\\u0432\\u0430\\u043b\\u0438 \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0438 \\u0441\\u0432\\u0438\\u043d\\u0456 \\u043d\\u0430 \\u0440\\u0430\\u043d\\u043d\\u0456\\u0439 \\u0441\\u0442\\u0430\\u0434\\u0456\\u0457 \\u0440\\u043e\\u0437\\u0432\\u0438\\u0442\\u043a\\u0443. 
\\u041b\\u044e\\u0434\\u0441\\u044c\\u043a\\u0456 \\u0441\\u0442\\u043e\\u0432\\u0431\\u0443\\u0440\\u043e\\u0432\\u0456 \\u043a\\u043b\\u0456\\u0442\\u0438\\u043d\\u0438 \\u0432\\u0436\\u0438\\u0432\\u043b\\u044f\\u044e\\u0442\\u044c \\u0434\\u043e \\u0435\\u043c\\u0431\\u0440\\u0456\\u043e\\u043d\\u0430 \\u0441\\u0432\\u0438\\u043d\\u0456, \\u0456 \\u0432\\u0447\\u0435\\u043d\\u0438\\u043c \\u0432\\u0434\\u0430\\u043b\\u043e\\u0441\\u044f \\u0434\\u043e\\u0441\\u044f\\u0433\\u0442\\u0438 \\u0442\\u043e\\u0433\\u043e, \\u0449\\u043e \\u0432\\u0447\\u0435\\u043d\\u0456 \\u0440\\u0430\\u043d\\u0456\\u0448\\u0435 \\u0432\\u0432\\u0430\\u0436\\u0430\\u043b\\u0438 \\u043d\\u0435\\u043c\\u043e\\u0436\\u043b\\u0438\\u0432\\u0438\\u043c.\\u200e\\n\\n\\u041e\\u0434\\u043d\\u0430\\u043a \\u0446\\u044f \\u0456\\u043d\\u043d\\u043e\\u0432\\u0430\\u0446\\u0456\\u0439\\u043d\\u0430 \\u0442\\u0435\\u0445\\u043d\\u043e\\u043b\\u043e\\u0433\\u0456\\u044f \\u043f\\u0456\\u0434\\u043d\\u0456\\u043c\\u0430\\u0454 \\u0447\\u0438\\u043c\\u0430\\u043b\\u043e \\u0435\\u0442\\u0438\\u0447\\u043d\\u0438\\u0445 \\u043f\\u0438\\u0442\\u0430\\u043d\\u044c. 
\\u041d\\u0430\\u043f\\u0440\\u0438\\u043a\\u043b\\u0430\\u0434, \\u0434\\u043e\\u0441\\u043b\\u0456\\u0434\\u043d\\u0438\\u043a\\u0438 \\u043f\\u043e\\u0432\\u0438\\u043d\\u043d\\u0456 \\u0443\\u0432\\u0430\\u0436\\u043d\\u043e \\u043f\\u043e\\u0434\\u0431\\u0430\\u0442\\u0438 \\u043f\\u0440\\u043e \\u0442\\u0435, \\u044f\\u043a \\u0434\\u043e\\u0432\\u0433\\u043e \\u0446\\u0456 \\u0433\\u0456\\u0431\\u0440\\u0438\\u0434\\u0438 \\u043c\\u043e\\u0436\\u0443\\u0442\\u044c \\u0440\\u043e\\u0437\\u0432\\u0438\\u0432\\u0430\\u0442\\u0438\\u0441\\u044f \\u0456 \\u044f\\u043a \\u0457\\u0445 \\u0441\\u043b\\u0456\\u0434 \\u0442\\u0440\\u0438\\u043c\\u0430\\u0442\\u0438 \\u043f\\u0456\\u0434 \\u043a\\u043e\\u043d\\u0442\\u0440\\u043e\\u043b\\u0435\\u043c, \\u0449\\u043e\\u0431 \\u0443\\u043d\\u0438\\u043a\\u043d\\u0443\\u0442\\u0438 \\u043d\\u0435\\u0431\\u0430\\u0436\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430\\u0441\\u043b\\u0456\\u0434\\u043a\\u0456\\u0432.\\u200e\",\n \"\\u041a\\u043e\\u043b\\u0438 \\u0410\\u043b\\u044c\\u0431\\u0435\\u0440\\u0442, \\u043f\\u0456\\u0434\\u043c\\u043e\\u0440\\u0433\\u043d\\u0443\\u0432\\u0448\\u0438, \\u043e\\u0434\\u044f\\u0433\\u043d\\u0443\\u0432 \\u043e\\u0431\\u0440\\u0443\\u0447\\u043a\\u0443 \\u043d\\u0430 \\u043f\\u0430\\u043b\\u0435\\u0446\\u044c \\u0427\\u0430\\u0440\\u043b\\u0456\\u043d, \\u043d\\u0430\\u0440\\u0435\\u0447\\u0435\\u043d\\u0430 \\u0440\\u043e\\u0437\\u0441\\u043b\\u0430\\u0431\\u043b\\u0435\\u043d\\u043e \\u0443\\u0441\\u043c\\u0456\\u0445\\u043d\\u0443\\u043b\\u0430\\u0441\\u044f. \\u0417\\u0430\\u043a\\u043e\\u0445\\u0430\\u043d\\u0456 \\u043f\\u0430\\u0440\\u0430 \\u043f\\u043e\\u0433\\u043b\\u044f\\u043d\\u0443\\u043b\\u0430 \\u043e\\u0434\\u043d\\u0435 \\u043d\\u0430 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0437 \\u043b\\u0430\\u0433\\u0456\\u0434\\u043d\\u0456\\u0441\\u0442\\u044e \\u0432 \\u043e\\u0447\\u0430\\u0445. 
\\u0411\\u0456\\u043b\\u044f \\u043d\\u0438\\u0445 \\u0441\\u0442\\u043e\\u044f\\u043b\\u0438 \\u0457\\u0445\\u043d\\u0456 \\u0431\\u043b\\u0438\\u0437\\u044c\\u043a\\u0456 \\u0434\\u0440\\u0443\\u0437\\u0456 \\u0456 \\u0440\\u043e\\u0434\\u0438\\u0447\\u0456, \\u044f\\u043a\\u0456 \\u0440\\u0430\\u0434\\u0456\\u0441\\u043d\\u043e \\u0430\\u043f\\u043b\\u043e\\u0434\\u0443\\u0432\\u0430\\u043b\\u0438. \\u0426\\u0435\\u0439 \\u043c\\u043e\\u043c\\u0435\\u043d\\u0442 \\u0431\\u0443\\u0432 \\u043d\\u0430\\u0439\\u0449\\u0430\\u0441\\u043b\\u0438\\u0432\\u0456\\u0448\\u0438\\u043c \\u0443 \\u0457\\u0445\\u043d\\u044c\\u043e\\u043c\\u0443 \\u0436\\u0438\\u0442\\u0442\\u0456. \\u0412\\u043e\\u043d\\u0438 \\u0437\\u043d\\u0430\\u043b\\u0438, \\u0449\\u043e \\u0432\\u043e\\u043d\\u0438 \\u0441\\u0442\\u0432\\u043e\\u0440\\u0435\\u043d\\u0456 \\u043e\\u0434\\u043d\\u0435 \\u0434\\u043b\\u044f \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0456 \\u0440\\u0430\\u0437\\u043e\\u043c \\u043f\\u0440\\u043e\\u0439\\u0434\\u0443\\u0442\\u044c \\u0443\\u0432\\u0435\\u0441\\u044c \\u0448\\u043b\\u044f\\u0445 \\u0436\\u0438\\u0442\\u0442\\u044f, \\u0442\\u0440\\u0438\\u043c\\u0430\\u044e\\u0447\\u0438\\u0441\\u044c \\u0437\\u0430\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195,\n 714,\n 525\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diff\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 89,\n \"min\": 0,\n \"max\": 5950,\n \"num_unique_values\": 599,\n \"samples\": [\n 471,\n 220,\n 530\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "code", "source": [ "merged_df = UKR_test.merge(merged_df, left_index=True, right_on='id', how='outer')\n", "merged_df" 
], "metadata": { "id": "yzQw_jhDr1-E", "colab": { "base_uri": "https://localhost:8080/", "height": 892 }, "outputId": "65f0fae3-db07-434f-910d-6e585edc0088" }, "execution_count": 21, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " ISO LLM used Type Data Split \\\n", "0 UKR GPT-o1 Partial Test \n", "1 UKR Gemini-Pro-1.5 Rewritten Test \n", "2 UKR Claude-Haiku-3.5 Partial Test \n", "3 UKR GPT-4o Partial Test \n", "4 UKR Amazon-Nova-Lite-1.0 Partial Test \n", "... ... ... ... ... \n", "49951 UKR Gemini-Pro-1.5 Partial Test \n", "49952 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49953 UKR Gemini-Flash-1.5 Unchanged Test \n", "49954 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49955 UKR Mistral-Large-2411 Partial Test \n", "\n", " Original text Original Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 136 \n", "1 “Поїду, а коли настануть кращі часи, повернусь... 106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 \n", "1 Наступного дня дискусія щодо заробітків продов... 94 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 \n", "3 У мене є таємниця. Від людини, якій я розповід... 
586 \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 \n", "49952 У верхній частині турнірної таблиці нині переб... 115 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 \n", "\n", " New Char Count id label_pred \\\n", "0 1248 2 81 \n", "1 555 3 0 \n", "2 2535 4 322 \n", "3 3541 7 24 \n", "4 737 8 42 \n", "... ... ... ... \n", "49951 574 99919 26 \n", "49952 797 99921 55 \n", "49953 595 99922 75 \n", "49954 623 99923 31 \n", "49955 523 99924 18 \n", "\n", " text label_gold diff \n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 1 \n", "1 Наступного дня дискусія щодо заробітків продов... 0 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 0 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 517 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 3 \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 0 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 1 \n", "49953 Саме тому австрійське командування й намагалос... 76 1 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 7 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 2 \n", "\n", "[49956 rows x 16 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word CountNew Char Countidlabel_predtextlabel_golddiff
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...1711248281Про це заявив заступник начальника Генштабу ЗС...801
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...9455530Наступного дня дискусія щодо заробітків продов...00
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...36825354322Ху Цзіньтао виступає перед делегатами з'їзду, ...3220
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...5863541724У мене є таємниця. Від людини, якій я розповід...541517
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118737842– Цього року у нас були не тільки жителі Вінни...393
...................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...935749991926Вільне життя, на превеликий подив мурчика, на ...260
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...1157979992155У верхній частині турнірної таблиці нині переб...561
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...765959992275Саме тому австрійське командування й намагалос...761
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...886239992331У штабі наголосили, що бойовики посилили обстр...247
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...725239992418Чим же ж має бути такий громадянський іспит? П...202
\n", "

49956 rows × 16 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "merged_df", "summary": "{\n \"name\": \"merged_df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"ISO\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"UKR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LLM used\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Gemini-Flash-1.5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Partial\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Data Split\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Test\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49943,\n \"samples\": [\n \"\\u0410 \\u043d\\u0430\\u0439\\u0431\\u0456\\u043b\\u044c\\u0448\\u043e\\u044e \\u0432\\u0442\\u0440\\u0430\\u0442\\u043e\\u044e \\u0437\\u0430 \\u0440\\u043e\\u043a\\u0438 \\u043d\\u0435\\u0437\\u0430\\u043b\\u0435\\u0436\\u043d\\u043e\\u0441\\u0442\\u0456 \\u0454 \\u043a\\u0443\\u043b\\u044c\\u0442\\u0443\\u0440\\u043d\\u0430 \\u0456\\u043d\\u0444\\u0440\\u0430\\u0441\\u0442\\u0440\\u0443\\u043a\\u0442\\u0443\\u0440\\u0430. \\u041f\\u0435\\u0440\\u0435\\u0434\\u043e\\u0432\\u0441\\u0456\\u043c, \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u044c\\u043d\\u0430. 
\\u0411\\u043e \\u0442\\u0430\\u043b\\u0430\\u043d\\u0442\\u0438 \\u0432 \\u043d\\u0430\\u0441 \\u044f\\u043a \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u043b\\u0438\\u0441\\u044f, \\u0442\\u0430\\u043a \\u0456 \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u0442\\u0438\\u043c\\u0443\\u0442\\u044c\\u0441\\u044f.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 320,\n \"min\": 6,\n \"max\": 12134,\n \"num_unique_values\": 1767,\n \"samples\": [\n 1251\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2198,\n \"min\": 22,\n \"max\": 82575,\n \"num_unique_values\": 6706,\n \"samples\": [\n 716\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Split Location\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Modified text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 197,\n \"min\": 1,\n \"max\": 6951,\n \"num_unique_values\": 1310,\n \"samples\": [\n 620\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1338,\n \"min\": 6,\n \"max\": 47900,\n \"num_unique_values\": 5067,\n \"samples\": [\n 2371\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28821,\n \"min\": 2,\n \"max\": 99924,\n \"num_unique_values\": 49956,\n \"samples\": [\n 15758\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 
\\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. '\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diff\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 89,\n \"min\": 0,\n \"max\": 5950,\n \"num_unique_values\": 599,\n \"samples\": [\n 471\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 21 } ] }, 
{ "cell_type": "code", "source": [
 "df = merged_df.copy()\n",
 "# Must be the same tokenizer that was used in training, otherwise the token counts are not comparable.\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"hyperonym/xlm-roberta-longformer-base-16384\")\n",
 "MAX_TOKENS = 2048  # token budget for the words before the split; presumably the training max length -- TODO confirm\n",
 "\n",
 "def check_split_position(row, max_tokens=MAX_TOKENS):\n",
 "    \"\"\"Tell whether the words before the split fit into a token budget.\n",
 "\n",
 "    Tokenizes the first `row['Split Location']` whitespace-separated words of\n",
 "    `row['Modified text']` one at a time with the global `tokenizer` and sums\n",
 "    the per-word token counts.\n",
 "\n",
 "    Args:\n",
 "        row: mapping/Series with 'Modified text' (str) and 'Split Location' (int).\n",
 "        max_tokens: token budget; defaults to MAX_TOKENS.\n",
 "\n",
 "    Returns:\n",
 "        \"Outside\" if the running token count exceeds `max_tokens`, else \"Inside\".\n",
 "    \"\"\"\n",
 "    words = row['Modified text'].split()\n",
 "    # 'Split Location' is used as a count of leading words, not as an index.\n",
 "    # Slicing (instead of words[i]) cannot raise IndexError when the count is\n",
 "    # larger than the number of whitespace-separated words in the text.\n",
 "    prefix_len = max(int(row['Split Location']), 0)\n",
 "    cumulative_tokens = 0\n",
 "    for word in words[:prefix_len]:\n",
 "        cumulative_tokens += len(tokenizer.tokenize(word))\n",
 "        if cumulative_tokens > max_tokens:  # stop early once the budget is exceeded\n",
 "            return \"Outside\"\n",
 "    return \"Inside\"\n",
 "\n",
 "df['Token Limit Check'] = df.apply(check_split_position, axis=1)\n",
 "df" ], "metadata": { "id": "n3EtjBRXr53j", "colab": { "base_uri": "https://localhost:8080/", "height": 961 }, "outputId": "fea903be-481b-4ecf-ccfa-296ad774de37" }, "execution_count": 22, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " ISO LLM used Type Data Split \\\n", "0 UKR GPT-o1 Partial Test \n", "1 UKR Gemini-Pro-1.5 Rewritten Test \n", "2 UKR Claude-Haiku-3.5 Partial Test \n", "3 UKR GPT-4o Partial Test \n", "4 UKR Amazon-Nova-Lite-1.0 Partial Test \n", "... ... ... ... ... \n", "49951 UKR Gemini-Pro-1.5 Partial Test \n", "49952 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49953 UKR Gemini-Flash-1.5 Unchanged Test \n", "49954 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49955 UKR Mistral-Large-2411 Partial Test \n", "\n", " Original text Original Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 136 \n", "1 “Поїду, а коли настануть кращі часи, повернусь... 106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 
99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 \n", "1 Наступного дня дискусія щодо заробітків продов... 94 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 \n", "49952 У верхній частині турнірної таблиці нині переб... 115 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 \n", "\n", " New Char Count id label_pred \\\n", "0 1248 2 81 \n", "1 555 3 0 \n", "2 2535 4 322 \n", "3 3541 7 24 \n", "4 737 8 42 \n", "... ... ... ... \n", "49951 574 99919 26 \n", "49952 797 99921 55 \n", "49953 595 99922 75 \n", "49954 623 99923 31 \n", "49955 523 99924 18 \n", "\n", " text label_gold diff \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 1 \n", "1 Наступного дня дискусія щодо заробітків продов... 0 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 0 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 517 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 3 \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 0 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 1 \n", "49953 Саме тому австрійське командування й намагалос... 
76 1 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 7 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 2 \n", "\n", " Token Limit Check \n", "0 Inside \n", "1 Inside \n", "2 Inside \n", "3 Inside \n", "4 Inside \n", "... ... \n", "49951 Inside \n", "49952 Inside \n", "49953 Inside \n", "49954 Inside \n", "49955 Inside \n", "\n", "[49956 rows x 17 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word CountNew Char Countidlabel_predtextlabel_golddiffToken Limit Check
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...1711248281Про це заявив заступник начальника Генштабу ЗС...801Inside
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...9455530Наступного дня дискусія щодо заробітків продов...00Inside
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...36825354322Ху Цзіньтао виступає перед делегатами з'їзду, ...3220Inside
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...5863541724У мене є таємниця. Від людини, якій я розповід...541517Inside
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118737842– Цього року у нас були не тільки жителі Вінни...393Inside
......................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...935749991926Вільне життя, на превеликий подив мурчика, на ...260Inside
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...1157979992155У верхній частині турнірної таблиці нині переб...561Inside
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...765959992275Саме тому австрійське командування й намагалос...761Inside
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...886239992331У штабі наголосили, що бойовики посилили обстр...247Inside
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...725239992418Чим же ж має бути такий громадянський іспит? П...202Inside
\n", "

49956 rows × 17 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"ISO\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"UKR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LLM used\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Gemini-Flash-1.5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Partial\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Data Split\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Test\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49943,\n \"samples\": [\n \"\\u0410 \\u043d\\u0430\\u0439\\u0431\\u0456\\u043b\\u044c\\u0448\\u043e\\u044e \\u0432\\u0442\\u0440\\u0430\\u0442\\u043e\\u044e \\u0437\\u0430 \\u0440\\u043e\\u043a\\u0438 \\u043d\\u0435\\u0437\\u0430\\u043b\\u0435\\u0436\\u043d\\u043e\\u0441\\u0442\\u0456 \\u0454 \\u043a\\u0443\\u043b\\u044c\\u0442\\u0443\\u0440\\u043d\\u0430 \\u0456\\u043d\\u0444\\u0440\\u0430\\u0441\\u0442\\u0440\\u0443\\u043a\\u0442\\u0443\\u0440\\u0430. \\u041f\\u0435\\u0440\\u0435\\u0434\\u043e\\u0432\\u0441\\u0456\\u043c, \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u044c\\u043d\\u0430. 
\\u0411\\u043e \\u0442\\u0430\\u043b\\u0430\\u043d\\u0442\\u0438 \\u0432 \\u043d\\u0430\\u0441 \\u044f\\u043a \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u043b\\u0438\\u0441\\u044f, \\u0442\\u0430\\u043a \\u0456 \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u0442\\u0438\\u043c\\u0443\\u0442\\u044c\\u0441\\u044f.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 320,\n \"min\": 6,\n \"max\": 12134,\n \"num_unique_values\": 1767,\n \"samples\": [\n 1251\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2198,\n \"min\": 22,\n \"max\": 82575,\n \"num_unique_values\": 6706,\n \"samples\": [\n 716\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Split Location\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Modified text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 197,\n \"min\": 1,\n \"max\": 6951,\n \"num_unique_values\": 1310,\n \"samples\": [\n 620\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1338,\n \"min\": 6,\n \"max\": 47900,\n \"num_unique_values\": 5067,\n \"samples\": [\n 2371\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28821,\n \"min\": 2,\n \"max\": 99924,\n \"num_unique_values\": 49956,\n \"samples\": [\n 15758\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 
\\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. '\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diff\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 89,\n \"min\": 0,\n \"max\": 5950,\n \"num_unique_values\": 599,\n \"samples\": [\n 471\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Token Limit Check\",\n 
\"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Outside\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 22 } ] }, { "cell_type": "code", "source": [ "df['Token Limit Check'].value_counts()" ], "metadata": { "id": "gWfUnO17r8zb", "colab": { "base_uri": "https://localhost:8080/", "height": 178 }, "outputId": "24d3667c-7740-4af2-943b-e35d9301e547" }, "execution_count": 23, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Token Limit Check\n", "Inside 49696\n", "Outside 260\n", "Name: count, dtype: int64" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
Token Limit Check
Inside49696
Outside260
\n", "

" ] }, "metadata": {}, "execution_count": 23 } ] }, { "cell_type": "code", "source": [ "df['Split Location'].max()" ], "metadata": { "id": "HdNmbX6yr_Lv", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b31df13b-66bc-4218-c5c1-cf2d9f7b518c" }, "execution_count": 24, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "6890" ] }, "metadata": {}, "execution_count": 24 } ] }, { "cell_type": "code", "source": [
 "# Add two per-word 0/1 label columns to df, each exactly 'New Word Count' long:\n",
 "#   WORDS_REAL: the row's 'Split Location' zeroes followed by ones\n",
 "#   WORDS_PRED: the row's 'label_pred' zeroes followed by ones\n",
 "def create_word_series(row, column_name):\n",
 "    \"\"\"Build the per-word 0/1 label list for one row.\n",
 "\n",
 "    Args:\n",
 "        row: mapping/Series holding 'New Word Count' and `column_name`.\n",
 "        column_name: column whose value is the number of leading zeroes\n",
 "            (called below with 'Split Location' and 'label_pred').\n",
 "\n",
 "    Returns:\n",
 "        A list of exactly 'New Word Count' ints: zeroes before the split, ones from it on.\n",
 "    \"\"\"\n",
 "    word_count = max(int(row['New Word Count']), 0)\n",
 "    # Clamp the split into [0, word_count]. Without this, a split larger than the\n",
 "    # word count yields `split` zeroes (because [1] * negative == []), so WORDS_REAL\n",
 "    # and WORDS_PRED of the same row could end up with different lengths.\n",
 "    split_location = min(max(int(row[column_name]), 0), word_count)\n",
 "    return [0] * split_location + [1] * (word_count - split_location)\n",
 "\n",
 "df['WORDS_REAL'] = df.apply(create_word_series, axis=1, args=('Split Location',))\n",
 "df['WORDS_PRED'] = df.apply(create_word_series, axis=1, args=('label_pred',))\n",
 "df" ], "metadata": { "id": "R6waU4p-sCcV", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "5701c558-1a9f-483a-ca7e-781035b72ef2" }, "execution_count": 25, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " ISO LLM used Type Data Split \\\n", "0 UKR GPT-o1 Partial Test \n", "1 UKR Gemini-Pro-1.5 Rewritten Test \n", "2 UKR Claude-Haiku-3.5 Partial Test \n", "3 UKR GPT-4o Partial Test \n", "4 UKR Amazon-Nova-Lite-1.0 Partial Test \n", "... ... ... ... ... \n", "49951 UKR Gemini-Pro-1.5 Partial Test \n", "49952 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49953 UKR Gemini-Flash-1.5 Unchanged Test \n", "49954 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49955 UKR Mistral-Large-2411 Partial Test \n", "\n", " Original text Original Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 
136 \n", "1 “Поїду, а коли настануть кращі часи, повернусь... 106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 \n", "1 Наступного дня дискусія щодо заробітків продов... 94 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 \n", "49952 У верхній частині турнірної таблиці нині переб... 115 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 \n", "\n", " New Char Count id label_pred \\\n", "0 1248 2 81 \n", "1 555 3 0 \n", "2 2535 4 322 \n", "3 3541 7 24 \n", "4 737 8 42 \n", "... ... ... ... \n", "49951 574 99919 26 \n", "49952 797 99921 55 \n", "49953 595 99922 75 \n", "49954 623 99923 31 \n", "49955 523 99924 18 \n", "\n", " text label_gold diff \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 1 \n", "1 Наступного дня дискусія щодо заробітків продов... 
0 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 0 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 517 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 3 \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 0 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 1 \n", "49953 Саме тому австрійське командування й намагалос... 76 1 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 7 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 2 \n", "\n", " Token Limit Check WORDS_REAL \\\n", "0 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "1 Inside [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... \n", "2 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "3 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "4 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "... ... ... \n", "49951 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49952 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49953 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49954 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49955 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "\n", " WORDS_PRED \n", "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "1 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... \n", "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "... ... \n", "49951 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49952 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49953 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49954 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49955 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "\n", "[49956 rows x 19 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word CountNew Char Countidlabel_predtextlabel_golddiffToken Limit CheckWORDS_REALWORDS_PRED
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...1711248281Про це заявив заступник начальника Генштабу ЗС...801Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...9455530Наступного дня дискусія щодо заробітків продов...00Inside[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...36825354322Ху Цзіньтао виступає перед делегатами з'їзду, ...3220Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...5863541724У мене є таємниця. Від людини, якій я розповід...541517Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118737842– Цього року у нас були не тільки жителі Вінни...393Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
............................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...935749991926Вільне життя, на превеликий подив мурчика, на ...260Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...1157979992155У верхній частині турнірної таблиці нині переб...561Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...765959992275Саме тому австрійське командування й намагалос...761Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...886239992331У штабі наголосили, що бойовики посилили обстр...247Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...725239992418Чим же ж має бути такий громадянський іспит? П...202Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
\n", "

49956 rows × 19 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 49956,\n \"fields\": [\n {\n \"column\": \"ISO\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"UKR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LLM used\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Gemini-Flash-1.5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Partial\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Data Split\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Test\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49943,\n \"samples\": [\n \"\\u0410 \\u043d\\u0430\\u0439\\u0431\\u0456\\u043b\\u044c\\u0448\\u043e\\u044e \\u0432\\u0442\\u0440\\u0430\\u0442\\u043e\\u044e \\u0437\\u0430 \\u0440\\u043e\\u043a\\u0438 \\u043d\\u0435\\u0437\\u0430\\u043b\\u0435\\u0436\\u043d\\u043e\\u0441\\u0442\\u0456 \\u0454 \\u043a\\u0443\\u043b\\u044c\\u0442\\u0443\\u0440\\u043d\\u0430 \\u0456\\u043d\\u0444\\u0440\\u0430\\u0441\\u0442\\u0440\\u0443\\u043a\\u0442\\u0443\\u0440\\u0430. \\u041f\\u0435\\u0440\\u0435\\u0434\\u043e\\u0432\\u0441\\u0456\\u043c, \\u043c\\u0430\\u0442\\u0435\\u0440\\u0456\\u0430\\u043b\\u044c\\u043d\\u0430. 
\\u0411\\u043e \\u0442\\u0430\\u043b\\u0430\\u043d\\u0442\\u0438 \\u0432 \\u043d\\u0430\\u0441 \\u044f\\u043a \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u043b\\u0438\\u0441\\u044f, \\u0442\\u0430\\u043a \\u0456 \\u043d\\u0430\\u0440\\u043e\\u0434\\u0436\\u0443\\u0432\\u0430\\u0442\\u0438\\u043c\\u0443\\u0442\\u044c\\u0441\\u044f.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 320,\n \"min\": 6,\n \"max\": 12134,\n \"num_unique_values\": 1767,\n \"samples\": [\n 1251\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Original Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2198,\n \"min\": 22,\n \"max\": 82575,\n \"num_unique_values\": 6706,\n \"samples\": [\n 716\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Split Location\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Modified text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. 
'\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Word Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 197,\n \"min\": 1,\n \"max\": 6951,\n \"num_unique_values\": 1310,\n \"samples\": [\n 620\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"New Char Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1338,\n \"min\": 6,\n \"max\": 47900,\n \"num_unique_values\": 5067,\n \"samples\": [\n 2371\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28821,\n \"min\": 2,\n \"max\": 99924,\n \"num_unique_values\": 49956,\n \"samples\": [\n 15758\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_pred\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 1321,\n \"num_unique_values\": 1084,\n \"samples\": [\n 467\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 49949,\n \"samples\": [\n \"\\u041d\\u0430 
\\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. \\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0430, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. 
\\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\\u0443\\u0441 \\u0421\\u0435\\u0432\\u0435\\u0440\\u0443\\u0441. '\\u041f\\u043e\\u043a\\u0438 \\u0413\\u0430\\u0440\\u0440\\u0456 \\u043f\\u0440\\u043e\\u0442\\u0438\\u0441\\u0442\\u043e\\u0457\\u0442\\u044c \\u043c\\u0438\\u043d\\u0443\\u043b\\u043e\\u043c\\u0443, \\u044f\\u043a\\u0435 \\u043d\\u0435 \\u0445\\u043e\\u0447\\u0435 \\u0437\\u0430\\u043b\\u0438\\u0448\\u0430\\u0442\\u0438\\u0441\\u044f \\u041d\\u0430 \\u0437\\u043e\\u0431\\u0440\\u0430\\u0436\\u0435\\u043d\\u043d\\u044f\\u0445, \\u043e\\u043f\\u0443\\u0431\\u043b\\u0456\\u043a\\u043e\\u0432\\u0430\\u043d\\u0438\\u0445 \\u043d\\u0430 \\u0441\\u0430\\u0439\\u0442\\u0456 \\u0430\\u0432\\u0442\\u043e\\u0440\\u043a\\u0438 \\u0441\\u0435\\u0440\\u0456\\u0457 \\u0414\\u0436\\u043e\\u0430\\u043d \\u0420\\u043e\\u0443\\u043b\\u0456\\u043d\\u0433 Pottermore, \\u0432\\u0438\\u0434\\u043d\\u043e \\u0445\\u043b\\u043e\\u043f\\u0447\\u0438\\u043a\\u0430 \\u0443 \\u0433\\u043d\\u0456\\u0437\\u0434\\u0456 \\u0456\\u0437 \\u043a\\u0440\\u0438\\u043b\\u0430\\u043c\\u0438. 
\\u0412\\u0438\\u0441\\u0442\\u0430\\u0432\\u0430, \\u0440\\u043e\\u0437\\u0431\\u0438\\u0442\\u0430 \\u043d\\u0430 \\u0434\\u0432\\u0456 \\u0447\\u0430\\u0441\\u0442\\u0438\\u043d\\u0438, \\u0437\\u0430\\u0434\\u0443\\u043c\\u0430\\u043d\\u0430 \\u0442\\u0430\\u043a, \\u0430\\u0431\\u0438 \\u0457\\u0457 \\u0434\\u0438\\u0432\\u0438\\u043b\\u0438\\u0441\\u044f \\u0432\\u043f\\u0440\\u043e\\u0434\\u043e\\u0432\\u0436 \\u043e\\u0434\\u043d\\u043e\\u0433\\u043e \\u0432\\u0435\\u0447\\u043e\\u0440\\u0430, \\u0430\\u0431\\u043e \\u0436 \\u0432\\u0435\\u0447\\u0456\\u0440 \\u0437\\u0430 \\u0432\\u0435\\u0447\\u043e\\u0440\\u043e\\u043c. \\u0417\\u0430 \\u0441\\u043b\\u043e\\u0432\\u0430\\u043c\\u0438 \\u043f\\u0440\\u043e\\u0434\\u044e\\u0441\\u0435\\u0440\\u0456\\u0432, \\u0446\\u0435 \\u0431\\u0443\\u0434\\u0435 \\u0441\\u0438\\u043a\\u0432\\u0435\\u043b \\u0434\\u043e \\u0456\\u0441\\u0442\\u043e\\u0440\\u0456\\u0439, \\u043e\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u0445 \\u0443 \\u043f\\u043e\\u043f\\u0435\\u0440\\u0435\\u0434\\u043d\\u0456\\u0445 \\u043a\\u043d\\u0438\\u0433\\u0430\\u0445 \\u043f\\u0438\\u0441\\u044c\\u043c\\u0435\\u043d\\u043d\\u0438\\u0446\\u0456, \\u0456 \\u0443 \\u0439\\u043e\\u0433\\u043e \\u0446\\u0435\\u043d\\u0442\\u0440\\u0456 - \\u0441\\u0438\\u043d \\u0447\\u0430\\u0440\\u0456\\u0432\\u043d\\u0438\\u043a\\u0430 \\u0410\\u043b\\u044c\\u0431\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_gold\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 190,\n \"min\": 0,\n \"max\": 6890,\n \"num_unique_values\": 1242,\n \"samples\": [\n 1195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diff\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 89,\n \"min\": 0,\n \"max\": 5950,\n \"num_unique_values\": 599,\n \"samples\": [\n 471\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Token Limit Check\",\n 
# prompt: 4 new columns: ROW_TP, ROW_FP, ROW_TN, ROW_FN, based on the zeroes and ones
# in WORDS_PRED and WORDS_REAL. Note: the length of the sequences differs from row to row.
def calculate_metrics(row):
    """Count the word-level confusion-matrix cells for one row.

    The two flag sequences are compared position by position, with 1 treated
    as the positive class and 0 as the negative class.

    Args:
        row: Mapping-like object (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) providing ``'WORDS_REAL'`` (reference
            flags) and ``'WORDS_PRED'`` (predicted flags), two sequences of
            0/1 values of equal length.

    Returns:
        tuple: ``(tp, fp, tn, fn)`` as plain integers. A position where either
        value is neither 0 nor 1 is not counted in any of the four cells.

    Raises:
        ValueError: If the two sequences do not have the same length. A
            confusion matrix over misaligned sequences would be silently
            wrong, so the mismatch is reported instead of being truncated.
    """
    # Fetch each sequence once; repeated row[...] lookups inside the loop are
    # loop-invariant and comparatively slow on a pandas row.
    real_flags = row['WORDS_REAL']
    pred_flags = row['WORDS_PRED']

    if len(real_flags) != len(pred_flags):
        raise ValueError(
            "WORDS_REAL and WORDS_PRED must have the same length, got "
            f"{len(real_flags)} and {len(pred_flags)}"
        )

    tp = fp = tn = fn = 0
    for real, pred in zip(real_flags, pred_flags):
        if real == 1 and pred == 1:
            tp += 1
        elif real == 0 and pred == 1:
            fp += 1
        elif real == 0 and pred == 0:
            tn += 1
        elif real == 1 and pred == 0:
            fn += 1
    return tp, fp, tn, fn
\n", "49951 UKR Gemini-Pro-1.5 Partial Test \n", "49952 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49953 UKR Gemini-Flash-1.5 Unchanged Test \n", "49954 UKR Amazon-Nova-Pro-1.0 Partial Test \n", "49955 UKR Mistral-Large-2411 Partial Test \n", "\n", " Original text Original Word Count \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 136 \n", "1 “Поїду, а коли настануть кращі часи, повернусь... 106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count ... \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 ... \n", "1 Наступного дня дискусія щодо заробітків продов... 94 ... \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 ... \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 ... \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 ... \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 ... \n", "49952 У верхній частині турнірної таблиці нині переб... 115 ... \n", "49953 Саме тому австрійське командування й намагалос... 76 ... \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 ... \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 ... 
\n", "\n", " text label_gold diff \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 80 1 \n", "1 Наступного дня дискусія щодо заробітків продов... 0 0 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 322 0 \n", "3 У мене є таємниця. Від людини, якій я розповід... 541 517 \n", "4 – Цього року у нас були не тільки жителі Вінни... 39 3 \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 26 0 \n", "49952 У верхній частині турнірної таблиці нині переб... 56 1 \n", "49953 Саме тому австрійське командування й намагалос... 76 1 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 24 7 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 20 2 \n", "\n", " Token Limit Check WORDS_REAL \\\n", "0 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "1 Inside [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... \n", "2 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "3 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "4 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "... ... ... \n", "49951 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49952 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49953 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49954 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49955 Inside [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "\n", " WORDS_PRED ROW_TP ROW_FP ROW_TN \\\n", "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 90 0 80 \n", "1 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... 94 0 0 \n", "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 46 0 322 \n", "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 45 517 24 \n", "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 76 0 39 \n", "... ... ... ... ... \n", "49951 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 67 0 26 \n", "49952 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 
59 1 55 \n", "49953 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 0 1 75 \n", "49954 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 57 0 24 \n", "49955 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 52 2 18 \n", "\n", " ROW_FN \n", "0 1 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 3 \n", "... ... \n", "49951 0 \n", "49952 0 \n", "49953 0 \n", "49954 7 \n", "49955 0 \n", "\n", "[49956 rows x 23 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word Count...textlabel_golddiffToken Limit CheckWORDS_REALWORDS_PREDROW_TPROW_FPROW_TNROW_FN
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...171...Про це заявив заступник начальника Генштабу ЗС...801Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...900801
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...94...Наступного дня дискусія щодо заробітків продов...00Inside[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...94000
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...368...Ху Цзіньтао виступає перед делегатами з'їзду, ...3220Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...4603220
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...586...У мене є таємниця. Від людини, якій я розповід...541517Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...45517240
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118...– Цього року у нас були не тільки жителі Вінни...393Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...760393
..................................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...93...Вільне життя, на превеликий подив мурчика, на ...260Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...670260
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...115...У верхній частині турнірної таблиці нині переб...561Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...591550
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...76...Саме тому австрійське командування й намагалос...761Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...01750
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...88...У штабі наголосили, що бойовики посилили обстр...247Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...570247
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...72...Чим же ж має бути такий громадянський іспит? П...202Inside[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...522180
\n", "

49956 rows × 23 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
# prompt: 4 new columns: ROW_ACC, ROW_PREC, ROW_REC, ROW_F1, based on ROW_TN, ROW_TP, ROW_FN, ROW_FP
def calculate_row_metrics(row):
    """Derive accuracy, precision, recall and F1 from one row's confusion counts.

    Args:
        row: Mapping-like object (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) providing the counts ``'ROW_TP'``,
            ``'ROW_FP'``, ``'ROW_TN'`` and ``'ROW_FN'``.

    Returns:
        tuple: ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero is returned as ``0`` instead of raising
        ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # Guarded division: an empty denominator yields 0 rather than an error.
        if denominator == 0:
            return 0
        return numerator / denominator

    true_pos, false_pos = row['ROW_TP'], row['ROW_FP']
    true_neg, false_neg = row['ROW_TN'], row['ROW_FN']

    accuracy = _ratio(true_pos + true_neg, true_pos + true_neg + false_pos + false_neg)
    precision = _ratio(true_pos, true_pos + false_pos)
    recall = _ratio(true_pos, true_pos + false_neg)
    # F1 is the harmonic mean of precision and recall; it is 0 when both are 0.
    f1_score = _ratio(2 * (precision * recall), precision + recall)
    return accuracy, precision, recall, f1_score
106 \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 555 \n", "3 У мене є таємниця. Від людини, якій я розповід... 1239 \n", "4 – Цього року у нас були не тільки жителі Вінни... 109 \n", "... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 68 \n", "49952 У верхній частині турнірної таблиці нині переб... 99 \n", "49953 Саме тому австрійське командування й намагалос... 76 \n", "49954 У штабі наголосили, що бойовики посилили обстр... 81 \n", "49955 Чим же ж має бути такий громадянський іспит? П... 46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count ... \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 ... \n", "1 Наступного дня дискусія щодо заробітків продов... 94 ... \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 ... \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 ... \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 ... \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 ... \n", "49952 У верхній частині турнірної таблиці нині переб... 115 ... \n", "49953 Саме тому австрійське командування й намагалос... 76 ... \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 ... \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 ... \n", "\n", " WORDS_REAL \\\n", "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "1 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... \n", "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "... ... \n", "49951 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49952 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 
\n", "49953 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49954 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "49955 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n", "\n", " WORDS_PRED ROW_TP ROW_FP \\\n", "0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 90 0 \n", "1 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... 94 0 \n", "2 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 46 0 \n", "3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 45 517 \n", "4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 76 0 \n", "... ... ... ... \n", "49951 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 67 0 \n", "49952 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 59 1 \n", "49953 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 0 1 \n", "49954 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 57 0 \n", "49955 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 52 2 \n", "\n", " ROW_TN ROW_FN ROW_ACC ROW_PREC ROW_REC ROW_F1 \n", "0 80 1 0.994152 1.000000 0.989011 0.994475 \n", "1 0 0 1.000000 1.000000 1.000000 1.000000 \n", "2 322 0 1.000000 1.000000 1.000000 1.000000 \n", "3 24 0 0.117747 0.080071 1.000000 0.148270 \n", "4 39 3 0.974576 1.000000 0.962025 0.980645 \n", "... ... ... ... ... ... ... \n", "49951 26 0 1.000000 1.000000 1.000000 1.000000 \n", "49952 55 0 0.991304 0.983333 1.000000 0.991597 \n", "49953 75 0 0.986842 0.000000 0.000000 0.000000 \n", "49954 24 7 0.920455 1.000000 0.890625 0.942149 \n", "49955 18 0 0.972222 0.962963 1.000000 0.981132 \n", "\n", "[49956 rows x 27 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word Count...WORDS_REALWORDS_PREDROW_TPROW_FPROW_TNROW_FNROW_ACCROW_PRECROW_RECROW_F1
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...171...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...9008010.9941521.0000000.9890110.994475
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...94...[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...940001.0000001.0000001.0000001.000000
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...368...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...46032201.0000001.0000001.0000001.000000
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...586...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...455172400.1177470.0800711.0000000.148270
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...7603930.9745761.0000000.9620250.980645
..................................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...93...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...6702601.0000001.0000001.0000001.000000
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...115...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...5915500.9913040.9833331.0000000.991597
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...76...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...017500.9868420.0000000.0000000.000000
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...88...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...5702470.9204551.0000000.8906250.942149
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...72...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...5221800.9722220.9629631.0000000.981132
\n", "

49956 rows × 27 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
def calculate_percentage_of_ones(row):
    """Return sum(WORDS_PRED) / len(WORDS_PRED) for one row.

    For a sequence of 0/1 labels this is the share of 1s. Despite the name the
    result is a fraction, not a percentage. An empty sequence yields 0.
    """
    predicted_labels = row['WORDS_PRED']
    n_labels = len(predicted_labels)
    if n_labels == 0:
        return 0
    return sum(predicted_labels) / n_labels


# Share of predicted 1-labels per row, and its complement.
ones_share = df.apply(calculate_percentage_of_ones, axis=1)
df["Label : 1"] = ones_share
df["Label : 0"] = 1.0 - df["Label : 1"]
df
46 \n", "\n", " Original Char Count Split Location \\\n", "0 955 80 \n", "1 650 0 \n", "2 3873 322 \n", "3 7660 541 \n", "4 649 39 \n", "... ... ... \n", "49951 426 26 \n", "49952 616 56 \n", "49953 595 76 \n", "49954 622 24 \n", "49955 287 20 \n", "\n", " Modified text New Word Count ... \\\n", "0 Про це заявив заступник начальника Генштабу ЗС... 171 ... \n", "1 Наступного дня дискусія щодо заробітків продов... 94 ... \n", "2 Ху Цзіньтао виступає перед делегатами з'їзду, ... 368 ... \n", "3 У мене є таємниця. Від людини, якій я розповід... 586 ... \n", "4 – Цього року у нас були не тільки жителі Вінни... 118 ... \n", "... ... ... ... \n", "49951 Вільне життя, на превеликий подив мурчика, на ... 93 ... \n", "49952 У верхній частині турнірної таблиці нині переб... 115 ... \n", "49953 Саме тому австрійське командування й намагалос... 76 ... \n", "49954 У штабі наголосили, що бойовики посилили обстр... 88 ... \n", "49955 Чим же ж має бути такий громадянський іспит? П... 72 ... \n", "\n", " ROW_TP ROW_FP ROW_TN ROW_FN ROW_ACC ROW_PREC ROW_REC ROW_F1 \\\n", "0 90 0 80 1 0.994152 1.000000 0.989011 0.994475 \n", "1 94 0 0 0 1.000000 1.000000 1.000000 1.000000 \n", "2 46 0 322 0 1.000000 1.000000 1.000000 1.000000 \n", "3 45 517 24 0 0.117747 0.080071 1.000000 0.148270 \n", "4 76 0 39 3 0.974576 1.000000 0.962025 0.980645 \n", "... ... ... ... ... ... ... ... ... \n", "49951 67 0 26 0 1.000000 1.000000 1.000000 1.000000 \n", "49952 59 1 55 0 0.991304 0.983333 1.000000 0.991597 \n", "49953 0 1 75 0 0.986842 0.000000 0.000000 0.000000 \n", "49954 57 0 24 7 0.920455 1.000000 0.890625 0.942149 \n", "49955 52 2 18 0 0.972222 0.962963 1.000000 0.981132 \n", "\n", " Label : 1 Label : 0 \n", "0 0.526316 0.473684 \n", "1 1.000000 0.000000 \n", "2 0.125000 0.875000 \n", "3 0.959044 0.040956 \n", "4 0.644068 0.355932 \n", "... ... ... 
\n", "49951 0.720430 0.279570 \n", "49952 0.521739 0.478261 \n", "49953 0.013158 0.986842 \n", "49954 0.647727 0.352273 \n", "49955 0.750000 0.250000 \n", "\n", "[49956 rows x 29 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISOLLM usedTypeData SplitOriginal textOriginal Word CountOriginal Char CountSplit LocationModified textNew Word Count...ROW_TPROW_FPROW_TNROW_FNROW_ACCROW_PRECROW_RECROW_F1Label : 1Label : 0
0UKRGPT-o1PartialTestПро це заявив заступник начальника Генштабу ЗС...13695580Про це заявив заступник начальника Генштабу ЗС...171...9008010.9941521.0000000.9890110.9944750.5263160.473684
1UKRGemini-Pro-1.5RewrittenTest“Поїду, а коли настануть кращі часи, повернусь...1066500Наступного дня дискусія щодо заробітків продов...94...940001.0000001.0000001.0000001.0000001.0000000.000000
2UKRClaude-Haiku-3.5PartialTestХу Цзіньтао виступає перед делегатами з'їзду, ...5553873322Ху Цзіньтао виступає перед делегатами з'їзду, ...368...46032201.0000001.0000001.0000001.0000000.1250000.875000
3UKRGPT-4oPartialTestУ мене є таємниця. Від людини, якій я розповід...12397660541У мене є таємниця. Від людини, якій я розповід...586...455172400.1177470.0800711.0000000.1482700.9590440.040956
4UKRAmazon-Nova-Lite-1.0PartialTest– Цього року у нас були не тільки жителі Вінни...10964939– Цього року у нас були не тільки жителі Вінни...118...7603930.9745761.0000000.9620250.9806450.6440680.355932
..................................................................
49951UKRGemini-Pro-1.5PartialTestВільне життя, на превеликий подив мурчика, на ...6842626Вільне життя, на превеликий подив мурчика, на ...93...6702601.0000001.0000001.0000001.0000000.7204300.279570
49952UKRAmazon-Nova-Pro-1.0PartialTestУ верхній частині турнірної таблиці нині переб...9961656У верхній частині турнірної таблиці нині переб...115...5915500.9913040.9833331.0000000.9915970.5217390.478261
49953UKRGemini-Flash-1.5UnchangedTestСаме тому австрійське командування й намагалос...7659576Саме тому австрійське командування й намагалос...76...017500.9868420.0000000.0000000.0000000.0131580.986842
49954UKRAmazon-Nova-Pro-1.0PartialTestУ штабі наголосили, що бойовики посилили обстр...8162224У штабі наголосили, що бойовики посилили обстр...88...5702470.9204551.0000000.8906250.9421490.6477270.352273
49955UKRMistral-Large-2411PartialTestЧим же ж має бути такий громадянський іспит? П...4628720Чим же ж має бути такий громадянський іспит? П...72...5221800.9722220.9629631.0000000.9811320.7500000.250000
\n", "

49956 rows × 29 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
# Row subsets by value of the 'Type' column.
df_unchanged = df[df['Type'] == 'Unchanged']
df_rewritten = df[df['Type'] == 'Rewritten']
df_partial = df[df['Type'] == 'Partial']

print("######################################")
print(" METRICS BY TEXT TYPE : ")
print("######################################")
# Mean of the per-row metric columns within each text type, reported in the
# order Partial, Unchanged, Rewritten.
for case_name, case_rows in (("Partial", df_partial),
                             ("Unchanged", df_unchanged),
                             ("Rewritten", df_rewritten)):
    AVG_ACC = case_rows['ROW_ACC'].mean()
    AVG_PREC = case_rows['ROW_PREC'].mean()
    AVG_REC = case_rows['ROW_REC'].mean()
    AVG_F1 = case_rows['ROW_F1'].mean()
    print(case_name + " Cases : Average Accuracy : ", AVG_ACC)
    print(case_name + " Cases : Average Precision : ", AVG_PREC)
    print(case_name + " Cases : Average Recall : ", AVG_REC)
    print(case_name + " Cases : Average F1-score : ", AVG_F1)

print("######################################")
print(" METRICS OVERALL : ")
print("######################################")
# prompt: print AVG_ACC, AVG_PREC , AVG_REC , AVG_F1 as mean of values in columns ROW_ACC , ROW_REC , ROW_PREC , ROW_F1 from dataframe df
# Same per-row means, this time over every row of df.
AVG_ACC = df['ROW_ACC'].mean()
AVG_PREC = df['ROW_PREC'].mean()
AVG_REC = df['ROW_REC'].mean()
AVG_F1 = df['ROW_F1'].mean()
print("All Cases : Average Accuracy:", AVG_ACC)
print("All Cases : Average Precision:", AVG_PREC)
print("All Cases : Average Recall:", AVG_REC)
print("All Cases : Average F1-score:", AVG_F1)
print("######################################")

# prompt: Also print overall ACC,PREC,REC,F1 based on values of columns ROW_TN,ROW_TP,ROW_FN,ROW_FP
# Pooled view: sum the confusion counts over all rows first, then derive the
# metrics once from the totals. Any ratio with a zero denominator is reported as 0.
total_tp = df['ROW_TP'].sum()
total_fp = df['ROW_FP'].sum()
total_tn = df['ROW_TN'].sum()
total_fn = df['ROW_FN'].sum()

total_count = total_tp + total_tn + total_fp + total_fn
predicted_positive = total_tp + total_fp
actual_positive = total_tp + total_fn

accuracy = (total_tp + total_tn) / total_count if total_count != 0 else 0
precision = total_tp / predicted_positive if predicted_positive != 0 else 0
recall = total_tp / actual_positive if actual_positive != 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

print("Overall Accuracy:", accuracy)
print("Overall Precision:", precision)
print("Overall Recall:", recall)
print("Overall F1-score:", f1_score)
# --- Per-LLM report 1: mean of the per-row metrics, broken down by text type ---
llm_values = df['LLM used'].unique()
for llm in llm_values:
    print("LLM used:", llm)
    df_llm = df[df['LLM used'] == llm]
    for text_type in ('Partial', 'Unchanged', 'Rewritten'):
        df_subset = df_llm[df_llm['Type'] == text_type]
        # Only report text types this LLM actually has rows for.
        if not df_subset.empty:
            for metric_label, metric_column in (('Accuracy', 'ROW_ACC'),
                                                ('Precision', 'ROW_PREC'),
                                                ('Recall', 'ROW_REC'),
                                                ('F1-score', 'ROW_F1')):
                metric_mean = df_subset[metric_column].mean()
                print(f"{text_type} Cases : Average {metric_label} : {metric_mean}")
    print("######################################")

# --- Per-LLM report 2: metrics derived from confusion counts pooled per LLM ---
llm_values = df['LLM used'].unique()
for llm in llm_values:
    print("LLM used:", llm)
    df_llm = df[df['LLM used'] == llm]
    total_tp = df_llm['ROW_TP'].sum()
    total_fp = df_llm['ROW_FP'].sum()
    total_tn = df_llm['ROW_TN'].sum()
    total_fn = df_llm['ROW_FN'].sum()

    total_count = total_tp + total_tn + total_fp + total_fn
    predicted_positive = total_tp + total_fp
    actual_positive = total_tp + total_fn

    # Any ratio with a zero denominator is reported as 0.
    accuracy = (total_tp + total_tn) / total_count if total_count != 0 else 0
    precision = total_tp / predicted_positive if predicted_positive != 0 else 0
    recall = total_tp / actual_positive if actual_positive != 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    print("Overall Accuracy:", accuracy)
    print("Overall Precision:", precision)
    print("Overall Recall:", recall)
    print("Overall F1-score:", f1_score)
    print("######################################")
# Print a structural summary of `df` (columns, non-null counts, dtypes and
# memory usage) as a final sanity check on the frame.
df.info()
# Write the frame, including the per-row metric columns, to a CSV file in
# the current working directory.
output_csv_name = "UKR-INFERENCE-3.csv"
df.to_csv(output_csv_name)

# Remind the operator to save the notebook and download the artefacts.
save_reminder = "CLICK CTRL+S, WAIT 2 SEC FOR IT TO BE SAVED, DOWNLOAD BOTH CODE AND THE CSV FILE FROM RUNTIME"
print(save_reminder)