Skip to content
Permalink
28881fc565
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1 lines (1 sloc) 68.6 KB
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","mount_file_id":"1ZoBYSurFQhjO1od4c5smI7iQt4y-jPbX","authorship_tag":"ABX9TyMt8SlgmN2ohwGbDIqUXnvT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"6fac1cbcc30f41f6bcb79417b2731600":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bfebc60565cd4fdab41c0046d895899a","IPY_MODEL_dcadb85b6c244f45924c64bfdeec12c1","IPY_MODEL_8e62ce166bd048c091a0221cd400cf60"],"layout":"IPY_MODEL_374eba5d81cb4cffa219520f3344fab6"}},"bfebc60565cd4fdab41c0046d895899a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e9bb4c595b0e4c33a76204a74dacfc79","placeholder":"​","style":"IPY_MODEL_47800baa5203415cb65f5581f8d616cd","value":"Downloading (…)olve/main/vocab.json: 
100%"}},"dcadb85b6c244f45924c64bfdeec12c1":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_6cc2fc3efc2346739f6302505b8ec0dc","max":927059,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c1a18fc8b61c41cca72b215b05772ad1","value":927059}},"8e62ce166bd048c091a0221cd400cf60":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2057207218834b50abaffe9f000a4a3f","placeholder":"​","style":"IPY_MODEL_eeba0f2f9ded40fca96524e334a1b538","value":" 927k/927k [00:00&lt;00:00, 
3.66MB/s]"}},"374eba5d81cb4cffa219520f3344fab6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e9bb4c595b0e4c33a76204a74dacfc79":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"47800baa5203415cb65f5581f8d616cd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6cc2fc3efc2346739f6302505b8ec0dc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c1a18fc8b61c41cca72b215b05772ad1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"2057207218834b50abaffe9f000a4a3f":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eeba0f2f9ded40fca96524e334a1b538":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1fd5843ddb3347c98cd87654d86602c7":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2a533da71bf24a1a9378f16133cf197d","IPY_MODEL_31b7f836488642058b2a8d1a0123ea9c","IPY_MODEL_113987fa9c4f48b2af3b4ed309faf093"],"layout":"IPY_MODEL_2e884be9fd1842608120fa92b69784a1"
}},"2a533da71bf24a1a9378f16133cf197d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a3b28ae530a442ebbfe320d6d5728d77","placeholder":"​","style":"IPY_MODEL_692cdd5b03384366bb12a7045bc42b34","value":"Downloading (…)olve/main/merges.txt: 100%"}},"31b7f836488642058b2a8d1a0123ea9c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_20791f5eed9b433db404499a60206d19","max":696298,"min":0,"orientation":"horizontal","style":"IPY_MODEL_06c744b6583546b698b2fbaacf51ff58","value":696298}},"113987fa9c4f48b2af3b4ed309faf093":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d3f2b8246444765b02f145a9e8c685e","placeholder":"​","style":"IPY_MODEL_a83646f57a594de79fabfab9d614e7f5","value":" 696k/696k [00:00&lt;00:00, 
12.6MB/s]"}},"2e884be9fd1842608120fa92b69784a1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a3b28ae530a442ebbfe320d6d5728d77":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"692cdd5b03384366bb12a7045bc42b34":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"20791f5eed9b433db404499a60206d19":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"06c744b6583546b698b2fbaacf51ff58":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7d3f2b8246444765b02f145a9e8c685e":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a83646f57a594de79fabfab9d614e7f5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"134bfe2569fc4b5f87df32cfd1e15c08":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4f6ac953e9414f1aa1c1b847f2bfdcae","IPY_MODEL_c41ae56c9d50408584f73ccc356e108e","IPY_MODEL_57c8e7ea9f2d41f7bdc8c32a484078b2"],"layout":"IPY_MODEL_8afc1262dc824342a8a59a8bf22b7118"
}},"4f6ac953e9414f1aa1c1b847f2bfdcae":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4737a24f366b469094eee31e02137d67","placeholder":"​","style":"IPY_MODEL_c0f2cfabb7074065bbf9bf9950f6280e","value":"Downloading (…)lve/main/config.json: 100%"}},"c41ae56c9d50408584f73ccc356e108e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c21b8841c9a243ec9188c5dc886e3d20","max":595,"min":0,"orientation":"horizontal","style":"IPY_MODEL_2da67bc904694a328e2ce06bd6514558","value":595}},"57c8e7ea9f2d41f7bdc8c32a484078b2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_58e001cec5af48c4bbada1c667b89766","placeholder":"​","style":"IPY_MODEL_d102c741a8a245938c8cf2ec6e802af4","value":" 595/595 [00:00&lt;00:00, 
34.3kB/s]"}},"8afc1262dc824342a8a59a8bf22b7118":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4737a24f366b469094eee31e02137d67":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0f2cfabb7074065bbf9bf9950f6280e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c21b8841c9a243ec9188c5dc886e3d20":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2da67bc904694a328e2ce06bd6514558":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"58e001cec5af48c4bbada1c667b89766":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d102c741a8a245938c8cf2ec6e802af4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5da37a1524824a959be4fadbd1fd7500":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1a5b32b9e9124cb4a36a4e0b678f7cbb","IPY_MODEL_c096d3d8a6e34fb197035bfb28785d5f","IPY_MODEL_b046fab8656a4f639f1a6a7a11c7c63a"],"layout":"IPY_MODEL_0c0bd808d21744429faf294163b426be"
}},"1a5b32b9e9124cb4a36a4e0b678f7cbb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_68cb56ea45ae4d749a9a806f8003f6e3","placeholder":"​","style":"IPY_MODEL_a0b366be2cbb4a5db9b37ac849a1a4f0","value":"Downloading pytorch_model.bin: 100%"}},"c096d3d8a6e34fb197035bfb28785d5f":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fb6354937f9741b99fe6099993828365","max":1560781537,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9a342335a672455687d753b1f04ae146","value":1560781537}},"b046fab8656a4f639f1a6a7a11c7c63a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fad14dd72ba54bd7af695317749c79f8","placeholder":"​","style":"IPY_MODEL_43f36806111f465fa0cbd25e0b802455","value":" 1.56G/1.56G [00:13&lt;00:00, 
140MB/s]"}},"0c0bd808d21744429faf294163b426be":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68cb56ea45ae4d749a9a806f8003f6e3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"o
verflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a0b366be2cbb4a5db9b37ac849a1a4f0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fb6354937f9741b99fe6099993828365":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9a342335a672455687d753b1f04ae146":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fad14dd72ba54bd7af695317749c79f8":{"model_mo
dule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"43f36806111f465fa0cbd25e0b802455":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","source":["# Install Required libraries"],"metadata":{"id":"oNme5YN3mwRS"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"OmxFfI92lslA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1690461369015,"user_tz":-60,"elapsed":38670,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"021927ac-bff7-4bca-c0b4-b163b46cd108"},"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting sacremoses\n"," Downloading sacremoses-0.0.53.tar.gz (880 kB)\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting transformers\n"," Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m21.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting peft\n"," Downloading peft-0.4.0-py3-none-any.whl (72 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.9/72.9 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacremoses) (2022.10.31)\n","Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from sacremoses) (1.16.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from sacremoses) (8.1.6)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from sacremoses) (1.3.1)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sacremoses) (4.65.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n","Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n"," Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: 
pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n","Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n"," Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n"," Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n","Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.0.1+cu118)\n","Collecting accelerate (from peft)\n"," Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m244.2/244.2 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.11.1)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1)\n","Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.2)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft) (3.25.2)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft) (16.0.6)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.3)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n","Building wheels for collected packages: sacremoses\n"," Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895239 sha256=934cfc98c7670cca3032228a72b27c5f109d7831050ae9e678e8f349cb927428\n"," Stored in directory: /root/.cache/pip/wheels/00/24/97/a2ea5324f36bc626e1ea0267f33db6aa80d157ee977e9e42fb\n","Successfully built sacremoses\n","Installing collected packages: tokenizers, safetensors, sacremoses, huggingface-hub, transformers, accelerate, peft\n","Successfully installed accelerate-0.21.0 huggingface-hub-0.16.4 peft-0.4.0 sacremoses-0.0.53 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.31.0\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n","Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n","Requirement already satisfied: fsspec in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n"]}],"source":["!pip install sacremoses transformers peft\n","! pip install -U transformers"]},{"cell_type":"markdown","source":["# Import required libraries"],"metadata":{"id":"nxKi5cK6nU5Z"}},{"cell_type":"code","source":["from transformers import BioGptTokenizer, AutoModelForSequenceClassification, pipeline, AutoModel, AutoTokenizer\n","import torch\n","import pandas as pd\n","import numpy as np\n","from sklearn.model_selection import train_test_split"],"metadata":{"id":"9Q4U4TWWnUNa"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Import Data"],"metadata":{"id":"nK9JropUm3yK"}},{"cell_type":"code","source":["df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dissertation/data/GOUT-CC-2019-CORPUS-REDACTED.csv')\n","#df = df.drop(\"Predict\", axis=1)\n","#df2 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dissertation/data/GOUT-CC-2020-CORPUS-REDACTED.csv')\n","#df2 = df2.drop(\"Predict\", axis=1)\n","\n","\n","#df = pd.concat([df, df2], ignore_index=True)"],"metadata":{"id":"pX9KCq5pSseO"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Check for consensus and mark rows for removal\n","rows_to_drop = 
[]\n","for index, row in df.iterrows():\n"," if row['Consensus'] == '-' or row['Consensus'] == 'U': #Drop unknown and unmarked\n"," rows_to_drop.append(index)\n","\n","# Drop the marked rows\n","df.drop(rows_to_drop, inplace=True)\n","\n","# Print the updated DataFrame\n","print(df)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lBfzey48Uufu","executionInfo":{"status":"ok","timestamp":1689337277964,"user_tz":-60,"elapsed":264,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"06a1b858-271c-4d1c-c74a-1c39d37b0a92"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[" Chief Complaint Predict Consensus \\\n","1 \"can't walk\", reports onset at <<TIME>>. orien... Y N \n","2 \"dehydration\" Chest hurts, hips hurt, cramps P... Y Y \n","3 \"gout flare up\" L arm swelling x 1 week. denie... Y Y \n","5 \"I started breathing hard\" hx- htn, gout, anx... N N \n","6 \"I think I have a gout flare up\" L wrist pain ... Y Y \n",".. ... ... ... \n","294 unwitnessed seizure last night, no dialysis in... N N \n","295 upper abd/R side chest pain x1 month, new onse... N N \n","296 upper lip swelling x one day, pmh HTN, COPD, b... N N \n","298 was getting prepped for colonoscopy and was se... N N \n","299 Was seen at <<HOSPITAL>> after an MVC. Pt stat... N N \n","\n"," emb_biogpt_no_FineT \n","1 [[-0.9474165439605713, -0.18875360488891602, -... \n","2 [[-0.7398123741149902, 0.812239944934845, -0.0... \n","3 [[-0.35727423429489136, 0.6445433497428894, 1.... \n","5 [[-0.7679358124732971, 0.8111965656280518, 0.3... \n","6 [[-1.405350923538208, -0.23051273822784424, -0... \n",".. ... \n","294 [[0.7233701944351196, 0.047650132328271866, -2... \n","295 [[-0.17614494264125824, 1.5961328744888306, 0.... \n","296 [[0.6994464993476868, 1.2614316940307617, -1.8... \n","298 [[0.32566601037979126, -0.4120418131351471, -1... \n","299 [[1.250546932220459, 0.0630776435136795, -1.18... 
\n","\n","[188 rows x 4 columns]\n"]}]},{"cell_type":"markdown","source":["# Small data analysis"],"metadata":{"id":"PEoE_tyrm7qS"}},{"cell_type":"code","source":["print(\"group by consensus\")\n","print(df.groupby(\"Consensus\").size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is U\")\n","print(df[df['Predict'] == 'U'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is N\")\n","print(df[df['Predict'] == 'N'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is Y\")\n","print(df[df['Predict'] == 'Y'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is -\")\n","print(df[df['Predict'] == '-'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Kpe-3PP3mtsc","executionInfo":{"status":"ok","timestamp":1690375801921,"user_tz":-60,"elapsed":4,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"81e9ff60-4790-4770-fb2c-0c845bac44c2"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["group by consensus\n","Consensus\n","- 103\n","N 118\n","U 9\n","Y 70\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is U\n","Consensus\n","N 16\n","U 5\n","Y 6\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is N\n","Consensus\n","- 86\n","N 85\n","U 4\n","Y 1\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is Y\n","Consensus\n","- 17\n","N 17\n","Y 63\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is 
-\n","Series([], dtype: int64)\n","-----------------------------------------------------------------\n"]}]},{"cell_type":"markdown","source":["# Load Model and tokenizer (BioGPT)"],"metadata":{"id":"7h7jfsy9njIx"}},{"cell_type":"code","source":["device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","\n","#GPT-2 is a model with absolute position embeddings so it's usually advised to pad the inputs on the right rather than the left. Remember this\n","# NOTE(review): padding_side is set to left below, which contradicts the advice above - confirm which side is intended\n","tokenizer = BioGptTokenizer.from_pretrained(\"microsoft/biogpt\")\n","tokenizer.padding_side = \"left\"\n","\n","# AutoModelForSequenceClassification is the class imported at the top of the notebook (BioGptForSequenceClassification is never imported)\n","model = AutoModelForSequenceClassification.from_pretrained(\"microsoft/biogpt\")\n","\n","model = model.to(device)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":252,"referenced_widgets":["6fac1cbcc30f41f6bcb79417b2731600","bfebc60565cd4fdab41c0046d895899a","dcadb85b6c244f45924c64bfdeec12c1","8e62ce166bd048c091a0221cd400cf60","374eba5d81cb4cffa219520f3344fab6","e9bb4c595b0e4c33a76204a74dacfc79","47800baa5203415cb65f5581f8d616cd","6cc2fc3efc2346739f6302505b8ec0dc","c1a18fc8b61c41cca72b215b05772ad1","2057207218834b50abaffe9f000a4a3f","eeba0f2f9ded40fca96524e334a1b538","1fd5843ddb3347c98cd87654d86602c7","2a533da71bf24a1a9378f16133cf197d","31b7f836488642058b2a8d1a0123ea9c","113987fa9c4f48b2af3b4ed309faf093","2e884be9fd1842608120fa92b69784a1","a3b28ae530a442ebbfe320d6d5728d77","692cdd5b03384366bb12a7045bc42b34","20791f5eed9b433db404499a60206d19","06c744b6583546b698b2fbaacf51ff58","7d3f2b8246444765b02f145a9e8c685e","a83646f57a594de79fabfab9d614e7f5","134bfe2569fc4b5f87df32cfd1e15c08","4f6ac953e9414f1aa1c1b847f2bfdcae","c41ae56c9d50408584f73ccc356e108e","57c8e7ea9f2d41f7bdc8c32a484078b2","8afc1262dc824342a8a59a8bf22b7118","4737a24f366b469094eee31e02137d67","c0f2cfabb7074065bbf9bf9950f6280e","c21b8841c9a243ec9188c5dc886e3d20","2da67bc904694a328e2ce06bd6514558","58e001cec5af48c4bbada1c667b89766","d102c741a8a245938c8cf2ec6e802af4","5da37a1524824a959be4fadbd1fd7500","1a5b32b9e9124
cb4a36a4e0b678f7cbb","c096d3d8a6e34fb197035bfb28785d5f","b046fab8656a4f639f1a6a7a11c7c63a","0c0bd808d21744429faf294163b426be","68cb56ea45ae4d749a9a806f8003f6e3","a0b366be2cbb4a5db9b37ac849a1a4f0","fb6354937f9741b99fe6099993828365","9a342335a672455687d753b1f04ae146","fad14dd72ba54bd7af695317749c79f8","43f36806111f465fa0cbd25e0b802455"]},"id":"S37ZLL02nMOi","executionInfo":{"status":"ok","timestamp":1689340650293,"user_tz":-60,"elapsed":31965,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"79b24c62-9b2d-4f86-c18f-933479c633a5"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (…)olve/main/vocab.json: 0%| | 0.00/927k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6fac1cbcc30f41f6bcb79417b2731600"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading (…)olve/main/merges.txt: 0%| | 0.00/696k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1fd5843ddb3347c98cd87654d86602c7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading (…)lve/main/config.json: 0%| | 0.00/595 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"134bfe2569fc4b5f87df32cfd1e15c08"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading pytorch_model.bin: 0%| | 0.00/1.56G [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5da37a1524824a959be4fadbd1fd7500"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["Some weights of the model checkpoint at microsoft/biogpt were not used when initializing BioGptForSequenceClassification: ['output_projection.weight']\n","- This IS expected if you are initializing BioGptForSequenceClassification from the checkpoint of a model trained on another task or with 
another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n","- This IS NOT expected if you are initializing BioGptForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n","Some weights of BioGptForSequenceClassification were not initialized from the model checkpoint at microsoft/biogpt and are newly initialized: ['score.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]}]},{"cell_type":"code","source":["#oversample 2019 and get result\n","#class weight\n","#fine tune"],"metadata":{"id":"lmd-cKW-pGj8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#clear GPU cache\n","#torch.cuda.empty_cache()"],"metadata":{"id":"-HIrsk4pqguR"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Define the pipeline"],"metadata":{"id":"ppuc0YsrMwiA"}},{"cell_type":"code","source":["# Create the pipeline\n","p = pipeline(\n"," task=\"feature-extraction\",\n"," tokenizer=\"microsoft/biogpt\",\n"," model=\"microsoft/biogpt\",\n"," framework=\"pt\",\n"," device=0, # use CUDA with 0\n",")"],"metadata":{"id":"f9JuA7OTMuw-","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1689340658502,"user_tz":-60,"elapsed":8213,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"588f9e2e-bfab-460b-bd7d-91c4e596f9c2"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["Some weights of the model checkpoint at microsoft/biogpt were not used when initializing BioGptModel: ['output_projection.weight']\n","- This IS expected if you are initializing BioGptModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n","- This IS NOT expected if you are initializing BioGptModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"]}]},{"cell_type":"markdown","source":["### Get the embeddings of the last token of the last hidden state"],"metadata":{"id":"dNTtyRnArThB"}},{"cell_type":"code","source":["# Custom function to extract the embedding of the last token\n","def extract_last_token(last_hidden_states):\n"," last_hidden_states = np.array(last_hidden_states)\n"," return last_hidden_states[:,-1,:]\n","\n","# Process the data using the pipeline\n","results = p([row[\"Chief Complaint\"] for _, row in df.iterrows()])\n","\n","# Extract the last token of the last hidden state\n","embeddings = [extract_last_token(hidden_state) for hidden_state in results]"],"metadata":{"id":"8TnbZZQ3qqjo"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Assign the reshaped embeddings to the \"embeddings\" column in the DataFrame\n","df[\"emb_biogpt_no_FineT\"] = embeddings\n","\n","# Print the resulting DataFrame\n","print(df)"],"metadata":{"id":"ZzokAh_VU5c4","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1689340670722,"user_tz":-60,"elapsed":14,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"36ebc9fe-8a8f-478f-db83-456805731689"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[" Chief Complaint Predict Consensus \\\n","0 \"been feeling bad\" last 2 weeks & switched BP ... N - \n","1 \"can't walk\", reports onset at <<TIME>>. orien... Y N \n","2 \"dehydration\" Chest hurts, hips hurt, cramps P... Y Y \n","3 \"gout flare up\" L arm swelling x 1 week. denie... Y Y \n","4 \"heart racing,\"dyspnea, and orthopnea that has... N - \n",".. ... ... ... 
\n","295 upper abd/R side chest pain x1 month, new onse... N N \n","296 upper lip swelling x one day, pmh HTN, COPD, b... N N \n","297 walked outside of a gas station and began bein... N - \n","298 was getting prepped for colonoscopy and was se... N N \n","299 Was seen at <<HOSPITAL>> after an MVC. Pt stat... N N \n","\n"," emb_biogpt_no_FineT \n","0 [[0.4998164772987366, -1.1454148292541504, -1.... \n","1 [[-0.9474165439605713, -0.18875360488891602, -... \n","2 [[-0.7398123741149902, 0.812239944934845, -0.0... \n","3 [[-0.35727423429489136, 0.6445433497428894, 1.... \n","4 [[0.07174703478813171, 1.470916986465454, -1.7... \n",".. ... \n","295 [[-0.17614494264125824, 1.5961328744888306, 0.... \n","296 [[0.6994464993476868, 1.2614316940307617, -1.8... \n","297 [[1.3378559350967407, 1.1757584810256958, -1.0... \n","298 [[0.32566601037979126, -0.4120418131351471, -1... \n","299 [[1.250546932220459, 0.0630776435136795, -1.18... \n","\n","[300 rows x 4 columns]\n"]}]},{"cell_type":"markdown","source":["## Save to csv or txt"],"metadata":{"id":"OeTeIttSNTS9"}},{"cell_type":"code","source":["\n","df.to_json('/content/drive/MyDrive/Colab Notebooks/dissertation/data/datafinal.json', orient='records')\n","\n","#df.to_csv(\"/content/drive/MyDrive/Colab Notebooks/dissertation/data/datafinal.csv\", index=False)\n","#np.savetxt(r\"/content/drive/MyDrive/Colab Notebooks/dissertation/data/datafinal.txt\", df, fmt='%s')"],"metadata":{"id":"8OxiKJdDNXI1"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# For testing purposes (SVC code)"],"metadata":{"id":"F23BVwcxT0YO"}},{"cell_type":"code","source":["from sklearn.metrics import classification_report\n","print(classification_report(y_test, 
y_pred))"],"metadata":{"id":"3fw2I_5FJJEb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1689337345416,"user_tz":-60,"elapsed":4,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"8e3b4c9a-814e-425d-b98f-25b97ea8ed6f"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[" precision recall f1-score support\n","\n"," N 0.61 0.78 0.68 18\n"," U 0.00 0.00 0.00 3\n"," Y 0.60 0.53 0.56 17\n","\n"," accuracy 0.61 38\n"," macro avg 0.40 0.44 0.42 38\n","weighted avg 0.56 0.61 0.58 38\n","\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, msg_start, len(result))\n","/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, msg_start, len(result))\n","/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, msg_start, len(result))\n"]}]},{"cell_type":"code","source":["c = pipeline(\n"," task=\"text-classification\",\n"," tokenizer=\"microsoft/biogpt\",\n"," model=model,\n"," framework=\"pt\",\n"," device=0, # use CUDA with 0\n",")\n"],"metadata":{"id":"Bs-zADh7Zq3x","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1689338002767,"user_tz":-60,"elapsed":1455,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"31a5f9db-d646-4ad1-fe9d-1fe571df7a05"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers\n","pip install xformers.\n"]}]},{"cell_type":"code","source":["results2 = c([row[\"Chief Complaint\"] for _, row in df.iterrows()])\n"],"metadata":{"id":"-OeiyKA7ZbPH"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# classification_report needs ground-truth labels expressed as LABEL_0/LABEL_1, which this notebook never defines,\n","# so print the raw pipeline predictions instead (this matches the output saved with this cell).\n","print(results2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RK6vRCxkZoD9","executionInfo":{"status":"ok","timestamp":1689338215435,"user_tz":-60,"elapsed":184,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"2a4f2377-8bb5-405b-a052-66a38ada86f4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[{'label': 'LABEL_0', 'score': 0.6615974307060242}, {'label': 'LABEL_0', 'score': 0.8978613615036011}, {'label': 'LABEL_0', 'score': 0.737217903137207}, {'label': 'LABEL_1', 'score': 0.5124248266220093}, {'label': 'LABEL_0', 'score': 0.7070297598838806}, {'label': 'LABEL_0', 'score': 0.6875593066215515}, {'label': 'LABEL_0', 'score': 0.6362900137901306}, {'label': 'LABEL_1', 'score': 0.5346298813819885}, {'label': 'LABEL_0', 'score': 0.7779340147972107}, {'label': 'LABEL_0', 'score': 0.6526575088500977}, 
{'label': 'LABEL_0', 'score': 0.7227324843406677}, {'label': 'LABEL_0', 'score': 0.8791565895080566}, {'label': 'LABEL_1', 'score': 0.932992696762085}, {'label': 'LABEL_0', 'score': 0.6692107915878296}, {'label': 'LABEL_0', 'score': 0.5934761166572571}, {'label': 'LABEL_0', 'score': 0.5123370885848999}, {'label': 'LABEL_0', 'score': 0.7277477383613586}, {'label': 'LABEL_0', 'score': 0.9222954511642456}, {'label': 'LABEL_0', 'score': 0.6491223573684692}, {'label': 'LABEL_0', 'score': 0.5726889371871948}, {'label': 'LABEL_0', 'score': 0.5519722104072571}, {'label': 'LABEL_0', 'score': 0.7658959031105042}, {'label': 'LABEL_0', 'score': 0.7677779793739319}, {'label': 'LABEL_0', 'score': 0.890244722366333}, {'label': 'LABEL_0', 'score': 0.5844970941543579}, {'label': 'LABEL_0', 'score': 0.6509642004966736}, {'label': 'LABEL_0', 'score': 0.7383297085762024}, {'label': 'LABEL_0', 'score': 0.7057068347930908}, {'label': 'LABEL_0', 'score': 0.7482975721359253}, {'label': 'LABEL_0', 'score': 0.6678454279899597}, {'label': 'LABEL_0', 'score': 0.7175522446632385}, {'label': 'LABEL_0', 'score': 0.7939876317977905}, {'label': 'LABEL_0', 'score': 0.6886239051818848}, {'label': 'LABEL_0', 'score': 0.6293384432792664}, {'label': 'LABEL_0', 'score': 0.7787708640098572}, {'label': 'LABEL_1', 'score': 0.5346750020980835}, {'label': 'LABEL_0', 'score': 0.6168493032455444}, {'label': 'LABEL_0', 'score': 0.7140619158744812}, {'label': 'LABEL_0', 'score': 0.9330634474754333}, {'label': 'LABEL_0', 'score': 0.7088048458099365}, {'label': 'LABEL_0', 'score': 0.7105473279953003}, {'label': 'LABEL_0', 'score': 0.7688608169555664}, {'label': 'LABEL_0', 'score': 0.7996343970298767}, {'label': 'LABEL_1', 'score': 0.5041794180870056}, {'label': 'LABEL_0', 'score': 0.7625333070755005}, {'label': 'LABEL_0', 'score': 0.6963614821434021}, {'label': 'LABEL_0', 'score': 0.7411929965019226}, {'label': 'LABEL_0', 'score': 0.7478159070014954}, {'label': 'LABEL_0', 'score': 0.8066991567611694}, {'label': 
'LABEL_0', 'score': 0.6129153966903687}, {'label': 'LABEL_0', 'score': 0.5038318634033203}, {'label': 'LABEL_0', 'score': 0.685335099697113}, {'label': 'LABEL_0', 'score': 0.6668456196784973}, {'label': 'LABEL_0', 'score': 0.8713549375534058}, {'label': 'LABEL_0', 'score': 0.5283274054527283}, {'label': 'LABEL_0', 'score': 0.772803544998169}, {'label': 'LABEL_0', 'score': 0.9716405272483826}, {'label': 'LABEL_1', 'score': 0.5398741960525513}, {'label': 'LABEL_0', 'score': 0.7302786111831665}, {'label': 'LABEL_0', 'score': 0.7667151093482971}, {'label': 'LABEL_0', 'score': 0.7670304179191589}, {'label': 'LABEL_0', 'score': 0.7130184173583984}, {'label': 'LABEL_0', 'score': 0.5962071418762207}, {'label': 'LABEL_1', 'score': 0.7809521555900574}, {'label': 'LABEL_0', 'score': 0.91331946849823}, {'label': 'LABEL_0', 'score': 0.8840063214302063}, {'label': 'LABEL_1', 'score': 0.5562060475349426}, {'label': 'LABEL_0', 'score': 0.780636191368103}, {'label': 'LABEL_0', 'score': 0.6657049059867859}, {'label': 'LABEL_0', 'score': 0.6016926765441895}, {'label': 'LABEL_0', 'score': 0.7100022435188293}, {'label': 'LABEL_0', 'score': 0.7566763162612915}, {'label': 'LABEL_0', 'score': 0.6488569378852844}, {'label': 'LABEL_0', 'score': 0.6955004930496216}, {'label': 'LABEL_1', 'score': 0.5463865399360657}, {'label': 'LABEL_0', 'score': 0.6558042168617249}, {'label': 'LABEL_0', 'score': 0.7891439199447632}, {'label': 'LABEL_0', 'score': 0.7871383428573608}, {'label': 'LABEL_0', 'score': 0.8023427128791809}, {'label': 'LABEL_0', 'score': 0.589187502861023}, {'label': 'LABEL_0', 'score': 0.7212092280387878}, {'label': 'LABEL_1', 'score': 0.5844154953956604}, {'label': 'LABEL_0', 'score': 0.7456770539283752}, {'label': 'LABEL_0', 'score': 0.8659123182296753}, {'label': 'LABEL_0', 'score': 0.8992603421211243}, {'label': 'LABEL_0', 'score': 0.514760434627533}, {'label': 'LABEL_0', 'score': 0.7620018720626831}, {'label': 'LABEL_1', 'score': 0.5866338014602661}, {'label': 'LABEL_0', 
'score': 0.8297902941703796}, {'label': 'LABEL_0', 'score': 0.8059366941452026}, {'label': 'LABEL_0', 'score': 0.6793333292007446}, {'label': 'LABEL_0', 'score': 0.5907491445541382}, {'label': 'LABEL_0', 'score': 0.9433456659317017}, {'label': 'LABEL_1', 'score': 0.5196303129196167}, {'label': 'LABEL_0', 'score': 0.7706891298294067}, {'label': 'LABEL_0', 'score': 0.8047298192977905}, {'label': 'LABEL_1', 'score': 0.6020045876502991}, {'label': 'LABEL_0', 'score': 0.96779465675354}, {'label': 'LABEL_0', 'score': 0.6920665502548218}, {'label': 'LABEL_0', 'score': 0.7542068362236023}, {'label': 'LABEL_0', 'score': 0.6763297915458679}, {'label': 'LABEL_0', 'score': 0.8180073499679565}, {'label': 'LABEL_1', 'score': 0.5910825729370117}, {'label': 'LABEL_0', 'score': 0.7814627289772034}, {'label': 'LABEL_0', 'score': 0.8504816889762878}, {'label': 'LABEL_0', 'score': 0.6659404635429382}, {'label': 'LABEL_0', 'score': 0.7470546364784241}, {'label': 'LABEL_1', 'score': 0.6019303798675537}, {'label': 'LABEL_0', 'score': 0.6815959215164185}, {'label': 'LABEL_0', 'score': 0.6103224754333496}, {'label': 'LABEL_0', 'score': 0.5264981985092163}, {'label': 'LABEL_0', 'score': 0.6877379417419434}, {'label': 'LABEL_0', 'score': 0.5092182159423828}, {'label': 'LABEL_0', 'score': 0.6549142599105835}, {'label': 'LABEL_0', 'score': 0.6244764924049377}, {'label': 'LABEL_0', 'score': 0.6329596638679504}, {'label': 'LABEL_0', 'score': 0.6964150667190552}, {'label': 'LABEL_0', 'score': 0.728044331073761}, {'label': 'LABEL_1', 'score': 0.6850619912147522}, {'label': 'LABEL_0', 'score': 0.5791393518447876}, {'label': 'LABEL_1', 'score': 0.631783127784729}, {'label': 'LABEL_0', 'score': 0.797654926776886}, {'label': 'LABEL_0', 'score': 0.8342559337615967}, {'label': 'LABEL_0', 'score': 0.7872901558876038}, {'label': 'LABEL_0', 'score': 0.5403001308441162}, {'label': 'LABEL_0', 'score': 0.9737462997436523}, {'label': 'LABEL_0', 'score': 0.663942277431488}, {'label': 'LABEL_0', 'score': 
0.6177354454994202}, {'label': 'LABEL_0', 'score': 0.6165367960929871}, {'label': 'LABEL_0', 'score': 0.7678841352462769}, {'label': 'LABEL_0', 'score': 0.614977240562439}, {'label': 'LABEL_0', 'score': 0.8012751340866089}, {'label': 'LABEL_0', 'score': 0.65992271900177}, {'label': 'LABEL_0', 'score': 0.7245795726776123}, {'label': 'LABEL_0', 'score': 0.7442924976348877}, {'label': 'LABEL_0', 'score': 0.9026773571968079}, {'label': 'LABEL_0', 'score': 0.8084629774093628}, {'label': 'LABEL_0', 'score': 0.6144567131996155}, {'label': 'LABEL_0', 'score': 0.9622575044631958}, {'label': 'LABEL_0', 'score': 0.7682992815971375}, {'label': 'LABEL_0', 'score': 0.845178484916687}, {'label': 'LABEL_0', 'score': 0.9782117605209351}, {'label': 'LABEL_0', 'score': 0.6261171102523804}, {'label': 'LABEL_0', 'score': 0.8912791609764099}, {'label': 'LABEL_1', 'score': 0.5365575551986694}, {'label': 'LABEL_0', 'score': 0.5592782497406006}, {'label': 'LABEL_0', 'score': 0.7869940400123596}, {'label': 'LABEL_0', 'score': 0.5641335844993591}, {'label': 'LABEL_0', 'score': 0.7312366366386414}, {'label': 'LABEL_0', 'score': 0.759033739566803}, {'label': 'LABEL_0', 'score': 0.5738414525985718}, {'label': 'LABEL_1', 'score': 0.5965594053268433}, {'label': 'LABEL_0', 'score': 0.8608651161193848}, {'label': 'LABEL_0', 'score': 0.738036572933197}, {'label': 'LABEL_0', 'score': 0.604376494884491}, {'label': 'LABEL_0', 'score': 0.7667599320411682}, {'label': 'LABEL_1', 'score': 0.5392709970474243}, {'label': 'LABEL_0', 'score': 0.7998315095901489}, {'label': 'LABEL_0', 'score': 0.6989988088607788}, {'label': 'LABEL_0', 'score': 0.6253988742828369}, {'label': 'LABEL_0', 'score': 0.9001648426055908}, {'label': 'LABEL_0', 'score': 0.7018232345581055}, {'label': 'LABEL_1', 'score': 0.5016051530838013}, {'label': 'LABEL_0', 'score': 0.8002259731292725}, {'label': 'LABEL_0', 'score': 0.9049254059791565}, {'label': 'LABEL_0', 'score': 0.8459368348121643}, {'label': 'LABEL_0', 'score': 
0.5065075159072876}, {'label': 'LABEL_0', 'score': 0.8334006667137146}, {'label': 'LABEL_0', 'score': 0.5210334658622742}, {'label': 'LABEL_0', 'score': 0.820144772529602}, {'label': 'LABEL_1', 'score': 0.6963856220245361}, {'label': 'LABEL_1', 'score': 0.6420274972915649}, {'label': 'LABEL_0', 'score': 0.8348711729049683}, {'label': 'LABEL_0', 'score': 0.6488588452339172}, {'label': 'LABEL_0', 'score': 0.8147182464599609}, {'label': 'LABEL_0', 'score': 0.7595041990280151}, {'label': 'LABEL_0', 'score': 0.8739688992500305}, {'label': 'LABEL_0', 'score': 0.8341084122657776}, {'label': 'LABEL_0', 'score': 0.730343222618103}, {'label': 'LABEL_0', 'score': 0.9408749938011169}, {'label': 'LABEL_1', 'score': 0.5836443901062012}, {'label': 'LABEL_0', 'score': 0.6184004545211792}, {'label': 'LABEL_0', 'score': 0.756477952003479}, {'label': 'LABEL_0', 'score': 0.8730202317237854}, {'label': 'LABEL_0', 'score': 0.8932791948318481}, {'label': 'LABEL_0', 'score': 0.8135764598846436}, {'label': 'LABEL_0', 'score': 0.7274437546730042}, {'label': 'LABEL_1', 'score': 0.5531033277511597}]\n"]}]},{"cell_type":"code","source":["def mean_pooling(last_hidden_states, ):\n"," last_4_layers = last_hidden_states[-12:] # Takes the last 12 entries ([-12:]), not 4 as the variable name suggests\n"," return np.mean(last_4_layers, axis=1)\n","\n","# Process the data using the pipeline\n","results = p([row[\"text\"] for _, row in df2.iterrows()])\n","\n","features = np.squeeze(results)\n","\n","print(features.shape)\n","# Perform mean pooling on the last hidden states\n","embeddings = [mean_pooling(hidden_state) for hidden_state in results]\n","\n","# Create a DataFrame to store the results\n","df2[\"embeddings12\"] = embeddings\n","\n","# Print the resulting DataFrame\n","print(df2)"],"metadata":{"id":"rp9v3E0yM9hY"},"execution_count":null,"outputs":[]}]}