Skip to content
Permalink
28881fc565
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1 lines (1 sloc) 48.7 KB
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","mount_file_id":"1mTguZXjV6nhM_6mF99ESeKjayF29kpvA","authorship_tag":"ABX9TyNhX3OjVCUag6esgJw3/vQx"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"1c2ab33b113c43d5a5dc5adc2545ae8c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_dd81bb4f7d7e4a219bb88e1ae672e269","IPY_MODEL_7ddb35d7a61448beb8622bbee7e030ce","IPY_MODEL_5476a15648ed435b97cb98699b778613"],"layout":"IPY_MODEL_cf535724d52649b091caddf11cd7d343"}},"dd81bb4f7d7e4a219bb88e1ae672e269":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c33ab185b9fe43298b8e1df6a518f66f","placeholder":"​","style":"IPY_MODEL_3e0520d086e04e50a326f5b54a3d12c5","value":"Downloading (…)olve/main/vocab.json: 
100%"}},"7ddb35d7a61448beb8622bbee7e030ce":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7488c398f65a45c69d7339a5cc0281bd","max":927059,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6097b53803ee41989dbabae5757e8980","value":927059}},"5476a15648ed435b97cb98699b778613":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0fb8445cd3d34b7dbc9a0ddf56cc46aa","placeholder":"​","style":"IPY_MODEL_894316e70feb4c02a76d773d6b8a55ac","value":" 927k/927k [00:00&lt;00:00, 
11.5MB/s]"}},"cf535724d52649b091caddf11cd7d343":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c33ab185b9fe43298b8e1df6a518f66f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3e0520d086e04e50a326f5b54a3d12c5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7488c398f65a45c69d7339a5cc0281bd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6097b53803ee41989dbabae5757e8980":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"0fb8445cd3d34b7dbc9a0ddf56cc46aa":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"894316e70feb4c02a76d773d6b8a55ac":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9a7df809d0e34ea8a2f33405e9e4ea8e":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_49991380a0c34610b2fbc1162e5e714b","IPY_MODEL_0a27bf29283243d29f470307b2373001","IPY_MODEL_6010e0d1208f4f6d8c87b3e04f209d70"],"layout":"IPY_MODEL_cadb2516a5c340448f70b4def72923b0"
}},"49991380a0c34610b2fbc1162e5e714b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b034e877975c4ef7a16a32669aae665d","placeholder":"​","style":"IPY_MODEL_b5154d9c661c4835a1dd9e512240c57e","value":"Downloading (…)olve/main/merges.txt: 100%"}},"0a27bf29283243d29f470307b2373001":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1d0995af5ec240e98027782ee6a6834e","max":696298,"min":0,"orientation":"horizontal","style":"IPY_MODEL_006225d4bd3641568e1330ba50c23fc7","value":696298}},"6010e0d1208f4f6d8c87b3e04f209d70":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cfc74db46bc040888d563a92de6f9a45","placeholder":"​","style":"IPY_MODEL_c123753b6da94c1a993502dcf8284ba4","value":" 696k/696k [00:00&lt;00:00, 
4.33MB/s]"}},"cadb2516a5c340448f70b4def72923b0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b034e877975c4ef7a16a32669aae665d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b5154d9c661c4835a1dd9e512240c57e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1d0995af5ec240e98027782ee6a6834e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"006225d4bd3641568e1330ba50c23fc7":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"cfc74db46bc040888d563a92de6f9a45":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c123753b6da94c1a993502dcf8284ba4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"330441016e5b44b5930c7ec3554a95f3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bca655046bd143128d1c52b1baee958d","IPY_MODEL_b8021f5298fe4ac19f6f4b19cfa5cae8","IPY_MODEL_dd0dccf09c97485bb0002c6731136cdb"],"layout":"IPY_MODEL_475397890c6445f7954473989dca5705"
}},"bca655046bd143128d1c52b1baee958d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9c02f22a5cb345839f8fe70b0342e6a7","placeholder":"​","style":"IPY_MODEL_7adb167bc60f490bb8f61ef36c9f3b0d","value":"Downloading (…)lve/main/config.json: 100%"}},"b8021f5298fe4ac19f6f4b19cfa5cae8":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e019ef09be8545c184677e32303bbe8d","max":595,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0d280882f5ad49fe8b7aa6244e9491b8","value":595}},"dd0dccf09c97485bb0002c6731136cdb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e596879b4d9d4adba401f4c551a516d1","placeholder":"​","style":"IPY_MODEL_d4f344be9b124aff8fee5b05a0157ac7","value":" 595/595 [00:00&lt;00:00, 
26.4kB/s]"}},"475397890c6445f7954473989dca5705":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9c02f22a5cb345839f8fe70b0342e6a7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7adb167bc60f490bb8f61ef36c9f3b0d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e019ef09be8545c184677e32303bbe8d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d280882f5ad49fe8b7aa6244e9491b8":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e596879b4d9d4adba401f4c551a516d1":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d4f344be9b124aff8fee5b05a0157ac7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9ae0c7362ef34d1aa312627758d1e81f":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_38497a0cefae44d49709036f824d1520","IPY_MODEL_07f2abc27eb44f2dba5dbc5f9a0fe867","IPY_MODEL_a442ca8749914429ac5cd215ffa96739"],"layout":"IPY_MODEL_b0fdd28685f94d728a716a662d57d28d"
}},"38497a0cefae44d49709036f824d1520":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bf5d8b45ea1c48b8b8f5e0bdf0dc0141","placeholder":"​","style":"IPY_MODEL_88bb45388f9a4f93ac270f16e6cc9746","value":"Downloading pytorch_model.bin: 100%"}},"07f2abc27eb44f2dba5dbc5f9a0fe867":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_66c28aeb8bc543118559fbf499fb5b04","max":1560781537,"min":0,"orientation":"horizontal","style":"IPY_MODEL_1e6534be6d454aeabcfba51863991edd","value":1560781537}},"a442ca8749914429ac5cd215ffa96739":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c09ad7b765c14f53813acc93edaa586b","placeholder":"​","style":"IPY_MODEL_387f3da2c1ca4e33b28d632095458ebf","value":" 1.56G/1.56G [00:06&lt;00:00, 
257MB/s]"}},"b0fdd28685f94d728a716a662d57d28d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf5d8b45ea1c48b8b8f5e0bdf0dc0141":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"o
verflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"88bb45388f9a4f93ac270f16e6cc9746":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"66c28aeb8bc543118559fbf499fb5b04":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1e6534be6d454aeabcfba51863991edd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c09ad7b765c14f53813acc93edaa586b":{"model_mo
dule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"387f3da2c1ca4e33b28d632095458ebf":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","source":["# Install Required libraries"],"metadata":{"id":"oNme5YN3mwRS"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"OmxFfI92lslA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1690470424237,"user_tz":-60,"elapsed":16578,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"89c01c62-16cb-44a5-c7a7-ee56d2bfcff4"},"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting sacremoses\n"," Downloading sacremoses-0.0.53.tar.gz (880 kB)\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting transformers\n"," Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacremoses) (2022.10.31)\n","Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from sacremoses) (1.16.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from sacremoses) (8.1.6)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from sacremoses) (1.3.1)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sacremoses) (4.65.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n","Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n"," Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n","Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 
(from transformers)\n"," Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n"," Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m55.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n","Building wheels for collected packages: sacremoses\n"," Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895239 sha256=440bb68a23bcf345210fa9662c990612f9f95477d4a9eb1b832c6211619a91c8\n"," Stored in directory: /root/.cache/pip/wheels/00/24/97/a2ea5324f36bc626e1ea0267f33db6aa80d157ee977e9e42fb\n","Successfully built sacremoses\n","Installing collected packages: tokenizers, safetensors, sacremoses, huggingface-hub, transformers\n","Successfully installed huggingface-hub-0.16.4 sacremoses-0.0.53 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.31.0\n"]}],"source":["!pip install sacremoses transformers"]},{"cell_type":"markdown","source":["# Import required libraries"],"metadata":{"id":"nxKi5cK6nU5Z"}},{"cell_type":"code","source":["from transformers import BioGptTokenizer, BioGptForSequenceClassification, pipeline, AutoModel\n","import torch\n","import pandas as pd\n","import numpy as np\n","from sklearn.model_selection import train_test_split"],"metadata":{"id":"9Q4U4TWWnUNa","executionInfo":{"status":"ok","timestamp":1690470462460,"user_tz":-60,"elapsed":6026,"user":{"displayName":"Unknown","userId":"16317712665857714848"}}},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":["# Import Data"],"metadata":{"id":"nK9JropUm3yK"}},{"cell_type":"code","source":["df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dissertation/data/GOUT-CC-2020-CORPUS-REDACTED.csv')\n","#df = df.drop(\"Predict\", axis=1)"],"metadata":{"id":"pX9KCq5pSseO","executionInfo":{"status":"ok","timestamp":1690470433734,"user_tz":-60,"elapsed":1312,"user":{"displayName":"Unknown","userId":"16317712665857714848"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["# Check for consensus and mark rows for removal\n","rows_to_drop = []\n","for index, row in df.iterrows():\n"," if row['Consensus'] == '-': #Drop unknown and unmarked\n"," rows_to_drop.append(index)\n","\n","# Drop the marked rows\n","df.drop(rows_to_drop, inplace=True)\n","\n","# Print the 
updated DataFrame\n","print(df)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lBfzey48Uufu","executionInfo":{"status":"ok","timestamp":1690470538352,"user_tz":-60,"elapsed":686,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"37e9f181-9bc4-40e1-8e40-afd155c14062"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":[" Chief Complaint Predict Consensus\n","2 Altercation while making arrest, c/o R hand pa... N N\n","3 Cut on L upper thigh wtih saw. Bleeding contro... N N\n","7 Generalized pain all over. Patient not answeri... N N\n","13 R knee pain and swelling x2 weeks. \"I've just ... U N\n","14 R wrist pain/swelling/numbness since this morn... U N\n","... ... ... ...\n","8124 sob and right chest pain x 1 weeks - hx of mu... N N\n","8125 starts in lower back and goes right legs x1 m... N N\n","8127 sciatica pain R lower back radiating to R groi... N N\n","8132 stepped on a nail at home with right foot, pai... N N\n","8136 Rash/sores across body, infection ro left thum... 
N N\n","\n","[264 rows x 3 columns]\n"]}]},{"cell_type":"markdown","source":["# Small data analysis"],"metadata":{"id":"PEoE_tyrm7qS"}},{"cell_type":"code","source":["print(\"group by consensus\")\n","print(df.groupby(\"Consensus\").size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is U\")\n","print(df[df['Predict'] == 'U'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is N\")\n","print(df[df['Predict'] == 'N'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is Y\")\n","print(df[df['Predict'] == 'Y'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n","print(\"Predict is -\")\n","print(df[df['Predict'] == '-'].groupby('Consensus').size())\n","print(\"-----------------------------------------------------------------\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Kpe-3PP3mtsc","executionInfo":{"status":"ok","timestamp":1690470542052,"user_tz":-60,"elapsed":308,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"84746d41-bf35-422d-a5b7-0e0f5fae719c"},"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":["group by consensus\n","Consensus\n","N 232\n","U 7\n","Y 25\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is U\n","Consensus\n","N 111\n","U 7\n","Y 9\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is N\n","Consensus\n","N 116\n","Y 7\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is Y\n","Consensus\n","N 5\n","Y 9\n","dtype: int64\n","-----------------------------------------------------------------\n","Predict is -\n","Series([], dtype: 
int64)\n","-----------------------------------------------------------------\n"]}]},{"cell_type":"markdown","source":["# Load Model and tokenizer (BioGPT)"],"metadata":{"id":"7h7jfsy9njIx"}},{"cell_type":"code","source":["device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","\n","# BioGPT (like GPT-2) is a model with absolute position embeddings, so it is usually advised to pad the inputs on the right rather than the left.\n","# NOTE(review): padding_side is set to \"left\" below, which goes against that advice - confirm this is intentional.\n","tokenizer = BioGptTokenizer.from_pretrained(\"microsoft/biogpt\")\n","tokenizer.padding_side = \"left\"\n","\n","model = AutoModel.from_pretrained(\"microsoft/biogpt\")\n","\n","model = model.to(device)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["1c2ab33b113c43d5a5dc5adc2545ae8c","dd81bb4f7d7e4a219bb88e1ae672e269","7ddb35d7a61448beb8622bbee7e030ce","5476a15648ed435b97cb98699b778613","cf535724d52649b091caddf11cd7d343","c33ab185b9fe43298b8e1df6a518f66f","3e0520d086e04e50a326f5b54a3d12c5","7488c398f65a45c69d7339a5cc0281bd","6097b53803ee41989dbabae5757e8980","0fb8445cd3d34b7dbc9a0ddf56cc46aa","894316e70feb4c02a76d773d6b8a55ac","9a7df809d0e34ea8a2f33405e9e4ea8e","49991380a0c34610b2fbc1162e5e714b","0a27bf29283243d29f470307b2373001","6010e0d1208f4f6d8c87b3e04f209d70","cadb2516a5c340448f70b4def72923b0","b034e877975c4ef7a16a32669aae665d","b5154d9c661c4835a1dd9e512240c57e","1d0995af5ec240e98027782ee6a6834e","006225d4bd3641568e1330ba50c23fc7","cfc74db46bc040888d563a92de6f9a45","c123753b6da94c1a993502dcf8284ba4","330441016e5b44b5930c7ec3554a95f3","bca655046bd143128d1c52b1baee958d","b8021f5298fe4ac19f6f4b19cfa5cae8","dd0dccf09c97485bb0002c6731136cdb","475397890c6445f7954473989dca5705","9c02f22a5cb345839f8fe70b0342e6a7","7adb167bc60f490bb8f61ef36c9f3b0d","e019ef09be8545c184677e32303bbe8d","0d280882f5ad49fe8b7aa6244e9491b8","e596879b4d9d4adba401f4c551a516d1","d4f344be9b124aff8fee5b05a0157ac7","9ae0c7362ef34d1aa312627758d1e81f","38497a0cefae44d49709036f824d1520","07f2abc27eb44f2dba5dbc5f
9a0fe867","a442ca8749914429ac5cd215ffa96739","b0fdd28685f94d728a716a662d57d28d","bf5d8b45ea1c48b8b8f5e0bdf0dc0141","88bb45388f9a4f93ac270f16e6cc9746","66c28aeb8bc543118559fbf499fb5b04","1e6534be6d454aeabcfba51863991edd","c09ad7b765c14f53813acc93edaa586b","387f3da2c1ca4e33b28d632095458ebf"]},"id":"S37ZLL02nMOi","executionInfo":{"status":"ok","timestamp":1690470489452,"user_tz":-60,"elapsed":23877,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"7732e030-7bba-4a0c-f7dd-d6263b98c466"},"execution_count":5,"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (…)olve/main/vocab.json: 0%| | 0.00/927k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1c2ab33b113c43d5a5dc5adc2545ae8c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading (…)olve/main/merges.txt: 0%| | 0.00/696k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9a7df809d0e34ea8a2f33405e9e4ea8e"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading (…)lve/main/config.json: 0%| | 0.00/595 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"330441016e5b44b5930c7ec3554a95f3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading pytorch_model.bin: 0%| | 0.00/1.56G [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9ae0c7362ef34d1aa312627758d1e81f"}},"metadata":{}}]},{"cell_type":"code","source":["#oversample 2019 and get result\n","#class weight\n","#fine tune"],"metadata":{"id":"lmd-cKW-pGj8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#clear GPU cache\n","#torch.cuda.empty_cache()"],"metadata":{"id":"-HIrsk4pqguR"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Define the 
pipeline"],"metadata":{"id":"ppuc0YsrMwiA"}},{"cell_type":"code","source":["# Create the pipeline\n","p = pipeline(\n"," task=\"feature-extraction\",\n"," tokenizer=\"microsoft/biogpt\",\n"," model=\"microsoft/biogpt\",\n"," framework=\"pt\",\n"," device=0, # use CUDA with 0\n",")"],"metadata":{"id":"f9JuA7OTMuw-","executionInfo":{"status":"ok","timestamp":1690470557426,"user_tz":-60,"elapsed":5788,"user":{"displayName":"Unknown","userId":"16317712665857714848"}}},"execution_count":10,"outputs":[]},{"cell_type":"markdown","source":["### Get the embeddings of the last token of the last hidden state"],"metadata":{"id":"dNTtyRnArThB"}},{"cell_type":"code","source":["# Custom function to extract the embedding of the last token\n","def extract_last_token(last_hidden_states):\n"," last_hidden_states = np.array(last_hidden_states)\n"," return last_hidden_states[:,-1,:]\n","\n","# Process the data using the pipeline\n","results = p([row[\"Chief Complaint\"] for _, row in df.iterrows()])\n","\n","# Extract the last token of the last hidden state\n","embeddings = [extract_last_token(hidden_state) for hidden_state in results]"],"metadata":{"id":"8TnbZZQ3qqjo","executionInfo":{"status":"ok","timestamp":1690470570221,"user_tz":-60,"elapsed":12513,"user":{"displayName":"Unknown","userId":"16317712665857714848"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["# Store the last-token embeddings in the \"emb_biogpt_no_FineT\" column of the DataFrame\n","df[\"emb_biogpt_no_FineT\"] = embeddings\n","\n","# Print the resulting DataFrame\n","print(df)"],"metadata":{"id":"ZzokAh_VU5c4","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1690470570221,"user_tz":-60,"elapsed":18,"user":{"displayName":"Unknown","userId":"16317712665857714848"}},"outputId":"95700da2-1f7e-4574-ca5b-45351524c7ef"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":[" Chief Complaint Predict Consensus \\\n","2 Altercation while making 
arrest, c/o R hand pa... N N \n","3 Cut on L upper thigh wtih saw. Bleeding contro... N N \n","7 Generalized pain all over. Patient not answeri... N N \n","13 R knee pain and swelling x2 weeks. \"I've just ... U N \n","14 R wrist pain/swelling/numbness since this morn... U N \n","... ... ... ... \n","8124 sob and right chest pain x 1 weeks - hx of mu... N N \n","8125 starts in lower back and goes right legs x1 m... N N \n","8127 sciatica pain R lower back radiating to R groi... N N \n","8132 stepped on a nail at home with right foot, pai... N N \n","8136 Rash/sores across body, infection ro left thum... N N \n","\n"," emb_biogpt_no_FineT \n","2 [[0.014176959171891212, 1.070388913154602, -0.... \n","3 [[-0.2563551366329193, -1.5399489402770996, -0... \n","7 [[-1.118257761001587, -0.5657621622085571, -2.... \n","13 [[-0.3429946005344391, -0.15859508514404297, -... \n","14 [[-1.1331700086593628, 0.2827310562133789, -0.... \n","... ... \n","8124 [[0.09883549809455872, -0.4225137531757355, -3... \n","8125 [[1.0552010536193848, 0.5237547755241394, -0.1... \n","8127 [[-1.5441633462905884, 0.03140628710389137, -1... \n","8132 [[-0.4451369047164917, 0.439532071352005, -0.8... \n","8136 [[0.7580855488777161, 0.8591176271438599, 0.45... \n","\n","[264 rows x 4 columns]\n"]}]},{"cell_type":"markdown","source":["## Save to JSON"],"metadata":{"id":"OeTeIttSNTS9"}},{"cell_type":"code","source":["df.to_json('/content/drive/MyDrive/Colab Notebooks/dissertation/data/2020_noFT.json', orient='records')"],"metadata":{"id":"8OxiKJdDNXI1","executionInfo":{"status":"ok","timestamp":1690470618351,"user_tz":-60,"elapsed":390,"user":{"displayName":"Unknown","userId":"16317712665857714848"}}},"execution_count":13,"outputs":[]}]}