From 3f589c0e697b6b5fb0f0b14cad56a6f8e0a7eae3 Mon Sep 17 00:00:00 2001 From: Daniel Bisig Date: Mon, 15 Aug 2022 12:38:59 +0200 Subject: [PATCH] first commit --- .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + .../granular_dance/.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 10 + autoencoder/granular_dance/granular_dance.py | 847 +++++++++++++++++ .../.pylint.d/image_autoencoder1.stats | Bin 0 -> 1282 bytes .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + .../.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 10 + .../image_autoencoder/image_autoencoder.py | 652 +++++++++++++ .../image_autoencoder_tmp.py | 862 ++++++++++++++++++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + .../.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 10 + .../motion_autoencoder/motion_autoencoder.py | 761 ++++++++++++++++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 12 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + .../.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 12 + .../pose_autoencoder/pose_autoencoder.py | 755 +++++++++++++++ .../rnn/.pylint.d/autoregression_rnn1.stats | Bin 0 -> 1233 bytes .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../rnn/.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../rnn/.spyproject/config/encoding.ini | 6 + autoregression/rnn/.spyproject/config/vcs.ini | 7 + .../rnn/.spyproject/config/workspace.ini | 10 + autoregression/rnn/autoregression_rnn.py | 448 +++++++++ .../.pylint.d/autoregression_rnn1.stats | Bin 0 -> 1233 bytes .../.pylint.d/autoregression_rnn_mdn1.stats | Bin 0 -> 1236 bytes .../config/backups/codestyle.ini.bak | 8 
+ .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../rnn_mdn/.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../rnn_mdn/.spyproject/config/encoding.ini | 6 + .../rnn_mdn/.spyproject/config/vcs.ini | 7 + .../rnn_mdn/.spyproject/config/workspace.ini | 10 + .../rnn_mdn/autoregression_rnn_mdn.py | 480 ++++++++++ common/__init__.py | 0 common/mocap_dataset.py | 167 ++++ common/pose_renderer.py | 193 ++++ common/quaternion.py | 238 +++++ common/skeleton.py | 115 +++ common/utils.py | 160 ++++ flickr/flickr_scrape.py | 61 ++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + gan/image_gan/.spyproject/config/encoding.ini | 6 + gan/image_gan/.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 11 + gan/image_gan/image_gan.py | 544 +++++++++++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + gan/motion_gan/.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 10 + gan/motion_gan/motion_gan.py | 543 +++++++++++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + gan/pose_gan/.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + gan/pose_gan/.spyproject/config/encoding.ini | 6 + gan/pose_gan/.spyproject/config/vcs.ini | 7 + gan/pose_gan/.spyproject/config/workspace.ini | 10 + gan/pose_gan/pose_gan.py | 566 ++++++++++++ other/Dataset_Tutorial.py | 63 ++ .../config/backups/codestyle.ini.bak | 8 + .../config/backups/encoding.ini.bak | 6 + .../.spyproject/config/backups/vcs.ini.bak | 7 + .../config/backups/workspace.ini.bak | 10 + .../.spyproject/config/codestyle.ini | 8 + .../defaults/defaults-codestyle-0.2.0.ini | 5 + .../defaults/defaults-encoding-0.2.0.ini | 3 + .../config/defaults/defaults-vcs-0.2.0.ini | 4 + .../defaults/defaults-workspace-0.2.0.ini | 6 + .../.spyproject/config/encoding.ini | 6 + .../bvh_conversion/.spyproject/config/vcs.ini | 7 + .../.spyproject/config/workspace.ini | 10 + utils/bvh_conversion/bvh_data.py | 53 ++ utils/bvh_conversion/bvh_parsers.py | 242 +++++ utils/bvh_conversion/bvh_tools.py | 371 ++++++++ utils/bvh_conversion/dataset_tools.py | 206 +++++ utils/bvh_conversion/main_bvhconv.py | 53 ++ 147 files changed, 9185 insertions(+) create mode 100644 autoencoder/granular_dance/.spyproject/config/backups/codestyle.ini.bak 
create mode 100644 autoencoder/granular_dance/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoencoder/granular_dance/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoencoder/granular_dance/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoencoder/granular_dance/.spyproject/config/codestyle.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/encoding.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/vcs.ini create mode 100644 autoencoder/granular_dance/.spyproject/config/workspace.ini create mode 100644 autoencoder/granular_dance/granular_dance.py create mode 100644 autoencoder/image_autoencoder/.pylint.d/image_autoencoder1.stats create mode 100644 autoencoder/image_autoencoder/.spyproject/config/backups/codestyle.ini.bak create mode 100644 autoencoder/image_autoencoder/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoencoder/image_autoencoder/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoencoder/image_autoencoder/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoencoder/image_autoencoder/.spyproject/config/codestyle.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/encoding.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/vcs.ini create mode 100644 autoencoder/image_autoencoder/.spyproject/config/workspace.ini create mode 100644 autoencoder/image_autoencoder/image_autoencoder.py create mode 100644 autoencoder/image_autoencoder/image_autoencoder_tmp.py create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/backups/codestyle.ini.bak create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/codestyle.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/encoding.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/vcs.ini create mode 100644 autoencoder/motion_autoencoder/.spyproject/config/workspace.ini create mode 100644 
autoencoder/motion_autoencoder/motion_autoencoder.py create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/backups/codestyle.ini.bak create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/codestyle.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/encoding.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/vcs.ini create mode 100644 autoencoder/pose_autoencoder/.spyproject/config/workspace.ini create mode 100644 autoencoder/pose_autoencoder/pose_autoencoder.py create mode 100644 autoregression/rnn/.pylint.d/autoregression_rnn1.stats create mode 100644 autoregression/rnn/.spyproject/config/backups/codestyle.ini.bak create mode 100644 autoregression/rnn/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoregression/rnn/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoregression/rnn/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoregression/rnn/.spyproject/config/codestyle.ini create mode 100644 autoregression/rnn/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoregression/rnn/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoregression/rnn/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoregression/rnn/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoregression/rnn/.spyproject/config/encoding.ini create mode 100644 autoregression/rnn/.spyproject/config/vcs.ini create mode 100644 autoregression/rnn/.spyproject/config/workspace.ini create mode 100644 autoregression/rnn/autoregression_rnn.py create mode 100644 autoregression/rnn_mdn/.pylint.d/autoregression_rnn1.stats create mode 100644 autoregression/rnn_mdn/.pylint.d/autoregression_rnn_mdn1.stats create mode 100644 autoregression/rnn_mdn/.spyproject/config/backups/codestyle.ini.bak create mode 100644 autoregression/rnn_mdn/.spyproject/config/backups/encoding.ini.bak create mode 100644 autoregression/rnn_mdn/.spyproject/config/backups/vcs.ini.bak create mode 100644 autoregression/rnn_mdn/.spyproject/config/backups/workspace.ini.bak create mode 100644 autoregression/rnn_mdn/.spyproject/config/codestyle.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/encoding.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/vcs.ini create mode 100644 autoregression/rnn_mdn/.spyproject/config/workspace.ini create mode 100644 
autoregression/rnn_mdn/autoregression_rnn_mdn.py create mode 100644 common/__init__.py create mode 100644 common/mocap_dataset.py create mode 100644 common/pose_renderer.py create mode 100644 common/quaternion.py create mode 100644 common/skeleton.py create mode 100644 common/utils.py create mode 100644 flickr/flickr_scrape.py create mode 100644 gan/image_gan/.spyproject/config/backups/codestyle.ini.bak create mode 100644 gan/image_gan/.spyproject/config/backups/encoding.ini.bak create mode 100644 gan/image_gan/.spyproject/config/backups/vcs.ini.bak create mode 100644 gan/image_gan/.spyproject/config/backups/workspace.ini.bak create mode 100644 gan/image_gan/.spyproject/config/codestyle.ini create mode 100644 gan/image_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 gan/image_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 gan/image_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 gan/image_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 gan/image_gan/.spyproject/config/encoding.ini create mode 100644 gan/image_gan/.spyproject/config/vcs.ini create mode 100644 gan/image_gan/.spyproject/config/workspace.ini create mode 100644 gan/image_gan/image_gan.py create mode 100644 gan/motion_gan/.spyproject/config/backups/codestyle.ini.bak create mode 100644 gan/motion_gan/.spyproject/config/backups/encoding.ini.bak create mode 100644 gan/motion_gan/.spyproject/config/backups/vcs.ini.bak create mode 100644 gan/motion_gan/.spyproject/config/backups/workspace.ini.bak create mode 100644 gan/motion_gan/.spyproject/config/codestyle.ini create mode 100644 gan/motion_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 gan/motion_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 gan/motion_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 gan/motion_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 gan/motion_gan/.spyproject/config/encoding.ini create mode 100644 gan/motion_gan/.spyproject/config/vcs.ini create mode 100644 gan/motion_gan/.spyproject/config/workspace.ini create mode 100644 gan/motion_gan/motion_gan.py create mode 100644 gan/pose_gan/.spyproject/config/backups/codestyle.ini.bak create mode 100644 gan/pose_gan/.spyproject/config/backups/encoding.ini.bak create mode 100644 gan/pose_gan/.spyproject/config/backups/vcs.ini.bak create mode 100644 gan/pose_gan/.spyproject/config/backups/workspace.ini.bak create mode 100644 gan/pose_gan/.spyproject/config/codestyle.ini create mode 100644 gan/pose_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 gan/pose_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 gan/pose_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 gan/pose_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 gan/pose_gan/.spyproject/config/encoding.ini create mode 100644 gan/pose_gan/.spyproject/config/vcs.ini create mode 100644 gan/pose_gan/.spyproject/config/workspace.ini create mode 100644 gan/pose_gan/pose_gan.py create mode 100644 other/Dataset_Tutorial.py create mode 100644 utils/bvh_conversion/.spyproject/config/backups/codestyle.ini.bak create mode 100644 utils/bvh_conversion/.spyproject/config/backups/encoding.ini.bak create mode 100644 utils/bvh_conversion/.spyproject/config/backups/vcs.ini.bak create mode 100644 
utils/bvh_conversion/.spyproject/config/backups/workspace.ini.bak create mode 100644 utils/bvh_conversion/.spyproject/config/codestyle.ini create mode 100644 utils/bvh_conversion/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini create mode 100644 utils/bvh_conversion/.spyproject/config/defaults/defaults-encoding-0.2.0.ini create mode 100644 utils/bvh_conversion/.spyproject/config/defaults/defaults-vcs-0.2.0.ini create mode 100644 utils/bvh_conversion/.spyproject/config/defaults/defaults-workspace-0.2.0.ini create mode 100644 utils/bvh_conversion/.spyproject/config/encoding.ini create mode 100644 utils/bvh_conversion/.spyproject/config/vcs.ini create mode 100644 utils/bvh_conversion/.spyproject/config/workspace.ini create mode 100644 utils/bvh_conversion/bvh_data.py create mode 100644 utils/bvh_conversion/bvh_parsers.py create mode 100644 utils/bvh_conversion/bvh_tools.py create mode 100644 utils/bvh_conversion/dataset_tools.py create mode 100644 utils/bvh_conversion/main_bvhconv.py diff --git a/autoencoder/granular_dance/.spyproject/config/backups/codestyle.ini.bak b/autoencoder/granular_dance/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/backups/encoding.ini.bak b/autoencoder/granular_dance/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/backups/vcs.ini.bak b/autoencoder/granular_dance/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/backups/workspace.ini.bak b/autoencoder/granular_dance/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..d53ea3c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['granular_dance.py'] + diff --git a/autoencoder/granular_dance/.spyproject/config/codestyle.ini b/autoencoder/granular_dance/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git 
a/autoencoder/granular_dance/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/autoencoder/granular_dance/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/autoencoder/granular_dance/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/autoencoder/granular_dance/.spyproject/config/encoding.ini b/autoencoder/granular_dance/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/vcs.ini b/autoencoder/granular_dance/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/granular_dance/.spyproject/config/workspace.ini b/autoencoder/granular_dance/.spyproject/config/workspace.ini new file mode 100644 index 0000000..d53ea3c --- /dev/null +++ b/autoencoder/granular_dance/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['granular_dance.py'] + diff --git a/autoencoder/granular_dance/granular_dance.py b/autoencoder/granular_dance/granular_dance.py new file mode 100644 index 0000000..833a4fe --- /dev/null +++ b/autoencoder/granular_dance/granular_dance.py @@ -0,0 +1,847 @@ +""" +Same as motion_autoencoder.py but with a concatenation mechanism for blending successive +pose sequences +""" + +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from torch import nn +from collections import OrderedDict + +import os, sys, time, subprocess +import numpy as np +sys.path.append("../..") + +from common import utils +from common.skeleton import Skeleton +from common.mocap_dataset import MocapDataset +from common.quaternion import qmul, qnormalize_np, slerp +from common.pose_renderer import PoseRenderer + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print('Using {} device'.format(device)) + +# mocap settings +mocap_data_path = "../../../../Data/Mocap/Muriel_Nov_2021/MUR_PolytopiaMovement_Take2_mb_proc_rh.p" +mocap_valid_frame_ranges = [ [ 860, 9500 ] ] +mocap_fps = 50 + +# model settings +latent_dim = 64 +sequence_length = 128 +ae_rnn_layer_count = 
2 +ae_rnn_layer_size = 512 +ae_dense_layer_sizes = [ 512 ] +prior_crit_dense_layer_sizes = [ 512, 512 ] + +save_models = False +save_tscript = False +save_weights = False + +# load model weights +load_weights = False +disc_prior_weights_file = "results/weights/disc_prior_weights_epoch_400" +encoder_weights_file = "results/weights/encoder_weights_epoch_400" +decoder_weights_file = "results/weights/decoder_weights_epoch_400" + +# training settings +sequence_offset = 2 # when creating sequence excerpts, each excerpt is offset from the previous one by this value +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +dp_learning_rate = 5e-4 +ae_learning_rate = 1e-4 +ae_norm_loss_scale = 0.1 +ae_pos_loss_scale = 0.1 +ae_quat_loss_scale = 1.0 +ae_prior_loss_scale = 0.01 # weight for prior distribution loss +epochs = 10 +model_save_interval = 100 +save_history = False + +# visualization settings +view_ele = 0.0 +view_azi = 0.0 +view_line_width = 4.0 +view_size = 8.0 + +# load mocap data +mocap_data = MocapDataset(mocap_data_path, fps=mocap_fps) +if device == 'cuda': + mocap_data.cuda() +mocap_data.compute_positions() + +# gather skeleton info +skeleton = mocap_data.skeleton() +skeleton_joint_count = skeleton.num_joints() +skel_edge_list = utils.get_skeleton_edge_list(skeleton) + +# obtain pose sequence +subject = "S1" +action = "A1" +pose_sequence = mocap_data[subject][action]["rotations"] + +pose_sequence_length = pose_sequence.shape[0] +joint_count = pose_sequence.shape[1] +joint_dim = pose_sequence.shape[2] +pose_dim = joint_count * joint_dim +pose_sequence = np.reshape(pose_sequence, (-1, pose_dim)) + +# gather pose sequence excerpts +pose_sequence_excerpts = [] + +for valid_frame_range in mocap_valid_frame_ranges: + frame_range_start = valid_frame_range[0] + frame_range_end = valid_frame_range[1] + + for seq_excerpt_start in np.arange(frame_range_start, frame_range_end - sequence_length, sequence_offset): + #print("valid: start ", frame_range_start, " end ", frame_range_end, " exc: start ", seq_excerpt_start, " end ", (seq_excerpt_start + sequence_length) ) + pose_sequence_excerpt = pose_sequence[seq_excerpt_start:seq_excerpt_start + sequence_length] + pose_sequence_excerpts.append(pose_sequence_excerpt) + +pose_sequence_excerpts = np.array(pose_sequence_excerpts) + +# create dataset + +sequence_excerpts_count = pose_sequence_excerpts.shape[0] + +class SequenceDataset(Dataset): + def __init__(self, sequence_excerpts): + self.sequence_excerpts = sequence_excerpts + + def __len__(self): + return self.sequence_excerpts.shape[0] + + def __getitem__(self, idx): + return self.sequence_excerpts[idx, ...] 
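+
+# The module docstring above mentions a concatenation mechanism for blending successive
+# pose sequences; decode_sequence_encodings() further down implements it by overlapping
+# decoded excerpts and mixing each overlapping frame toward the new excerpt under a
+# Hanning envelope via quaternion slerp. Below is a minimal, self-contained sketch of that
+# idea on dummy data: _toy_slerp and _toy_blend are illustrative helpers that are not used
+# by the training code, and the normalized linear interpolation only stands in for
+# common.quaternion.slerp.
+def _toy_slerp(q0, q1, t):
+    # simplified quaternion mix: normalized linear interpolation (stand-in for slerp)
+    q = (1.0 - t) * q0 + t * q1
+    return q / (np.linalg.norm(q) + 1e-8)
+
+def _toy_blend(excerpts, overlap):
+    # excerpts: list of arrays of shape (sequence_length, joint_count, 4)
+    seq_len, n_joints, _ = excerpts[0].shape
+    env = np.hanning(seq_len)  # per-frame blend weight within an excerpt
+    total_len = (len(excerpts) - 1) * overlap + seq_len
+    # start from an identity quaternion "base pose" for every frame and joint
+    out = np.tile(np.array([1.0, 0.0, 0.0, 0.0]), (total_len, n_joints, 1))
+    for i, exc in enumerate(excerpts):
+        start = i * overlap
+        for si in range(seq_len):
+            for ji in range(n_joints):
+                out[start + si, ji] = _toy_slerp(out[start + si, ji], exc[si, ji], env[si])
+    return out
+
+# example with dummy data: _toy_blend([np.random.randn(128, 4, 4) for _ in range(4)], 32)
+# returns an array of shape ((4 - 1) * 32 + 128, 4, 4) = (224, 4, 4)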
+ + +full_dataset = SequenceDataset(pose_sequence_excerpts) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# create models + +# create discriminator model for prior distribution + +class DiscriminatorPrior(nn.Module): + def __init__(self, latent_dim, prior_crit_dense_layer_sizes): + super(DiscriminatorPrior, self).__init__() + + self.latent_dim = latent_dim + self.prior_crit_dense_layer_sizes = prior_crit_dense_layer_sizes + + dense_layers = [] + dense_layers.append(("disc_prior_dense_0", nn.Linear(latent_dim, prior_crit_dense_layer_sizes[0]))) + dense_layers.append(("disc_prior_elu_0", nn.ELU())) + + dense_layer_count = len(prior_crit_dense_layer_sizes) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("disc_prior_dense_{}".format(layer_index), nn.Linear(prior_crit_dense_layer_sizes[layer_index - 1], prior_crit_dense_layer_sizes[layer_index]))) + dense_layers.append(("disc_prior_elu_{}".format(layer_index), nn.ELU())) + + dense_layers.append(("disc_prior_dense_{}".format(dense_layer_count), nn.Linear(prior_crit_dense_layer_sizes[-1], 1))) + dense_layers.append(("disc_prior_sigmoid_{}".format(dense_layer_count), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + yhat = self.dense_layers(x) + return yhat + +disc_prior = DiscriminatorPrior(latent_dim, prior_crit_dense_layer_sizes).to(device) + +print(disc_prior) + +""" +for name, param in discriminator_prior.named_parameters(): + print(f"Layer: {name} | Size: {param.size()}") +""" + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(disc_prior, "results/models/disc_prior.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(disc_prior, x, "results/models/disc_prior.onnx") + + disc_prior.train() + +if save_tscript == True: + disc_prior.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(disc_prior, x) + script_module.save("results/models/disc_prior.pt") + + disc_prior.train() + +if load_weights and disc_prior_weights_file: + disc_prior.load_state_dict(torch.load(disc_prior_weights_file)) + +# create encoder model + +class Encoder(nn.Module): + def __init__(self, sequence_length, pose_dim, latent_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes): + super(Encoder, self).__init__() + + self.sequence_length = sequence_length + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.rnn_layer_count = rnn_layer_count + self.rnn_layer_size = rnn_layer_size + self.dense_layer_sizes = dense_layer_sizes + + # create recurrent layers + rnn_layers = [] + rnn_layers.append(("encoder_rnn_0", nn.LSTM(self.pose_dim, self.rnn_layer_size, self.rnn_layer_count, batch_first=True))) + + self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers)) + + # create dense layers + + dense_layers = [] + + dense_layers.append(("encoder_dense_0", nn.Linear(self.rnn_layer_size, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_dense_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + 
            dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index])))
+            dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU()))
+
+        dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.latent_dim)))
+        dense_layers.append(("encoder_dense_relu_{}".format(len(self.dense_layer_sizes)), nn.ReLU()))
+
+        self.dense_layers = nn.Sequential(OrderedDict(dense_layers))
+
+    def forward(self, x):
+
+        #print("x 1 ", x.shape)
+
+        x, (_, _) = self.rnn_layers(x)
+
+        #print("x 2 ", x.shape)
+
+        x = x[:, -1, :] # only last time step
+
+        #print("x 3 ", x.shape)
+
+        yhat = self.dense_layers(x)
+
+        #print("yhat ", yhat.shape)
+
+        return yhat
+
+encoder = Encoder(sequence_length, pose_dim, latent_dim, ae_rnn_layer_count, ae_rnn_layer_size, ae_dense_layer_sizes).to(device)
+
+print(encoder)
+
+if save_models == True:
+    encoder.eval()
+
+    # save using pickle
+    torch.save(encoder, "results/models/encoder.pth")
+
+    # save using onnx
+    x = torch.zeros((1, sequence_length, pose_dim)).to(device)
+    torch.onnx.export(encoder, x, "results/models/encoder.onnx")
+
+    encoder.train()
+
+if save_tscript == True:
+    encoder.eval()
+
+    # save using TorchScript
+    x = torch.rand((1, sequence_length, pose_dim), dtype=torch.float32).to(device)
+    script_module = torch.jit.trace(encoder, x)
+    script_module.save("results/models/encoder.pt")
+
+    encoder.train()
+
+if load_weights and encoder_weights_file:
+    encoder.load_state_dict(torch.load(encoder_weights_file, map_location=device))
+
+
+# create decoder model
+
+class Decoder(nn.Module):
+    def __init__(self, sequence_length, pose_dim, latent_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes):
+        super(Decoder, self).__init__()
+
+        self.sequence_length = sequence_length
+        self.pose_dim = pose_dim
+        self.latent_dim = latent_dim
+        self.rnn_layer_size = rnn_layer_size
+        self.rnn_layer_count = rnn_layer_count
+        self.dense_layer_sizes = dense_layer_sizes
+
+        # create dense layers
+        dense_layers = []
+
+        dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0])))
+        dense_layers.append(("decoder_relu_0", nn.ReLU()))
+
+        dense_layer_count = len(self.dense_layer_sizes)
+        for layer_index in range(1, dense_layer_count):
+            dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index])))
+            dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU()))
+
+        self.dense_layers = nn.Sequential(OrderedDict(dense_layers))
+
+        # create rnn layers
+        rnn_layers = []
+
+        rnn_layers.append(("decoder_rnn_0", nn.LSTM(self.dense_layer_sizes[-1], self.rnn_layer_size, self.rnn_layer_count, batch_first=True)))
+
+        self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers))
+
+        # final output dense layer
+        final_layers = []
+
+        final_layers.append(("decoder_dense_{}".format(dense_layer_count), nn.Linear(self.rnn_layer_size, self.pose_dim)))
+
+        self.final_layers = nn.Sequential(OrderedDict(final_layers))
+
+    def forward(self, x):
+        #print("x 1 ", x.size())
+
+        # dense layers
+        x = self.dense_layers(x)
+        #print("x 2 ", x.size())
+
+        # repeat vector
+        x = torch.unsqueeze(x, dim=1)
+        x = x.repeat(1, sequence_length, 1)
+        #print("x 3 ", x.size())
+
+        # rnn layers
+        x, (_, _) = self.rnn_layers(x)
+        #print("x 4 ", x.size())
+
+        # final time distributed dense layer
+        x_reshaped = x.contiguous().view(-1, self.rnn_layer_size) # (batch_size * sequence, input_size)
+        #print("x 5 ", x_reshaped.size())
+
+        yhat = self.final_layers(x_reshaped)
+        #print("yhat 1 ", yhat.size())
+
+        yhat = yhat.contiguous().view(-1, self.sequence_length, self.pose_dim)
+        #print("yhat 2 ", yhat.size())
+
+        return yhat
+
+ae_dense_layer_sizes_reversed = ae_dense_layer_sizes.copy()
+ae_dense_layer_sizes_reversed.reverse()
+
+decoder = Decoder(sequence_length, pose_dim, latent_dim, ae_rnn_layer_count, ae_rnn_layer_size, ae_dense_layer_sizes_reversed).to(device)
+
+print(decoder)
+
+if save_models == True:
+    decoder.eval()
+
+    # save using pickle
+    torch.save(decoder, "results/models/decoder.pth")
+
+    # save using onnx
+    x = torch.zeros((1, latent_dim)).to(device)
+    torch.onnx.export(decoder, x, "results/models/decoder.onnx")
+
+    decoder.train()
+
+if save_tscript == True:
+    decoder.eval()
+
+    # save using TorchScript
+    x = torch.rand((1, latent_dim), dtype=torch.float32).to(device)
+    script_module = torch.jit.trace(decoder, x)
+    script_module.save("results/models/decoder.pt")
+
+    decoder.train()
+
+if load_weights and decoder_weights_file:
+    decoder.load_state_dict(torch.load(decoder_weights_file, map_location=device))
+
+# Training
+
+disc_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=dp_learning_rate)
+ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=ae_learning_rate)
+
+cross_entropy = nn.BCELoss()
+
+# function returning normally distributed random samples
+# these serve as the real (prior) reference that the discriminator compares the encoder's latent codes against
+def sample_normal(shape):
+    return torch.tensor(np.random.normal(size=shape), dtype=torch.float32).to(device)
+
+# discriminator prior loss function
+def disc_prior_loss(disc_real_output, disc_fake_output):
+    ones = torch.ones_like(disc_real_output).to(device)
+    zeros = torch.zeros_like(disc_fake_output).to(device)
+
+    real_loss = cross_entropy(disc_real_output, ones)
+    fake_loss = cross_entropy(disc_fake_output, zeros)
+
+    total_loss = (real_loss + fake_loss) * 0.5
+    return total_loss
+
+def ae_norm_loss(yhat):
+
+    _yhat = yhat.view(-1, 4)
+    _norm = torch.norm(_yhat, dim=1)
+    _diff = (_norm - 1.0) ** 2
+    _loss = torch.mean(_diff)
+    return _loss
+
+def ae_pos_loss(y, yhat):
+    # y and yhat shapes: batch_size, seq_length, pose_dim
+
+    # normalize tensors
+    _yhat = yhat.view(-1, 4)
+
+    _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1)
+    _y_rot = y.view((y.shape[0], y.shape[1], -1, 4))
+    _yhat_rot = _yhat_norm.view((y.shape[0], y.shape[1], -1, 4))
+
+    zero_trajectory = torch.zeros((y.shape[0], y.shape[1], 3), dtype=torch.float32, requires_grad=True).to(device)
+
+    _y_pos = skeleton.forward_kinematics(_y_rot, zero_trajectory)
+    _yhat_pos = skeleton.forward_kinematics(_yhat_rot, zero_trajectory)
+
+    _pos_diff = torch.norm((_y_pos - _yhat_pos), dim=3)
+
+    _loss = torch.mean(_pos_diff)
+
+    return _loss
+
+def ae_quat_loss(y, yhat):
+    # y and yhat shapes: batch_size, seq_length, pose_dim
+
+    # normalize quaternion
+
+    _y = y.view((-1, 4))
+    _yhat = yhat.view((-1, 4))
+
+    _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1)
+
+    # inverse of quaternion: https://www.mathworks.com/help/aeroblks/quaternioninverse.html
+    _yhat_inv = _yhat_norm * torch.tensor([[1.0, -1.0, -1.0, -1.0]], dtype=torch.float32).to(device)
+
+    # calculate difference quaternion
+    _diff = qmul(_yhat_inv, _y)
+    # length of complex part
+    _len = torch.norm(_diff[:, 1:], dim=1)
+    # atan2
+    _atan = torch.atan2(_len, _diff[:, 0])
+    # abs
+    _abs = torch.abs(_atan)
+    _loss = torch.mean(_abs)
+    return _loss
+
+# autoencoder loss function
+def ae_loss(y, yhat, disc_fake_output):
+    # function parameters
+    # y: encoder input
+    # yhat: decoder output (i.e. reconstructed encoder input)
+    # disc_fake_output: discriminator output for the encoder-generated latent codes
+
+    _norm_loss = ae_norm_loss(yhat)
+    _pos_loss = ae_pos_loss(y, yhat)
+    _quat_loss = ae_quat_loss(y, yhat)
+
+    # discrimination (adversarial prior) loss: nn.BCELoss expects (prediction, target);
+    # the encoder is trained to make the discriminator classify its latent codes as real (target = 1)
+    _fake_loss = cross_entropy(disc_fake_output, torch.ones_like(disc_fake_output))
+
+    _total_loss = 0.0
+    _total_loss += _norm_loss * ae_norm_loss_scale
+    _total_loss += _pos_loss * ae_pos_loss_scale
+    _total_loss += _quat_loss * ae_quat_loss_scale
+    _total_loss += _fake_loss * ae_prior_loss_scale
+
+    return _total_loss, _norm_loss, _pos_loss, _quat_loss, _fake_loss
+
+def disc_prior_train_step(target_poses):
+    # have normal distribution and encoder produce real and fake outputs, respectively
+
+    with torch.no_grad():
+        encoder_output = encoder(target_poses)
+
+    real_output = sample_normal(encoder_output.shape)
+
+    # let discriminator distinguish between real and fake outputs
+    disc_real_output = disc_prior(real_output)
+    disc_fake_output = disc_prior(encoder_output)
+    _disc_loss = disc_prior_loss(disc_real_output, disc_fake_output)
+
+    # Backpropagation
+    disc_optimizer.zero_grad()
+    _disc_loss.backward()
+    disc_optimizer.step()
+
+    return _disc_loss
+
+def ae_train_step(target_poses):
+
+    #print("train step target_poses ", target_poses.shape)
+
+    # let autoencoder reproduce target_poses (decoder output) and also return encoder output
+    encoder_output = encoder(target_poses)
+    pred_poses = decoder(encoder_output)
+
+    # let discriminator output its fake assessment of the encoder output
+    # (no torch.no_grad() here: the prior loss must backpropagate into the encoder;
+    # the discriminator itself is only ever updated by disc_optimizer)
+    disc_fake_output = disc_prior(encoder_output)
+
+    _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output)
+
+    #print("_ae_pos_loss ", _ae_pos_loss)
+
+    # Backpropagation
+    ae_optimizer.zero_grad()
+    _ae_loss.backward()
+
+    #torch.nn.utils.clip_grad_norm(encoder.parameters(), 0.01)
+    #torch.nn.utils.clip_grad_norm(decoder.parameters(), 0.01)
+
+    ae_optimizer.step()
+
+    return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss
+
+def ae_test_step(target_poses):
+    with torch.no_grad():
+        # let autoencoder reproduce target_poses (decoder output) and also return encoder output
+        encoder_output = encoder(target_poses)
+        pred_poses = decoder(encoder_output)
+
+        # let discriminator output its fake assessment of the encoder output
+        disc_fake_output = disc_prior(encoder_output)
+
+        _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output)
+
+    return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss
+
+def train(train_dataloader, test_dataloader, epochs):
+
+    loss_history = {}
+    loss_history["ae train"] = []
+    loss_history["ae test"] = []
+    loss_history["ae norm"] = []
+    loss_history["ae pos"] = []
+    loss_history["ae quat"] = []
+    loss_history["ae prior"] = []
+    loss_history["disc prior"] = []
+
+    for epoch in range(epochs):
+
+        start = time.time()
+
+        ae_train_loss_per_epoch = []
+        ae_norm_loss_per_epoch = []
+        ae_pos_loss_per_epoch = []
+        ae_quat_loss_per_epoch = []
+        ae_prior_loss_per_epoch = []
+        disc_prior_loss_per_epoch = []
+
+        for train_batch in train_dataloader:
+            train_batch = train_batch.to(device)
+
+            # start with discriminator training
+            _disc_prior_train_loss = disc_prior_train_step(train_batch)
+
+            _disc_prior_train_loss = _disc_prior_train_loss.detach().cpu().numpy()
+
+            #print("_disc_prior_train_loss ", _disc_prior_train_loss)
+
+            disc_prior_loss_per_epoch.append(_disc_prior_train_loss)
+
+            # now train the autoencoder
+            _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_train_step(train_batch)
+
+            _ae_loss = _ae_loss.detach().cpu().numpy()
+            _ae_norm_loss = _ae_norm_loss.detach().cpu().numpy()
+            _ae_pos_loss = _ae_pos_loss.detach().cpu().numpy()
+            _ae_quat_loss = _ae_quat_loss.detach().cpu().numpy()
+            _ae_prior_loss = _ae_prior_loss.detach().cpu().numpy()
+
+            #print("_ae_prior_loss ", _ae_prior_loss)
+
+            ae_train_loss_per_epoch.append(_ae_loss)
+            ae_norm_loss_per_epoch.append(_ae_norm_loss)
+            ae_pos_loss_per_epoch.append(_ae_pos_loss)
+            ae_quat_loss_per_epoch.append(_ae_quat_loss)
+            ae_prior_loss_per_epoch.append(_ae_prior_loss)
+
+        ae_train_loss_per_epoch = np.mean(np.array(ae_train_loss_per_epoch))
+        ae_norm_loss_per_epoch = np.mean(np.array(ae_norm_loss_per_epoch))
+        ae_pos_loss_per_epoch = np.mean(np.array(ae_pos_loss_per_epoch))
+        ae_quat_loss_per_epoch = np.mean(np.array(ae_quat_loss_per_epoch))
+        ae_prior_loss_per_epoch = np.mean(np.array(ae_prior_loss_per_epoch))
+        disc_prior_loss_per_epoch = np.mean(np.array(disc_prior_loss_per_epoch))
+
+        ae_test_loss_per_epoch = []
+
+        for test_batch in test_dataloader:
+            test_batch = test_batch.to(device)
+
+            _ae_loss, _, _, _, _ = ae_test_step(test_batch)
+
+            _ae_loss = _ae_loss.detach().cpu().numpy()
+            ae_test_loss_per_epoch.append(_ae_loss)
+
+        ae_test_loss_per_epoch = np.mean(np.array(ae_test_loss_per_epoch))
+
+        if epoch % model_save_interval == 0 and save_weights == True:
+            torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epoch))
+            torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epoch))
+            torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epoch))
+
+        loss_history["ae train"].append(ae_train_loss_per_epoch)
+        loss_history["ae test"].append(ae_test_loss_per_epoch)
+        loss_history["ae norm"].append(ae_norm_loss_per_epoch)
+        loss_history["ae pos"].append(ae_pos_loss_per_epoch)
+        loss_history["ae quat"].append(ae_quat_loss_per_epoch)
+        loss_history["ae prior"].append(ae_prior_loss_per_epoch)
+        loss_history["disc prior"].append(disc_prior_loss_per_epoch)
+
+        print ('epoch {} : ae train: {:01.4f} ae test: {:01.4f} disc prior {:01.4f} norm {:01.4f} pos {:01.4f} quat {:01.4f} prior {:01.4f} time {:01.2f}'.format(epoch + 1, ae_train_loss_per_epoch, ae_test_loss_per_epoch, disc_prior_loss_per_epoch, ae_norm_loss_per_epoch, ae_pos_loss_per_epoch, ae_quat_loss_per_epoch, ae_prior_loss_per_epoch, time.time()-start))
+
+    return loss_history
+
+# fit model
+loss_history = train(train_dataloader, test_dataloader, epochs)
+
+# save history
+utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs))
+utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs))
+
+# save model weights
+torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epochs))
+torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epochs))
+torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epochs))
+
+# inference and rendering
+
+skel_edge_list = utils.get_skeleton_edge_list(skeleton)
+poseRenderer = PoseRenderer(skel_edge_list)
+
+def create_ref_sequence_anim(seq_index, file_name):
sequence_excerpt = pose_sequence_excerpts[seq_index] + sequence_excerpt = np.reshape(sequence_excerpt, (sequence_length, joint_count, joint_dim)) + + sequence_excerpt = torch.tensor(np.expand_dims(sequence_excerpt, axis=0)).to(device) + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)).to(device) + + skel_sequence = skeleton.forward_kinematics(sequence_excerpt, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + +def create_rec_sequence_anim(seq_index, file_name): + sequence_excerpt = pose_sequence_excerpts[seq_index] + sequence_excerpt = np.expand_dims(sequence_excerpt, axis=0) + + sequence_excerpt = torch.from_numpy(sequence_excerpt).to(device) + + with torch.no_grad(): + sequence_enc = encoder(sequence_excerpt) + pred_sequence = decoder(sequence_enc) + + pred_sequence = torch.squeeze(pred_sequence) + pred_sequence = pred_sequence.view((-1, 4)) + pred_sequence = nn.functional.normalize(pred_sequence, p=2, dim=1) + pred_sequence = pred_sequence.view((1, sequence_length, joint_count, joint_dim)) + + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_sequence = skeleton.forward_kinematics(pred_sequence, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + +def encode_sequences(frame_indices): + + encoder.eval() + + latent_vectors = [] + + seq_excerpt_count = len(frame_indices) + + for excerpt_index in range(seq_excerpt_count): + excerpt_start_frame = frame_indices[excerpt_index] + excerpt_end_frame = excerpt_start_frame + sequence_length + excerpt = pose_sequence[excerpt_start_frame:excerpt_end_frame] + excerpt = np.expand_dims(excerpt, axis=0) + excerpt = torch.from_numpy(excerpt).to(device) + + with torch.no_grad(): + latent_vector = encoder(excerpt) + + latent_vector = torch.squeeze(latent_vector) + latent_vector = latent_vector.detach().cpu().numpy() + + latent_vectors.append(latent_vector) + + encoder.train() + + return latent_vectors + +def decode_sequence_encodings(sequence_encodings, seq_overlap, base_pose, file_name): + + decoder.eval() + + seq_env = np.hanning(sequence_length) + seq_excerpt_count = len(sequence_encodings) + gen_seq_length = (seq_excerpt_count - 1) * seq_overlap + sequence_length + + gen_sequence = np.full(shape=(gen_seq_length, joint_count, joint_dim), fill_value=base_pose) + + for excerpt_index in range(len(sequence_encodings)): + latent_vector = sequence_encodings[excerpt_index] + latent_vector = np.expand_dims(latent_vector, axis=0) + latent_vector = torch.from_numpy(latent_vector).to(device) + + with torch.no_grad(): + excerpt_dec = decoder(latent_vector) + + excerpt_dec = torch.squeeze(excerpt_dec) + excerpt_dec = excerpt_dec.detach().cpu().numpy() + 
excerpt_dec = np.reshape(excerpt_dec, (-1, joint_count, joint_dim)) + + gen_frame = excerpt_index * seq_overlap + + for si in range(sequence_length): + for ji in range(joint_count): + current_quat = gen_sequence[gen_frame + si, ji, :] + target_quat = excerpt_dec[si, ji, :] + quat_mix = seq_env[si] + mix_quat = slerp(current_quat, target_quat, quat_mix ) + gen_sequence[gen_frame + si, ji, :] = mix_quat + + gen_sequence = torch.from_numpy(gen_sequence) + gen_sequence = gen_sequence.view((-1, 4)) + gen_sequence = nn.functional.normalize(gen_sequence, p=2, dim=1) + gen_sequence = gen_sequence.view((gen_seq_length, joint_count, joint_dim)) + gen_sequence = torch.unsqueeze(gen_sequence, dim=0) + gen_sequence = gen_sequence.to(device) + + zero_trajectory = torch.tensor(np.zeros((1, gen_seq_length, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_sequence = skeleton.forward_kinematics(gen_sequence, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + + decoder.train() + +# create single original sequence + +seq_index = 100 + +create_ref_sequence_anim(seq_index, "results/anims/orig_sequence_{}.gif".format(seq_index)) + +# recontruct single pose + +seq_index = 100 + +create_rec_sequence_anim(seq_index, "results/anims/rec_sequence_{}.gif".format(seq_index)) + +# configure sequence blending +seq_overlap = 32 +base_pose = np.reshape(pose_sequence[0], (joint_count, joint_dim)) + +# reconstruct original pose sequence +start_seq_index = 100 +end_seq_index = 612 +seq_indices = [ frame_index for frame_index in range(start_seq_index, end_seq_index, seq_overlap)] + +seq_encodings = encode_sequences(seq_indices) +decode_sequence_encodings(seq_encodings, seq_overlap, base_pose, "results/anims/rec_sequences_{}-{}.gif".format(start_seq_index, end_seq_index)) + +# random walk +start_seq_index = 4000 +seq_frame_count = 32 + +seq_indices = [start_seq_index] + +seq_encodings = encode_sequences(seq_indices) + +for index in range(0, seq_frame_count - 1): + random_step = np.random.random((latent_dim)).astype(np.float32) * 2.0 + seq_encodings.append(seq_encodings[index] + random_step) + +decode_sequence_encodings(seq_encodings, seq_overlap, base_pose, "results/anims/seq_randwalk_{}_{}.gif".format(start_seq_index, seq_frame_count)) + + +# sequence offset following + +seq_start_index = 4000 +seq_end_index = 5000 + +seq_indices = [ seq_index for seq_index in range(seq_start_index, seq_end_index, seq_overlap)] + +seq_encodings = encode_sequences(seq_indices) + +offset_seq_encodings = [] + +for index in range(len(seq_encodings)): + sin_value = np.sin(index / (len(seq_encodings) - 1) * np.pi * 4.0) + offset = np.ones(shape=(latent_dim), dtype=np.float32) * sin_value * 4.0 + offset_seq_encoding = seq_encodings[index] + offset + offset_seq_encodings.append(offset_seq_encoding) + +decode_sequence_encodings(offset_seq_encodings, seq_overlap, base_pose, "results/anims/seq_offset_{}-{}.gif".format(seq_start_index, seq_end_index)) + + + +# interpolate two original sequences + +seq1_start_index = 1000 +seq1_end_index = 2000 + +seq2_start_index = 4000 +seq2_end_index = 5000 + +seq1_indices = [ seq_index for 
seq_index in range(seq1_start_index, seq1_end_index, seq_overlap)] +seq2_indices = [ seq_index for seq_index in range(seq2_start_index, seq2_end_index, seq_overlap)] + +seq1_encodings = encode_sequences(seq1_indices) +seq2_encodings = encode_sequences(seq2_indices) + +mix_encodings = [] + +for index in range(len(seq1_encodings)): + mix_factor = index / (len(seq1_indices) - 1) + mix_encoding = seq1_encodings[index] * (1.0 - mix_factor) + seq2_encodings[index] * mix_factor + mix_encodings.append(mix_encoding) + +decode_sequence_encodings(mix_encodings, seq_overlap, base_pose, "results/anims/seq_mix_{}-{}_{}-{}.gif".format(seq1_start_index, seq1_end_index, seq2_start_index, seq2_end_index)) diff --git a/autoencoder/image_autoencoder/.pylint.d/image_autoencoder1.stats b/autoencoder/image_autoencoder/.pylint.d/image_autoencoder1.stats new file mode 100644 index 0000000000000000000000000000000000000000..8a81ede4f214d4f178035e639c0c60bf99667e04 GIT binary patch literal 1282 zcmZWp%Wm5+5R~h85+`vUaFHfw=OTYafgIT0dnp8pqAdaxDNv*h9|H7RV1f3){!c$6 zf6{B0vScN#54JVjndQ#z>f7MgpFvcAp8X?C(|uCt%t`K_J=1S z?*gOpqz6-_ zl_5hh9#f0ANijLr@(!+!0n}_<=tK=)9AT*$m5BA>!YNnb#kVL=Q|xP?J2dQ2PX}MZ zyel(xLRx{B>A|84mRgr@gl3JnC`xUtCj+?J8SG4a0Ag*z%#eZ0M z{DeH@r#LgCX4){(q&3l#DdODbI`d=%!@47(mUC=&=vOBk9V8)^ituL1xdl(4UXsc#*?|AmA zpxZZiSHtvQMpH-<)G>R Yoo=K~sZ5l%+&}$3Puy}g_~KT70SxTqg#Z8m literal 0 HcmV?d00001 diff --git a/autoencoder/image_autoencoder/.spyproject/config/backups/codestyle.ini.bak b/autoencoder/image_autoencoder/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/backups/encoding.ini.bak b/autoencoder/image_autoencoder/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/backups/vcs.ini.bak b/autoencoder/image_autoencoder/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/backups/workspace.ini.bak b/autoencoder/image_autoencoder/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..b630e25 --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['..\\..\\..\\..\\..\\..\\..\\..\\..\\.spyder-py3\\temp.py', 'image_autoencoder.py'] + diff --git a/autoencoder/image_autoencoder/.spyproject/config/codestyle.ini b/autoencoder/image_autoencoder/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff 
--git a/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/autoencoder/image_autoencoder/.spyproject/config/encoding.ini b/autoencoder/image_autoencoder/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/vcs.ini b/autoencoder/image_autoencoder/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/image_autoencoder/.spyproject/config/workspace.ini b/autoencoder/image_autoencoder/.spyproject/config/workspace.ini new file mode 100644 index 0000000..b630e25 --- /dev/null +++ b/autoencoder/image_autoencoder/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['..\\..\\..\\..\\..\\..\\..\\..\\..\\.spyder-py3\\temp.py', 'image_autoencoder.py'] + diff --git a/autoencoder/image_autoencoder/image_autoencoder.py b/autoencoder/image_autoencoder/image_autoencoder.py new file mode 100644 index 0000000..28ebbc9 --- /dev/null +++ b/autoencoder/image_autoencoder/image_autoencoder.py @@ -0,0 +1,652 @@ +""" +Introduction Convolutional Neural Networks: + https://www.analyticsvidhya.com/blog/2021/05/convolutional-neural-networks-cnn/ + https://towardsdatascience.com/pytorch-basics-how-to-train-your-neural-net-intro-to-cnn-26a14c2ea29 + +Introduction Adversarial Networks: + Generative Adversarial Networks: 
https://wiki.pathmind.com/generative-adversarial-network-gan + Adversarial Autoencoder: https://medium.com/vitrox-publication/adversarial-auto-encoder-aae-a3fc86f71758 +""" + +import numpy as np +import torch +import torchvision +from pytorch_model_summary import summary +from torch.utils.data import DataLoader +from torch import nn +from torch import optim +from collections import OrderedDict +import matplotlib.pyplot as plt +import time +import pickle +import math + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# image settings +image_data_path = "../../../../Data/Images" +image_size = 128 +image_channels = 3 + +# model settings +latent_dim = 64 +ae_conv_channel_counts = [ 8, 32, 128, 512 ] +ae_conv_kernel_size = 5 +ae_dense_layer_sizes = [ 128 ] + +disc_prior_dense_layer_sizes = [ 128, 128 ] + +save_models = False +save_tscript = False +save_weights = True + +# load model weights +load_weights = False +disc_prior_weights_file = "results/weights/disc_prior_weights_epoch_400" +encoder_weights_file = "results/weights/encoder_weights_epoch_400" +decoder_weights_file = "results/weights/decoder_weights_epoch_400" + +# training settings +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +dp_learning_rate = 5e-3 +ae_learning_rate = 1e-3 +ae_rec_loss_scale = 1.0 +ae_prior_loss_scale = 0.1 +epochs = 100 +weight_save_interval = 10 +save_history = False + +# create dataset +transform = torchvision.transforms.Compose([torchvision.transforms.Resize(image_size), + torchvision.transforms.ToTensor()]) + +full_dataset = torchvision.datasets.ImageFolder(image_data_path, transform=transform) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# Create Models + +# Critique +class DiscriminatorPrior(nn.Module): + def __init__(self, latent_dim, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("disc_prior_dense_0", nn.Linear(latent_dim, dense_layer_sizes[0]))) + dense_layers.append(("disc_prior_elu_0", nn.ELU())) + + dense_layer_count = len(dense_layer_sizes) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("disc_prior_dense_{}".format(layer_index), nn.Linear(dense_layer_sizes[layer_index - 1], dense_layer_sizes[layer_index]))) + dense_layers.append(("disc_prior_elu_{}".format(layer_index), nn.ELU())) + + dense_layers.append(("disc_prior_dense_{}".format(dense_layer_count), nn.Linear(dense_layer_sizes[-1], 1))) + dense_layers.append(("disc_prior_sigmoid_{}".format(dense_layer_count), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x1 s", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat s", yhat.shape) + + return yhat + +disc_prior = DiscriminatorPrior(latent_dim, disc_prior_dense_layer_sizes).to(device) + +print(disc_prior) + +""" +test_input = torch.zeros((1, latent_dim)).to(device) +test_output = disc_prior(test_input) +""" + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(disc_prior, "results/models/disc_prior.pth") + + # save using 
onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(disc_prior, x, "results/models/disc_prior.onnx") + + disc_prior.train() + +if save_tscript == True: + disc_prior.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(disc_prior, x) + script_module.save("results/models/disc_prior.pt") + + disc_prior.train() + +if load_weights and disc_prior_weights_file: + disc_prior.load_state_dict(torch.load(disc_prior_weights_file)) + + +# Encoder +class Encoder(nn.Module): + + def __init__(self, latent_dim, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + + conv_layers.append(("encoder_conv_0", nn.Conv2d(self.image_channels, conv_channel_counts[0], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("encoder_bnorm_0", nn.BatchNorm2d(conv_channel_counts[0]))) + conv_layers.append(("encoder_lrelu_0", nn.LeakyReLU(0.2))) + + conv_layer_count = len(conv_channel_counts) + for layer_index in range(1, conv_layer_count): + conv_layers.append(("encoder_conv_{}".format(layer_index), nn.Conv2d(conv_channel_counts[layer_index-1], conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("encoder_bnorm_{}".format(layer_index), nn.BatchNorm2d(conv_channel_counts[layer_index]))) + conv_layers.append(("encoder_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + + self.flatten = nn.Flatten() + + # create dense layers + dense_layers = [] + + last_conv_layer_size = int(image_size // np.power(2, len(conv_channel_counts))) + preflattened_size = [conv_channel_counts[-1], last_conv_layer_size, last_conv_layer_size] + dense_layer_input_size = conv_channel_counts[-1] * last_conv_layer_size * last_conv_layer_size + + dense_layers.append(("encoder_dense_0", nn.Linear(dense_layer_input_size, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.latent_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x1 s ", x.shape) + + x = self.conv_layers(x) + + #print("x2 s ", x.shape) + + x = self.flatten(x) + + #print("x3 s ", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat s ", yhat.shape) + + return yhat + +encoder = Encoder(latent_dim, image_size, image_channels, ae_conv_channel_counts, ae_conv_kernel_size, ae_dense_layer_sizes).to(device) + +print(encoder) + +""" +test_input = torch.zeros((1, image_channels, image_size, image_size)).to(device) +test_output = encoder(test_input) +""" + +if save_models == True: + encoder.eval() + + # save using 
pickle + torch.save(encoder, "results/models/encoder.pth") + + # save using onnx + x = torch.zeros((1, image_channels, image_size, image_size)).to(device) + torch.onnx.export(encoder, x, "results/models/encoder.onnx") + + encoder.train() + +if save_tscript == True: + encoder.eval() + + # save using TochScript + x = torch.rand((1, image_channels, image_size, image_size), dtype=torch.float32).to(device) + script_module = torch.jit.trace(encoder, x) + script_module.save("results/models/encoder.pt") + + encoder.train() + +if load_weights and encoder_weights_file: + encoder.load_state_dict(torch.load(encoder_weights_file)) + +# Decoder +class Decoder(nn.Module): + + def __init__(self, latent_dim, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("decoder_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + last_conv_layer_size = int(image_size // np.power(2, len(conv_channel_counts))) + preflattened_size = [conv_channel_counts[0], last_conv_layer_size, last_conv_layer_size] + dense_layer_output_size = conv_channel_counts[0] * last_conv_layer_size * last_conv_layer_size + + dense_layers.append(("decoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], dense_layer_output_size))) + dense_layers.append(("decoder_dense_relu_{}".format(len(self.dense_layer_sizes)), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + self.unflatten = nn.Unflatten(dim=1, unflattened_size=preflattened_size) + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + output_padding = 1 + + conv_layer_count = len(conv_channel_counts) + for layer_index in range(1, conv_layer_count): + conv_layers.append(("decoder_bnorm_{}".format(layer_index), nn.BatchNorm2d(conv_channel_counts[layer_index-1]))) + conv_layers.append(("decoder_conv_{}".format(layer_index), nn.ConvTranspose2d(conv_channel_counts[layer_index-1], conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + conv_layers.append(("decoder_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + conv_layers.append(("decoder_bnorm_{}".format(conv_layer_count), nn.BatchNorm2d(conv_channel_counts[-1]))) + conv_layers.append(("decoder_conv_{}".format(conv_layer_count), nn.ConvTranspose2d(conv_channel_counts[-1], self.image_channels, self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + + def forward(self, x): + + #print("x1 s ", x.shape) + + x = self.dense_layers(x) + + #print("x2 s ", x.shape) + + x = self.unflatten(x) + + #print("x3 s ", x.shape) + + yhat = self.conv_layers(x) + + #print("yhat s ", yhat.shape) + + return yhat + 
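A quick way to see why the reversed channel list built just below lines up with the encoder: with the settings defined near the top of this file (image_size = 128, ae_conv_channel_counts = [8, 32, 128, 512], kernel size 5, so stride (5 - 1) // 2 = 2), each convolution halves the spatial resolution, the encoder maps 128 -> 64 -> 32 -> 16 -> 8 and flattens 512 * 8 * 8 features, and the decoder's transposed convolutions retrace the same path back to 3 x 128 x 128. The following is only an editor's shape-check sketch under those assumptions, not code from the patch:

    # shape check for the mirrored encoder/decoder geometry
    # (values taken from the hyperparameters defined earlier in this file)
    image_size = 128
    ae_conv_channel_counts = [8, 32, 128, 512]
    ae_conv_kernel_size = 5

    stride = (ae_conv_kernel_size - 1) // 2                 # 2 -> every conv layer halves the resolution

    last_conv_layer_size = image_size // 2 ** len(ae_conv_channel_counts)    # 128 // 16 = 8
    flattened_size = ae_conv_channel_counts[-1] * last_conv_layer_size ** 2  # 512 * 8 * 8 = 32768

    print(last_conv_layer_size, flattened_size)             # 8 32768

This is the same arithmetic the Linear layer feeding nn.Unflatten relies on; if image_size is not divisible by 2 ** len(ae_conv_channel_counts), the unflatten and ConvTranspose2d shapes no longer match.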
+ae_conv_channel_counts_reversed = ae_conv_channel_counts.copy() +ae_conv_channel_counts_reversed.reverse() + +ae_dense_layer_sizes_reversed = ae_dense_layer_sizes.copy() +ae_dense_layer_sizes_reversed.reverse() + +decoder = Decoder(latent_dim, image_size, image_channels, ae_conv_channel_counts_reversed, ae_conv_kernel_size, ae_dense_layer_sizes_reversed).to(device) + +print(decoder) + +""" +test_input = torch.zeros((1, latent_dim)).to(device) +test_output = generator(test_input) +""" + +if save_models == True: + decoder.eval() + + # save using pickle + torch.save(decoder, "results/models/decoder.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(decoder, x, "results/models/decoder.onnx") + + decoder.train() + +if save_tscript == True: + decoder.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(decoder, x) + script_module.save("results/models/decoder.pt") + + decoder.train() + +if load_weights and decoder_weights_file: + decoder.load_state_dict(torch.load(decoder_weights_file)) + +#Training + +disc_prior_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=dp_learning_rate) +ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=ae_learning_rate) + +mse_loss = torch.nn.MSELoss() +bce_loss = nn.BCEWithLogitsLoss() + +# function returning normal distributed random data +# serves as reference for the discriminator to distinguish the encoders prior from +def sample_normal(shape): + return torch.tensor(np.random.normal(size=shape), dtype=torch.float32).to(device) + +# discriminator prior loss function +def disc_prior_loss(disc_real_output, disc_fake_output): + _real_loss = bce_loss(disc_real_output, torch.ones_like(disc_real_output).to(device)) + _fake_loss = bce_loss(disc_fake_output, torch.zeros_like(disc_fake_output).to(device)) + + _total_loss = (_real_loss + _fake_loss) * 0.5 + return _total_loss + +def ae_loss(y, yhat, disc_pior_fake_output): + _ae_rec_loss = mse_loss(y, yhat) + _disc_prior_fake_loss = bce_loss(disc_pior_fake_output, torch.ones_like(disc_pior_fake_output).to(device)) + + _total_loss = 0.0 + _total_loss += _ae_rec_loss * ae_rec_loss_scale + _total_loss += _disc_prior_fake_loss * ae_prior_loss_scale + + return _total_loss, _ae_rec_loss, _disc_prior_fake_loss + +def disc_prior_train_step(target_images): + # have normal distribution and encoder produce real and fake outputs, respectively + + with torch.no_grad(): + encoder_output = encoder(target_images) + + real_output = sample_normal(encoder_output.shape) + + # let discriminator distinguish between real and fake outputs + disc_real_output = disc_prior(real_output) + disc_fake_output = disc_prior(encoder_output) + _disc_loss = disc_prior_loss(disc_real_output, disc_fake_output) + + # Backpropagation + disc_prior_optimizer.zero_grad() + _disc_loss.backward() + disc_prior_optimizer.step() + + return _disc_loss + +def ae_train_step(target_images): + + encoder_output = encoder(target_images) + pred_images = decoder(encoder_output) + + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_rec_loss, _disc_prior_fake_loss = ae_loss(target_images, pred_images, disc_fake_output) + + ae_optimizer.zero_grad() + _ae_loss.backward() + ae_optimizer.step() + + return _ae_loss, _ae_rec_loss, _disc_prior_fake_loss + +def ae_test_step(target_images): + + with torch.no_grad(): + encoder_output = encoder(target_images) + pred_images = decoder(encoder_output) + 
disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_rec_loss, _disc_prior_fake_loss = ae_loss(target_images, pred_images, disc_fake_output) + + return _ae_loss, _ae_rec_loss, _disc_prior_fake_loss + +def plot_ae_outputs(encoder, decoder, epoch, n=5): + + encoder.eval() + decoder.eval() + + plt.figure(figsize=(10,4.5)) + for i in range(n): + ax = plt.subplot(2,n,i+1) + img = test_dataset[i][0].unsqueeze(0).to(device) + + with torch.no_grad(): + rec_img = decoder(encoder(img)) + + img = img.cpu().squeeze().numpy() + img = np.clip(img, 0.0, 1.0) + img = np.moveaxis(img, 0, 2) + + rec_img = rec_img.cpu().squeeze().numpy() + rec_img = np.clip(rec_img, 0.0, 1.0) + rec_img = np.moveaxis(rec_img, 0, 2) + + plt.imshow(img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title('Original images') + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(rec_img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title("Epoch {}: Reconstructed images".format(epoch)) + plt.show() + #plt.savefig("epoch_{0:05d}.jpg".format(epoch)) + plt.close() + + decoder.train() + decoder.train() + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ae train"] = [] + loss_history["ae test"] = [] + loss_history["ae rec"] = [] + loss_history["ae prior"] = [] + loss_history["disc prior"] = [] + + for epoch in range(epochs): + + start = time.time() + + ae_train_loss_per_epoch = [] + ae_rec_loss_per_epoch = [] + ae_prior_loss_per_epoch = [] + disc_prior_loss_per_epoch = [] + + for train_batch, _ in train_dataloader: + train_batch = train_batch.to(device) + + # start with discriminator training + _disc_prior_train_loss = disc_prior_train_step(train_batch) + _disc_prior_train_loss = _disc_prior_train_loss.detach().cpu().numpy() + disc_prior_loss_per_epoch.append(_disc_prior_train_loss) + + # now train the autoencoder + _ae_loss, _ae_rec_loss, _ae_prior_loss = ae_train_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + _ae_rec_loss = _ae_rec_loss.detach().cpu().numpy() + _ae_prior_loss = _ae_prior_loss.detach().cpu().numpy() + + ae_train_loss_per_epoch.append(_ae_loss) + ae_rec_loss_per_epoch.append(_ae_rec_loss) + ae_prior_loss_per_epoch.append(_ae_prior_loss) + + ae_train_loss_per_epoch = np.mean(np.array(ae_train_loss_per_epoch)) + ae_rec_loss_per_epoch = np.mean(np.array(ae_rec_loss_per_epoch)) + ae_prior_loss_per_epoch = np.mean(np.array(ae_prior_loss_per_epoch)) + disc_prior_loss_per_epoch = np.mean(np.array(disc_prior_loss_per_epoch)) + + ae_test_loss_per_epoch = [] + + for test_batch, _ in test_dataloader: + test_batch = test_batch.to(device) + + _ae_loss, _, _ = ae_test_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + ae_test_loss_per_epoch.append(_ae_loss) + + ae_test_loss_per_epoch = np.mean(np.array(ae_test_loss_per_epoch)) + + if epoch % weight_save_interval == 0 and save_weights == True: + torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epoch)) + torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epoch)) + torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epoch)) + + plot_ae_outputs(encoder, decoder, epoch) + + loss_history["ae train"].append(ae_train_loss_per_epoch) + loss_history["ae test"].append(ae_test_loss_per_epoch) + loss_history["ae rec"].append(ae_rec_loss_per_epoch) + loss_history["ae prior"].append(ae_prior_loss_per_epoch) + 
loss_history["disc prior"].append(disc_prior_loss_per_epoch) + + print ('epoch {} : ae train: {:01.4f} ae test: {:01.4f} disc prior {:01.4f} rec {:01.4f} prior {:01.4f} time {:01.2f}'.format(epoch + 1, ae_train_loss_per_epoch, ae_test_loss_per_epoch, disc_prior_loss_per_epoch, ae_rec_loss_per_epoch, ae_prior_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +epochs = 100 + +# outer loop over the training epochs +for epoch in range(epochs): + + disc_prior_epoch_loss = 0 + autoencoder_epoch_loss = 0 + + tick = time.time() + + for batch_features, _ in train_dataloader: + + batch_features = batch_features.to(device) + + # disc prior train step + disc_prior_optimizer.zero_grad() + + with torch.no_grad(): + fake_output = encoder(batch_features) + real_output = sample_normal(fake_output.shape) + + disc_prior_real_output = disc_prior(real_output) + disc_prior_fake_output = disc_prior(fake_output) + + _disc_prior_loss = disc_prior_loss(disc_prior_real_output, disc_prior_fake_output) + + _disc_prior_loss.backward() + disc_prior_optimizer.step() + + disc_prior_epoch_loss += _disc_prior_loss.item() + + # autoencoder train step + ae_optimizer.zero_grad() + + encoded_images = encoder(batch_features) + reconstructed_images = decoder(encoded_images) + + disc_prior_fake_output = disc_prior(encoded_images) + + _autoencoder_loss, _, _ = ae_loss(batch_features, reconstructed_images, disc_prior_fake_output) + + _autoencoder_loss.backward() + ae_optimizer.step() + + autoencoder_epoch_loss += _autoencoder_loss.item() + + # compute the epoch training loss + disc_prior_epoch_loss = disc_prior_epoch_loss / len(train_dataloader) + autoencoder_epoch_loss = autoencoder_epoch_loss / len(train_dataloader) + + tock = time.time() + + + # display the epoch training loss + plot_ae_outputs(encoder,decoder, epoch, n=5) + print("epoch : {}/{}, dp_loss = {:.6f} ae_loss = {:.6f}, time = {:.2f}".format(epoch + 1, epochs, disc_prior_epoch_loss, autoencoder_epoch_loss, (tock - tick))) + +""" +def plot_ae_outputs2(encoder, decoder, epoch, n=5): + + encoder.eval() + decoder.eval() + + plt.figure(figsize=(10,4.5)) + for i in range(n): + ax = plt.subplot(2,n,i+1) + img = train_dataset[i][0].unsqueeze(0).to(device) + + with torch.no_grad(): + rec_img = decoder(encoder(img)) + + img = img.cpu().squeeze().numpy() + img = np.clip(img, 0.0, 1.0) + img = np.moveaxis(img, 0, 2) + + rec_img = rec_img.cpu().squeeze().numpy() + rec_img = np.clip(rec_img, 0.0, 1.0) + rec_img = np.moveaxis(rec_img, 0, 2) + + plt.imshow(img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title('Original images') + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(rec_img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title("Epoch {}: Reconstructed images".format(epoch)) + plt.show() + #plt.savefig("epoch_{0:05d}.jpg".format(epoch)) + plt.close() + + decoder.train() + decoder.train() + + +plot_ae_outputs2(encoder, decoder, 200) +"""""" \ No newline at end of file diff --git a/autoencoder/image_autoencoder/image_autoencoder_tmp.py b/autoencoder/image_autoencoder/image_autoencoder_tmp.py new file mode 100644 index 0000000..c858112 --- /dev/null +++ b/autoencoder/image_autoencoder/image_autoencoder_tmp.py @@ -0,0 +1,862 @@ +""" +Introduction Convolutional Neural Networks: + https://www.analyticsvidhya.com/blog/2021/05/convolutional-neural-networks-cnn/ + 
https://towardsdatascience.com/pytorch-basics-how-to-train-your-neural-net-intro-to-cnn-26a14c2ea29 + +Introduction Adversarial Networks: + Generative Adversarial Networks: https://wiki.pathmind.com/generative-adversarial-network-gan + Adversarial Autoencoder: https://medium.com/vitrox-publication/adversarial-auto-encoder-aae-a3fc86f71758 +""" + +import numpy as np +import torch +import torchvision +from pytorch_model_summary import summary +from torch.utils.data import DataLoader +from torch import nn +from torch import optim +from collections import OrderedDict +import matplotlib.pyplot as plt +import time +import pickle +import math + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# image settings +image_data_path = "../../../../Data/Images" +image_size = 128 +image_channels = 3 + +# model settings +latent_dim = 64 +ae_conv_channel_counts = [ 8, 32, 128, 512 ] +ae_conv_kernel_size = 5 +ae_dense_layer_sizes = [ 128 ] + +disc_prior_dense_layer_sizes = [ 128, 128 ] + +save_models = False +save_tscript = False +save_weights = True + +# load model weights +load_weights = False +disc_prior_weights_file = "results/weights/disc_prior_weights_epoch_400" +encoder_weights_file = "results/weights/encoder_weights_epoch_400" +decoder_weights_file = "results/weights/decoder_weights_epoch_400" + +# training settings +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +dp_learning_rate = 5e-3 +ae_learning_rate = 1e-3 +ae_rec_loss_scale = 1.0 +ae_prior_loss_scale = 0.5 +epochs = 100 +weight_save_interval = 10 +save_history = False + +# create dataset +transform = torchvision.transforms.Compose([torchvision.transforms.Resize(image_size), + torchvision.transforms.ToTensor()]) +full_dataset = torchvision.datasets.ImageFolder(image_data_path, transform=transform) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# Create Models + +# Critique +class DiscriminatorPrior(nn.Module): + def __init__(self, latent_dim, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("disc_prior_dense_0", nn.Linear(latent_dim, dense_layer_sizes[0]))) + dense_layers.append(("disc_prior_elu_0", nn.ELU())) + + dense_layer_count = len(dense_layer_sizes) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("disc_prior_dense_{}".format(layer_index), nn.Linear(dense_layer_sizes[layer_index - 1], dense_layer_sizes[layer_index]))) + dense_layers.append(("disc_prior_elu_{}".format(layer_index), nn.ELU())) + + dense_layers.append(("disc_prior_dense_{}".format(dense_layer_count), nn.Linear(dense_layer_sizes[-1], 1))) + dense_layers.append(("disc_prior_sigmoid_{}".format(dense_layer_count), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x1 s", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat s", yhat.shape) + + return yhat + +disc_prior = DiscriminatorPrior(latent_dim, disc_prior_dense_layer_sizes).to(device) + +print(disc_prior) + +""" +test_input = torch.zeros((1, latent_dim)).to(device) +test_output = 
disc_prior(test_input) +""" + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(disc_prior, "results/models/disc_prior.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(disc_prior, x, "results/models/disc_prior.onnx") + + disc_prior.train() + +if save_tscript == True: + disc_prior.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(disc_prior, x) + script_module.save("results/models/disc_prior.pt") + + disc_prior.train() + +if load_weights and disc_prior_weights_file: + disc_prior.load_state_dict(torch.load(disc_prior_weights_file)) + +# Encoder +class Encoder(nn.Module): + + def __init__(self, latent_dim, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + + conv_layers.append(("encoder_conv_0", nn.Conv2d(self.image_channels, conv_channel_counts[0], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("encoder_bnorm_0", nn.BatchNorm2d(conv_channel_counts[0]))) + conv_layers.append(("encoder_lrelu_0", nn.LeakyReLU(0.2))) + + conv_layer_count = len(conv_channel_counts) + for layer_index in range(1, conv_layer_count): + conv_layers.append(("encoder_conv_{}".format(layer_index), nn.Conv2d(conv_channel_counts[layer_index-1], conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("encoder_bnorm_{}".format(layer_index), nn.BatchNorm2d(conv_channel_counts[layer_index]))) + conv_layers.append(("encoder_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + + self.flatten = nn.Flatten() + + # create dense layers + dense_layers = [] + + last_conv_layer_size = int(image_size // np.power(2, len(conv_channel_counts))) + preflattened_size = [conv_channel_counts[-1], last_conv_layer_size, last_conv_layer_size] + dense_layer_input_size = conv_channel_counts[-1] * last_conv_layer_size * last_conv_layer_size + + dense_layers.append(("encoder_dense_0", nn.Linear(dense_layer_input_size, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.latent_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x1 s ", x.shape) + + x = self.conv_layers(x) + + #print("x2 s ", x.shape) + + x = self.flatten(x) + + #print("x3 s ", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat s ", yhat.shape) + + return yhat + +encoder = Encoder(latent_dim, image_size, image_channels, ae_conv_channel_counts, ae_conv_kernel_size, ae_dense_layer_sizes).to(device) + +print(encoder) + +""" +test_input 
= torch.zeros((1, image_channels, image_size, image_size)).to(device) +test_output = encoder(test_input) +""" + +if save_models == True: + encoder.eval() + + # save using pickle + torch.save(encoder, "results/models/encoder.pth") + + # save using onnx + x = torch.zeros((1, image_channels, image_size, image_size)).to(device) + torch.onnx.export(encoder, x, "results/models/encoder.onnx") + + encoder.train() + +if save_tscript == True: + encoder.eval() + + # save using TochScript + x = torch.rand((1, image_channels, image_size, image_size), dtype=torch.float32).to(device) + script_module = torch.jit.trace(encoder, x) + script_module.save("results/models/encoder.pt") + + encoder.train() + +if load_weights and encoder_weights_file: + encoder.load_state_dict(torch.load(encoder_weights_file)) + +# Decoder +class Decoder(nn.Module): + + def __init__(self, latent_dim, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("decoder_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + last_conv_layer_size = int(image_size // np.power(2, len(conv_channel_counts))) + preflattened_size = [conv_channel_counts[0], last_conv_layer_size, last_conv_layer_size] + dense_layer_output_size = conv_channel_counts[0] * last_conv_layer_size * last_conv_layer_size + + dense_layers.append(("decoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], dense_layer_output_size))) + dense_layers.append(("decoder_dense_relu_{}".format(len(self.dense_layer_sizes)), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + self.unflatten = nn.Unflatten(dim=1, unflattened_size=preflattened_size) + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + output_padding = 1 + + conv_layer_count = len(conv_channel_counts) + for layer_index in range(1, conv_layer_count): + conv_layers.append(("decoder_bnorm_{}".format(layer_index), nn.BatchNorm2d(conv_channel_counts[layer_index-1]))) + conv_layers.append(("decoder_conv_{}".format(layer_index), nn.ConvTranspose2d(conv_channel_counts[layer_index-1], conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + conv_layers.append(("decoder_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + conv_layers.append(("decoder_bnorm_{}".format(conv_layer_count), nn.BatchNorm2d(conv_channel_counts[-1]))) + conv_layers.append(("decoder_conv_{}".format(conv_layer_count), nn.ConvTranspose2d(conv_channel_counts[-1], self.image_channels, self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + + def forward(self, x): + + #print("x1 s ", x.shape) + + x = self.dense_layers(x) + + 
#print("x2 s ", x.shape) + + x = self.unflatten(x) + + #print("x3 s ", x.shape) + + yhat = self.conv_layers(x) + + #print("yhat s ", yhat.shape) + + return yhat + +ae_conv_channel_counts_reversed = ae_conv_channel_counts.copy() +ae_conv_channel_counts_reversed.reverse() + +ae_dense_layer_sizes_reversed = ae_dense_layer_sizes.copy() +ae_dense_layer_sizes_reversed.reverse() + +decoder = Decoder(latent_dim, image_size, image_channels, ae_conv_channel_counts_reversed, ae_conv_kernel_size, ae_dense_layer_sizes_reversed).to(device) + +print(decoder) + +""" +test_input = torch.zeros((1, latent_dim)).to(device) +test_output = generator(test_input) +""" + +if save_models == True: + decoder.eval() + + # save using pickle + torch.save(decoder, "results/models/decoder.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(decoder, x, "results/models/decoder.onnx") + + decoder.train() + +if save_tscript == True: + decoder.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(decoder, x) + script_module.save("results/models/decoder.pt") + + decoder.train() + +if load_weights and decoder_weights_file: + decoder.load_state_dict(torch.load(decoder_weights_file)) + +#Training + +disc_prior_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=dp_learning_rate) +ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=ae_learning_rate) + +mse_loss = torch.nn.MSELoss() +bce_loss = nn.BCEWithLogitsLoss() + +# function returning normal distributed random data +# serves as reference for the discriminator to distinguish the encoders prior from +def sample_normal(shape): + return torch.tensor(np.random.normal(size=shape), dtype=torch.float32).to(device) + +# discriminator prior loss function +def disc_prior_loss(disc_real_output, disc_fake_output): + _real_loss = bce_loss(disc_real_output, torch.ones_like(disc_real_output).to(device)) + _fake_loss = bce_loss(disc_fake_output, torch.zeros_like(disc_fake_output).to(device)) + + _total_loss = (_real_loss + _fake_loss) * 0.5 + return _total_loss + +def ae_loss(y, yhat, disc_pior_fake_output): + _ae_rec_loss = mse_loss(y, yhat) + _disc_prior_fake_loss = bce_loss(disc_pior_fake_output, torch.ones_like(disc_pior_fake_output).to(device)) + + _total_loss = 0.0 + _total_loss += _ae_rec_loss * ae_rec_loss_scale + _total_loss += _disc_prior_fake_loss * ae_prior_loss_scale + + return _total_loss, _ae_rec_loss, _disc_prior_fake_loss + +def disc_prior_train_step(target_images): + # have normal distribution and encoder produce real and fake outputs, respectively + + with torch.no_grad(): + encoder_output = encoder(target_images) + + real_output = sample_normal(encoder_output.shape) + + # let discriminator distinguish between real and fake outputs + disc_real_output = disc_prior(real_output) + disc_fake_output = disc_prior(encoder_output) + _disc_loss = disc_prior_loss(disc_real_output, disc_fake_output) + + # Backpropagation + disc_prior_optimizer.zero_grad() + _disc_loss.backward() + disc_prior_optimizer.step() + + return _disc_loss + +def ae_train_step(target_images): + + encoder_output = encoder(target_images) + pred_images = decoder(encoder_output) + + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_rec_loss, _disc_prior_fake_loss = ae_loss(target_images, pred_images, disc_fake_output) + + ae_optimizer.zero_grad() + _ae_loss.backward() + ae_optimizer.step() + + return _ae_loss, _ae_rec_loss, 
_disc_prior_fake_loss + +def ae_test_step(target_images): + + with torch.no_grad(): + encoder_output = encoder(target_images) + pred_images = decoder(encoder_output) + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_rec_loss, _disc_prior_fake_loss = ae_loss(target_images, pred_images, disc_fake_output) + + return _ae_loss, _ae_rec_loss, _disc_prior_fake_loss + +def plot_ae_outputs(encoder, decoder, epoch, n=5): + + encoder.eval() + decoder.eval() + + plt.figure(figsize=(10,4.5)) + for i in range(n): + ax = plt.subplot(2,n,i+1) + img = test_dataset[i][0].unsqueeze(0).to(device) + + with torch.no_grad(): + rec_img = decoder(encoder(img)) + + img = img.cpu().squeeze().numpy() + img = np.clip(img, 0.0, 1.0) + img = np.moveaxis(img, 0, 2) + + rec_img = rec_img.cpu().squeeze().numpy() + rec_img = np.clip(rec_img, 0.0, 1.0) + rec_img = np.moveaxis(rec_img, 0, 2) + + plt.imshow(img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title('Original images') + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(rec_img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title("Epoch {}: Reconstructed images".format(epoch)) + plt.show() + #plt.savefig("epoch_{0:05d}.jpg".format(epoch)) + plt.close() + + decoder.train() + decoder.train() + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ae train"] = [] + loss_history["ae test"] = [] + loss_history["ae rec"] = [] + loss_history["ae prior"] = [] + loss_history["disc prior"] = [] + + for epoch in range(epochs): + + start = time.time() + + ae_train_loss_per_epoch = [] + ae_rec_loss_per_epoch = [] + ae_prior_loss_per_epoch = [] + disc_prior_loss_per_epoch = [] + + for train_batch, _ in train_dataloader: + train_batch = train_batch.to(device) + + # start with discriminator training + _disc_prior_train_loss = disc_prior_train_step(train_batch) + _disc_prior_train_loss = _disc_prior_train_loss.detach().cpu().numpy() + disc_prior_loss_per_epoch.append(_disc_prior_train_loss) + + # now train the autoencoder + _ae_loss, _ae_rec_loss, _ae_prior_loss = ae_train_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + _ae_rec_loss = _ae_rec_loss.detach().cpu().numpy() + _ae_prior_loss = _ae_prior_loss.detach().cpu().numpy() + + ae_train_loss_per_epoch.append(_ae_loss) + ae_rec_loss_per_epoch.append(_ae_rec_loss) + ae_prior_loss_per_epoch.append(_ae_prior_loss) + + ae_train_loss_per_epoch = np.mean(np.array(ae_train_loss_per_epoch)) + ae_rec_loss_per_epoch = np.mean(np.array(ae_rec_loss_per_epoch)) + ae_prior_loss_per_epoch = np.mean(np.array(ae_prior_loss_per_epoch)) + disc_prior_loss_per_epoch = np.mean(np.array(disc_prior_loss_per_epoch)) + + ae_test_loss_per_epoch = [] + + for test_batch, _ in test_dataloader: + test_batch = test_batch.to(device) + + _ae_loss, _, _ = ae_test_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + ae_test_loss_per_epoch.append(_ae_loss) + + ae_test_loss_per_epoch = np.mean(np.array(ae_test_loss_per_epoch)) + + if epoch % weight_save_interval == 0 and save_weights == True: + torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epoch)) + torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epoch)) + torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epoch)) + + plot_ae_outputs(encoder, decoder, epoch) + + loss_history["ae train"].append(ae_train_loss_per_epoch) + 
loss_history["ae test"].append(ae_test_loss_per_epoch) + loss_history["ae rec"].append(ae_rec_loss_per_epoch) + loss_history["ae prior"].append(ae_prior_loss_per_epoch) + loss_history["disc prior"].append(disc_prior_loss_per_epoch) + + print ('epoch {} : ae train: {:01.4f} ae test: {:01.4f} disc prior {:01.4f} rec {:01.4f} prior {:01.4f} time {:01.2f}'.format(epoch + 1, ae_train_loss_per_epoch, ae_test_loss_per_epoch, disc_prior_loss_per_epoch, ae_rec_loss_per_epoch, ae_prior_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + + + +def ae_loss(y, yhat, disc_pior_fake_output): + rec_loss = mse_loss(y, yhat) + disc_prior_fake_loss = bce_loss(disc_pior_fake_output, torch.ones_like(disc_pior_fake_output).to(device)) + + total_loss = 1.0 * rec_loss + 0.5 * disc_prior_fake_loss + return total_loss + + +# discriminator prior loss function +def disc_loss(disc_real_output, disc_fake_output): + real_loss = bce_loss(disc_real_output, torch.ones_like(disc_real_output).to(device)) + fake_loss = bce_loss(disc_fake_output, torch.zeros_like(disc_fake_output).to(device)) + + total_loss = (real_loss + fake_loss) * 0.5 + return total_loss + + +# optimizers +ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=1e-3) +discPrior_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=5e-3) + + +def plot_ae_outputs(encoder, decoder, epoch, n=5): + plt.figure(figsize=(10,4.5)) + for i in range(n): + ax = plt.subplot(2,n,i+1) + img = full_dataset[i][0].unsqueeze(0).to(device) + + encoder.eval() + decoder.eval() + with torch.no_grad(): + rec_img = decoder(encoder(img)) + encoder.train() + decoder.train() + + img = img.cpu().squeeze().numpy() + img = np.clip(img, 0.0, 1.0) + img = np.moveaxis(img, 0, 2) + + rec_img = rec_img.cpu().squeeze().numpy() + rec_img = np.clip(rec_img, 0.0, 1.0) + rec_img = np.moveaxis(rec_img, 0, 2) + + plt.imshow(img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title('Original images') + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(rec_img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == n//2: + ax.set_title("Epoch {}: Reconstructed images".format(epoch)) + plt.show() + #plt.savefig("epoch_{0:05d}.jpg".format(epoch)) + plt.close() + +epochs = 100 + +# outer loop over the training epochs +for epoch in range(epochs): + + discPrior_epoch_loss = 0 + discImage_epoch_loss = 0 + autoencoder_epoch_loss = 0 + + tick = time.time() + + for batch_features, _ in train_dataloader: + + batch_features = batch_features.to(device) + + # disc prior train step + discPrior_optimizer.zero_grad() + + with torch.no_grad(): + fake_output = encoder(batch_features) + real_output = sample_normal(fake_output.shape) + + discPrior_real_output = disc_prior(real_output) + discPrior_fake_output = disc_prior(fake_output) + + discPrior_loss = disc_loss(discPrior_real_output, discPrior_fake_output) + + discPrior_loss.backward() + discPrior_optimizer.step() + + discPrior_epoch_loss += discPrior_loss.item() + + # autoencoder train step + ae_optimizer.zero_grad() + + encoded_images = encoder(batch_features) + reconstructed_images = decoder(encoded_images) + + discPrior_fake_output = disc_prior(encoded_images) + + autoencoder_loss = ae_loss(batch_features, reconstructed_images, discPrior_fake_output) + + autoencoder_loss.backward() + ae_optimizer.step() + + autoencoder_epoch_loss += autoencoder_loss.item() + + # 
compute the epoch training loss + discPrior_epoch_loss = discPrior_epoch_loss / len(train_dataloader) + discImage_epoch_loss = discImage_epoch_loss / len(train_dataloader) + autoencoder_epoch_loss = autoencoder_epoch_loss / len(train_dataloader) + + tock = time.time() + + + # display the epoch training loss + plot_ae_outputs(encoder,decoder, epoch, n=5) + print("epoch : {}/{}, dp_loss = {:.6f} ae_loss = {:.6f}, time = {:.2f}".format(epoch + 1, epochs, discPrior_epoch_loss, autoencoder_epoch_loss, (tock - tick))) + + + +""" +Save/Load Model Weights +""" + +encoder_weights_file = "aae_encoder_weights_ballroom_epoch_100_cpu" +decoder_weights_file = "aae_decoder_weights_ballroom_epoch_100_cpu" +discPrior_weights_file = "aae_discPrior_weights_ballroom_epoch_100_cpu" + +# load model weights +encoder.load_state_dict(torch.load(encoder_weights_file)) +decoder.load_state_dict(torch.load(decoder_weights_file)) +discPrior.load_state_dict(torch.load(discPrior_weights_file)) + +# save model weights +torch.save(encoder.state_dict(), encoder_weights_file) +torch.save(decoder.state_dict(), decoder_weights_file) +torch.save(discPrior.state_dict(), discPrior_weights_file) + + +""" +Evaluate Model +""" + +# get two example images (and their labels) from the test dataset +img_1, label_1 = dataset[0] +img_2, label_2 = dataset[1] + +# show the two example images +plt.imshow(np.moveaxis(img_1.numpy(), 0, 2)) +plt.imshow(np.moveaxis(img_2.numpy(), 0, 2)) + +# reconstruct example images +img_1 = torch.unsqueeze(img_1, 0) +img_2 = torch.unsqueeze(img_2, 0) + +encoder.eval() +decoder.eval() +with torch.no_grad(): + rec_img_1 = decoder(encoder(img_1.to(device))) + rec_img_2 = decoder(encoder(img_2.to(device))) +encoder.train() +decoder.train() + +rec_img_1 = rec_img_1.cpu().detach().numpy().squeeze() +rec_img_2 = rec_img_2.cpu().detach().numpy().squeeze() + +plt.imshow(np.moveaxis(rec_img_1, 0, 2)) +plt.imshow(np.moveaxis(rec_img_2, 0, 2)) + +# mix image codes + +img_encoding_1 = encoder(img_1.to(device)) +img_encoding_2 = encoder(img_2.to(device)) + +mix_factor = 0.5 + +mixed_encoding = img_encoding_1 * mix_factor + img_encoding_2 * (1.0 - mix_factor) + +mixed_img = decoder(mixed_encoding) + +mixed_img = mixed_img.cpu().detach().numpy().squeeze() + +plt.imshow(np.moveaxis(mixed_img, 0, 2)) + +# create vector interpolation animation + +img_1, label_1 = dataset[0] +img_2, label_2 = dataset[1] + +img_1 = torch.unsqueeze(img_1, 0) +img_2 = torch.unsqueeze(img_2, 0) + +encoder.eval() +decoder.eval() + +with torch.no_grad(): + img_encoding_1 = encoder(img_1.to(device)) + img_encoding_2 = encoder(img_2.to(device)) + +mix_index = 0 + +for mix_factor in np.linspace(0.0, 1.0, 100): + mixed_encoding = img_encoding_1 * mix_factor + img_encoding_2 * (1.0 - mix_factor) + + with torch.no_grad(): + mixed_img = decoder(mixed_encoding) + + mixed_img = mixed_img.cpu().detach().numpy().squeeze() + + mixed_img = np.clip(mixed_img, 0.0, 1.0) + + #mixed_img = np.clip(mixed_img, 0.0, 1.0) + mixed_img = np.moveaxis(mixed_img, 0, 2) + + plt.imshow(mixed_img) + plt.savefig("mixed_{0:05d}.jpg".format(mix_index)) + + mix_index += 1 + +encoder.train() +decoder.train() + + + + + + + + + +""" +display images in a grid +parameter: + image_indices: indices of images to display + dataset: dataset to retrieve images from +""" +def draw_images(image_indices, dataset): + index_count = len(image_indices) + rows = int(math.ceil(math.sqrt(index_count))) + columns = int(math.ceil(index_count / rows)) + + # create figure + fig = plt.figure(figsize=(10, 7)) + 
+ image_nr = 0 + for row in range(rows): + for col in range(columns): + if image_nr >= index_count: + break + + img, _ = dataset[image_indices[image_nr]] + img = img.squeeze() + + # Adds a subplot at the 1st position + fig.add_subplot(rows, columns, image_nr + 1) + + # showing image + plt.imshow(np.moveaxis(img.numpy(), 0, 2)) + plt.axis('off') + plt.title(image_indices[image_nr]) + plt.tight_layout() + + image_nr += 1 + + fig.show() + + +""" + creates a list of image encodings + parameter: + image_indices : indices of images to encode + dataset: dataset to load the images from +""" +def encode_images(image_indices, dataset): + + encodings = [] + + for image_index in image_indices: + img, _ = dataset[image_index] + + img_enc = encoder(img.unsqueeze(0).to(device)) + + img_enc = img_enc.cpu().detach().numpy().squeeze() + + encodings.append(img_enc) + + encodings = np.array(encodings) + + return encodings + +""" +save image encodings as "pickled" file +parameter: + image_encodings: numpy array of image encodings + file_name: name of file (ideally ends on ".p") +""" +def save_image_encodings(image_encodings, file_name): + with open(file_name, 'wb') as file: + pickle.dump(image_encodings, file) + +# select images +selected_image_indices = [0, 5, 7, 10, 12] + +# display selected images +draw_images(selected_image_indices, dataset) + +# encode selected images +image_encodings = encode_images(selected_image_indices, dataset) + +# save encodings of selected images +save_image_encodings(image_encodings, "ballroom_encodings.p") diff --git a/autoencoder/motion_autoencoder/.spyproject/config/backups/codestyle.ini.bak b/autoencoder/motion_autoencoder/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/backups/encoding.ini.bak b/autoencoder/motion_autoencoder/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/backups/vcs.ini.bak b/autoencoder/motion_autoencoder/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/backups/workspace.ini.bak b/autoencoder/motion_autoencoder/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..04a3e96 --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['motion_autoencoder.py'] + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/codestyle.ini b/autoencoder/motion_autoencoder/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/codestyle.ini @@ -0,0 +1,8 
@@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/encoding.ini b/autoencoder/motion_autoencoder/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/vcs.ini b/autoencoder/motion_autoencoder/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/motion_autoencoder/.spyproject/config/workspace.ini b/autoencoder/motion_autoencoder/.spyproject/config/workspace.ini new file mode 100644 index 0000000..04a3e96 --- /dev/null +++ b/autoencoder/motion_autoencoder/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['motion_autoencoder.py'] + diff --git a/autoencoder/motion_autoencoder/motion_autoencoder.py b/autoencoder/motion_autoencoder/motion_autoencoder.py new file mode 100644 index 0000000..f51c7e5 --- /dev/null +++ b/autoencoder/motion_autoencoder/motion_autoencoder.py @@ -0,0 +1,761 @@ +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from torch import nn +from collections import OrderedDict + +import os, sys, time, subprocess +import numpy as np +sys.path.append("../..") + +from common import utils +from 
common.skeleton import Skeleton +from common.mocap_dataset import MocapDataset +from common.quaternion import qmul, qnormalize_np, slerp +from common.pose_renderer import PoseRenderer + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print('Using {} device'.format(device)) + +# mocap settings +mocap_data_path = "../../../../Data/Mocap/Muriel_Nov_2021/MUR_PolytopiaMovement_Take2_mb_proc_rh.p" +mocap_valid_frame_ranges = [ [ 860, 9500 ] ] +mocap_fps = 50 + +# model settings +latent_dim = 64 +sequence_length = 128 +ae_rnn_layer_count = 2 +ae_rnn_layer_size = 512 +ae_dense_layer_sizes = [ 512 ] +prior_crit_dense_layer_sizes = [ 512, 512 ] + +save_models = False +save_tscript = False +save_weights = False + +# load model weights +load_weights = False +disc_prior_weights_file = "results/weights/disc_prior_weights_epoch_400" +encoder_weights_file = "results/weights/encoder_weights_epoch_400" +decoder_weights_file = "results/weights/decoder_weights_epoch_400" + +# training settings +sequence_offset = 2 # when creating sequence excerpts, each excerpt is offset from the previous one by this value +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +dp_learning_rate = 5e-4 +ae_learning_rate = 1e-4 +ae_norm_loss_scale = 0.1 +ae_pos_loss_scale = 0.1 +ae_quat_loss_scale = 1.0 +ae_prior_loss_scale = 0.01 # weight for prior distribution loss +epochs = 10 +model_save_interval = 100 +save_history = False + +# visualization settings +view_ele = 0.0 +view_azi = 0.0 +view_line_width = 4.0 +view_size = 8.0 + +# load mocap data +mocap_data = MocapDataset(mocap_data_path, fps=mocap_fps) +if device == 'cuda': + mocap_data.cuda() +mocap_data.compute_positions() + +# gather skeleton info +skeleton = mocap_data.skeleton() +skeleton_joint_count = skeleton.num_joints() +skel_edge_list = utils.get_skeleton_edge_list(skeleton) + +# obtain pose sequence +subject = "S1" +action = "A1" +pose_sequence = mocap_data[subject][action]["rotations"] + +pose_sequence_length = pose_sequence.shape[0] +joint_count = pose_sequence.shape[1] +joint_dim = pose_sequence.shape[2] +pose_dim = joint_count * joint_dim +pose_sequence = np.reshape(pose_sequence, (-1, pose_dim)) + +# gather pose sequence excerpts +pose_sequence_excerpts = [] + +for valid_frame_range in mocap_valid_frame_ranges: + frame_range_start = valid_frame_range[0] + frame_range_end = valid_frame_range[1] + + for seq_excerpt_start in np.arange(frame_range_start, frame_range_end - sequence_length, sequence_offset): + #print("valid: start ", frame_range_start, " end ", frame_range_end, " exc: start ", seq_excerpt_start, " end ", (seq_excerpt_start + sequence_length) ) + pose_sequence_excerpt = pose_sequence[seq_excerpt_start:seq_excerpt_start + sequence_length] + pose_sequence_excerpts.append(pose_sequence_excerpt) + +pose_sequence_excerpts = np.array(pose_sequence_excerpts) + +# create dataset + +sequence_excerpts_count = pose_sequence_excerpts.shape[0] + +class SequenceDataset(Dataset): + def __init__(self, sequence_excerpts): + self.sequence_excerpts = sequence_excerpts + + def __len__(self): + return self.sequence_excerpts.shape[0] + + def __getitem__(self, idx): + return self.sequence_excerpts[idx, ...] 
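The excerpting loop above turns one long mocap recording into overlapping windows of sequence_length frames, shifted by sequence_offset, and SequenceDataset simply indexes into that array so a DataLoader can serve batches of shape (batch_size, sequence_length, pose_dim). A self-contained sketch of the same pipeline, using random data in place of the mocap file (the 29 joints x 4 quaternion components = 116 pose dimensions below are an assumed example, not values from the recording):

    # sliding-window excerpting plus DataLoader, with dummy data standing in for the mocap file
    import numpy as np
    import torch
    from torch.utils.data import Dataset, DataLoader

    pose_dim = 29 * 4            # assumed: 29 joints, 4 quaternion components each
    sequence_length = 128
    sequence_offset = 2

    dummy_pose_sequence = np.random.randn(1000, pose_dim).astype(np.float32)

    excerpts = np.array([dummy_pose_sequence[start:start + sequence_length]
                         for start in range(0, dummy_pose_sequence.shape[0] - sequence_length, sequence_offset)])

    class SequenceDataset(Dataset):      # same class as defined above
        def __init__(self, sequence_excerpts):
            self.sequence_excerpts = sequence_excerpts
        def __len__(self):
            return self.sequence_excerpts.shape[0]
        def __getitem__(self, idx):
            return self.sequence_excerpts[idx, ...]

    loader = DataLoader(SequenceDataset(excerpts), batch_size=16, shuffle=True)
    batch = next(iter(loader))
    print(batch.shape)           # torch.Size([16, 128, 116])

With sequence_offset = 2 the windows overlap heavily, which multiplies the number of training examples at the cost of strongly correlated batches.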
+ + +full_dataset = SequenceDataset(pose_sequence_excerpts) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# create models + +# create discriminator model for prior distribution + +class DiscriminatorPrior(nn.Module): + def __init__(self, latent_dim, prior_crit_dense_layer_sizes): + super(DiscriminatorPrior, self).__init__() + + self.latent_dim = latent_dim + self.prior_crit_dense_layer_sizes = prior_crit_dense_layer_sizes + + dense_layers = [] + dense_layers.append(("disc_prior_dense_0", nn.Linear(latent_dim, prior_crit_dense_layer_sizes[0]))) + dense_layers.append(("disc_prior_elu_0", nn.ELU())) + + dense_layer_count = len(prior_crit_dense_layer_sizes) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("disc_prior_dense_{}".format(layer_index), nn.Linear(prior_crit_dense_layer_sizes[layer_index - 1], prior_crit_dense_layer_sizes[layer_index]))) + dense_layers.append(("disc_prior_elu_{}".format(layer_index), nn.ELU())) + + dense_layers.append(("disc_prior_dense_{}".format(dense_layer_count), nn.Linear(prior_crit_dense_layer_sizes[-1], 1))) + dense_layers.append(("disc_prior_sigmoid_{}".format(dense_layer_count), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + yhat = self.dense_layers(x) + return yhat + +disc_prior = DiscriminatorPrior(latent_dim, prior_crit_dense_layer_sizes).to(device) + +print(disc_prior) + +""" +for name, param in discriminator_prior.named_parameters(): + print(f"Layer: {name} | Size: {param.size()}") +""" + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(disc_prior, "results/models/disc_prior.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(disc_prior, x, "results/models/disc_prior.onnx") + + disc_prior.train() + +if save_tscript == True: + disc_prior.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(disc_prior, x) + script_module.save("results/models/disc_prior.pt") + + disc_prior.train() + +if load_weights and disc_prior_weights_file: + disc_prior.load_state_dict(torch.load(disc_prior_weights_file)) + +# create encoder model + +class Encoder(nn.Module): + def __init__(self, sequence_length, pose_dim, latent_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes): + super(Encoder, self).__init__() + + self.sequence_length = sequence_length + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.rnn_layer_count = rnn_layer_count + self.rnn_layer_size = rnn_layer_size + self.dense_layer_sizes = dense_layer_sizes + + # create recurrent layers + rnn_layers = [] + rnn_layers.append(("encoder_rnn_0", nn.LSTM(self.pose_dim, self.rnn_layer_size, self.rnn_layer_count, batch_first=True))) + + self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers)) + + # create dense layers + + dense_layers = [] + + dense_layers.append(("encoder_dense_0", nn.Linear(self.rnn_layer_size, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_dense_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + 
dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.latent_dim))) + dense_layers.append(("encoder_dense_relu_{}".format(len(self.dense_layer_sizes)), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x 1 ", x.shape) + + x, (_, _) = self.rnn_layers(x) + + #print("x 2 ", x.shape) + + x = x[:, -1, :] # only last time step + + #print("x 3 ", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat ", yhat.shape) + + return yhat + +encoder = Encoder(sequence_length, pose_dim, latent_dim, ae_rnn_layer_count, ae_rnn_layer_size, ae_dense_layer_sizes).to(device) + +print(encoder) + +if save_models == True: + encoder.train() + + # save using pickle + torch.save(encoder, "results/models/encoder.pth") + + # save using onnx + x = torch.zeros((1, sequence_length, pose_dim)).to(device) + torch.onnx.export(encoder, x, "results/models/encoder.onnx") + + encoder.test() + +if save_tscript == True: + encoder.train() + + # save using TochScript + x = torch.rand((1, sequence_length, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(encoder, x) + script_module.save("results/models/encoder.pt") + + encoder.test() + +if load_weights and encoder_weights_file: + encoder.load_state_dict(torch.load(encoder_weights_file, map_location=device)) + + +# create decoder model + +class Decoder(nn.Module): + def __init__(self, sequence_length, pose_dim, latent_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes): + super(Decoder, self).__init__() + + self.sequence_length = sequence_length + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.rnn_layer_size = rnn_layer_size + self.rnn_layer_count = rnn_layer_count + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("decoder_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + # create rnn layers + rnn_layers = [] + + rnn_layers.append(("decoder_rnn_0", nn.LSTM(self.dense_layer_sizes[-1], self.rnn_layer_size, self.rnn_layer_count, batch_first=True))) + + self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers)) + + # final output dense layer + final_layers = [] + + final_layers.append(("decoder_dense_{}".format(dense_layer_count), nn.Linear(self.rnn_layer_size, self.pose_dim))) + + self.final_layers = nn.Sequential(OrderedDict(final_layers)) + + def forward(self, x): + #print("x 1 ", x.size()) + + # dense layers + x = self.dense_layers(x) + #print("x 2 ", x.size()) + + # repeat vector + x = torch.unsqueeze(x, dim=1) + x = x.repeat(1, sequence_length, 1) + #print("x 3 ", x.size()) + + # rnn layers + x, (_, _) = self.rnn_layers(x) + #print("x 4 ", x.size()) + + # final time distributed dense layer + x_reshaped = x.contiguous().view(-1, self.rnn_layer_size) # 
(batch_size * sequence, input_size) + #print("x 5 ", x_reshaped.size()) + + yhat = self.final_layers(x_reshaped) + #print("yhat 1 ", yhat.size()) + + yhat = yhat.contiguous().view(-1, self.sequence_length, self.pose_dim) + #print("yhat 2 ", yhat.size()) + + return yhat + +ae_dense_layer_sizes_reversed = ae_dense_layer_sizes.copy() +ae_dense_layer_sizes_reversed.reverse() + +decoder = Decoder(sequence_length, pose_dim, latent_dim, ae_rnn_layer_count, ae_rnn_layer_size, ae_dense_layer_sizes_reversed).to(device) + +print(decoder) + +if save_models == True: + decoder.eval() + + # save using pickle + torch.save(decoder, "results/models/decoder_weights.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(decoder, x, "results/models/decoder.onnx") + + decoder.train() + +if save_tscript == True: + decoder.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(decoder, x) + script_module.save("results/models/decoder.pt") + + decoder.train() + +if load_weights and decoder_weights_file: + decoder.load_state_dict(torch.load(decoder_weights_file, map_location=device)) + +# Training + +disc_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=dp_learning_rate) +ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=ae_learning_rate) + +cross_entropy = nn.BCELoss() + +# function returning normal distributed random data +# serves as reference for the discriminator to distinguish the encoders prior from +def sample_normal(shape): + return torch.tensor(np.random.normal(size=shape), dtype=torch.float32).to(device) + +# discriminator prior loss function +def disc_prior_loss(disc_real_output, disc_fake_output): + ones = torch.ones_like(disc_real_output).to(device) + zeros = torch.zeros_like(disc_fake_output).to(device) + + real_loss = cross_entropy(disc_real_output, ones) + fake_loss = cross_entropy(disc_fake_output, zeros) + + total_loss = (real_loss + fake_loss) * 0.5 + return total_loss + +def ae_norm_loss(yhat): + + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def ae_pos_loss(y, yhat): + # y and yhat shapes: batch_size, seq_length, pose_dim + + # normalize tensors + _yhat = yhat.view(-1, 4) + + _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1) + _y_rot = y.view((y.shape[0], y.shape[1], -1, 4)) + _yhat_rot = _yhat.view((y.shape[0], y.shape[1], -1, 4)) + + zero_trajectory = torch.zeros((y.shape[0], y.shape[1], 3), dtype=torch.float32, requires_grad=True).to(device) + + _y_pos = skeleton.forward_kinematics(_y_rot, zero_trajectory) + _yhat_pos = skeleton.forward_kinematics(_yhat_rot, zero_trajectory) + + _pos_diff = torch.norm((_y_pos - _yhat_pos), dim=3) + + _loss = torch.mean(_pos_diff) + + return _loss + +def ae_quat_loss(y, yhat): + # y and yhat shapes: batch_size, seq_length, pose_dim + + # normalize quaternion + + _y = y.view((-1, 4)) + _yhat = yhat.view((-1, 4)) + + _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1) + + # inverse of quaternion: https://www.mathworks.com/help/aeroblks/quaternioninverse.html + _yhat_inv = _yhat_norm * torch.tensor([[1.0, -1.0, -1.0, -1.0]], dtype=torch.float32).to(device) + + # calculate difference quaternion + _diff = qmul(_yhat_inv, _y) + # length of complex part + _len = torch.norm(_diff[:, 1:], dim=1) + # atan2 + _atan = torch.atan2(_len, _diff[:, 0]) + # abs + _abs = torch.abs(_atan) + _loss = 
torch.mean(_abs) + return _loss + +# autoencoder loss function +def ae_loss(y, yhat, disc_fake_output): + # function parameters + # y: encoder input + # yhat: decoder output (i.e. reconstructed encoder input) + # disc_fake_output: discriminator output for encoder generated prior + + _norm_loss = ae_norm_loss(yhat) + _pos_loss = ae_pos_loss(y, yhat) + _quat_loss = ae_quat_loss(y, yhat) + + + # discrimination loss + _fake_loss = cross_entropy(torch.zeros_like(disc_fake_output), disc_fake_output) + + _total_loss = 0.0 + _total_loss += _norm_loss * ae_norm_loss_scale + _total_loss += _pos_loss * ae_pos_loss_scale + _total_loss += _quat_loss * ae_quat_loss_scale + _total_loss += _fake_loss * ae_prior_loss_scale + + return _total_loss, _norm_loss, _pos_loss, _quat_loss, _fake_loss + +def disc_prior_train_step(target_poses): + # have normal distribution and encoder produce real and fake outputs, respectively + + with torch.no_grad(): + encoder_output = encoder(target_poses) + + real_output = sample_normal(encoder_output.shape) + + # let discriminator distinguish between real and fake outputs + disc_real_output = disc_prior(real_output) + disc_fake_output = disc_prior(encoder_output) + _disc_loss = disc_prior_loss(disc_real_output, disc_fake_output) + + # Backpropagation + disc_optimizer.zero_grad() + _disc_loss.backward() + disc_optimizer.step() + + return _disc_loss + +def ae_train_step(target_poses): + + #print("train step target_poses ", target_poses.shape) + + # let autoencoder preproduce target_poses (decoder output) and also return encoder output + encoder_output = encoder(target_poses) + pred_poses = decoder(encoder_output) + + # let discriminator output its fake assessment of the encoder ouput + with torch.no_grad(): + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output) + + #print("_ae_pos_loss ", _ae_pos_loss) + + # Backpropagation + ae_optimizer.zero_grad() + _ae_loss.backward() + ae_optimizer.step() + + return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss + +def ae_test_step(target_poses): + with torch.no_grad(): + # let autoencoder preproduce target_poses (decoder output) and also return encoder output + encoder_output = encoder(target_poses) + pred_poses = decoder(encoder_output) + + # let discriminator output its fake assessment of the encoder ouput + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output) + + return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ae train"] = [] + loss_history["ae test"] = [] + loss_history["ae norm"] = [] + loss_history["ae pos"] = [] + loss_history["ae quat"] = [] + loss_history["ae prior"] = [] + loss_history["disc prior"] = [] + + for epoch in range(epochs): + + start = time.time() + + ae_train_loss_per_epoch = [] + ae_norm_loss_per_epoch = [] + ae_pos_loss_per_epoch = [] + ae_quat_loss_per_epoch = [] + ae_prior_loss_per_epoch = [] + disc_prior_loss_per_epoch = [] + + for train_batch in train_dataloader: + train_batch = train_batch.to(device) + + # start with discriminator training + _disc_prior_train_loss = disc_prior_train_step(train_batch) + + _disc_prior_train_loss = _disc_prior_train_loss.detach().cpu().numpy() + + #print("_disc_prior_train_loss ", 
_disc_prior_train_loss) + + disc_prior_loss_per_epoch.append(_disc_prior_train_loss) + + # now train the autoencoder + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_train_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + _ae_norm_loss = _ae_norm_loss.detach().cpu().numpy() + _ae_pos_loss = _ae_pos_loss.detach().cpu().numpy() + _ae_quat_loss = _ae_quat_loss.detach().cpu().numpy() + _ae_prior_loss = _ae_prior_loss.detach().cpu().numpy() + + #print("_ae_prior_loss ", _ae_prior_loss) + + ae_train_loss_per_epoch.append(_ae_loss) + ae_norm_loss_per_epoch.append(_ae_norm_loss) + ae_pos_loss_per_epoch.append(_ae_pos_loss) + ae_quat_loss_per_epoch.append(_ae_quat_loss) + ae_prior_loss_per_epoch.append(_ae_prior_loss) + + ae_train_loss_per_epoch = np.mean(np.array(ae_train_loss_per_epoch)) + ae_norm_loss_per_epoch = np.mean(np.array(ae_norm_loss_per_epoch)) + ae_pos_loss_per_epoch = np.mean(np.array(ae_pos_loss_per_epoch)) + ae_quat_loss_per_epoch = np.mean(np.array(ae_quat_loss_per_epoch)) + ae_prior_loss_per_epoch = np.mean(np.array(ae_prior_loss_per_epoch)) + disc_prior_loss_per_epoch = np.mean(np.array(disc_prior_loss_per_epoch)) + + ae_test_loss_per_epoch = [] + + for test_batch in test_dataloader: + test_batch = test_batch.to(device) + + _ae_loss, _, _, _, _ = ae_test_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + ae_test_loss_per_epoch.append(_ae_loss) + + ae_test_loss_per_epoch = np.mean(np.array(ae_test_loss_per_epoch)) + + if epoch % model_save_interval == 0 and save_weights == True: + torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epoch)) + torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epoch)) + torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epoch)) + + loss_history["ae train"].append(ae_train_loss_per_epoch) + loss_history["ae test"].append(ae_test_loss_per_epoch) + loss_history["ae norm"].append(ae_norm_loss_per_epoch) + loss_history["ae pos"].append(ae_pos_loss_per_epoch) + loss_history["ae quat"].append(ae_quat_loss_per_epoch) + loss_history["ae prior"].append(ae_prior_loss_per_epoch) + loss_history["disc prior"].append(disc_prior_loss_per_epoch) + + print ('epoch {} : ae train: {:01.4f} ae test: {:01.4f} disc prior {:01.4f} norm {:01.4f} pos {:01.4f} quat {:01.4f} prior {:01.4f} time {:01.2f}'.format(epoch + 1, ae_train_loss_per_epoch, ae_test_loss_per_epoch, disc_prior_loss_per_epoch, ae_norm_loss_per_epoch, ae_pos_loss_per_epoch, ae_quat_loss_per_epoch, ae_prior_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epochs)) +torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epochs)) +torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epochs)) + +# inference and rendering + +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + +def create_ref_sequence_anim(seq_index, file_name): + sequence_excerpt = pose_sequence_excerpts[seq_index] + sequence_excerpt = np.reshape(sequence_excerpt, (sequence_length, 
joint_count, joint_dim)) + + sequence_excerpt = torch.tensor(np.expand_dims(sequence_excerpt, axis=0)).to(device) + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)).to(device) + + skel_sequence = skeleton.forward_kinematics(sequence_excerpt, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + +def create_rec_sequence_anim(seq_index, file_name): + sequence_excerpt = pose_sequence_excerpts[seq_index] + sequence_excerpt = np.expand_dims(sequence_excerpt, axis=0) + + sequence_excerpt = torch.from_numpy(sequence_excerpt).to(device) + + with torch.no_grad(): + sequence_enc = encoder(sequence_excerpt) + pred_sequence = decoder(sequence_enc) + + pred_sequence = torch.squeeze(pred_sequence) + pred_sequence = pred_sequence.view((-1, 4)) + pred_sequence = nn.functional.normalize(pred_sequence, p=2, dim=1) + pred_sequence = pred_sequence.view((1, sequence_length, joint_count, joint_dim)) + + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_sequence = skeleton.forward_kinematics(pred_sequence, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + +def encode_sequences(frame_indices): + + encoder.eval() + + latent_vectors = [] + + seq_excerpt_count = len(frame_indices) + + for excerpt_index in range(seq_excerpt_count): + excerpt_start_frame = frame_indices[excerpt_index] + excerpt_end_frame = excerpt_start_frame + sequence_length + excerpt = pose_sequence[excerpt_start_frame:excerpt_end_frame] + excerpt = np.expand_dims(excerpt, axis=0) + excerpt = torch.from_numpy(excerpt).to(device) + + with torch.no_grad(): + latent_vector = encoder(excerpt) + + latent_vector = torch.squeeze(latent_vector) + latent_vector = latent_vector.detach().cpu().numpy() + + latent_vectors.append(latent_vector) + + encoder.train() + + return latent_vectors + +def decode_sequence_encodings(sequence_encodings, file_name): + + decoder.eval() + + rec_sequences = [] + + for seq_encoding in sequence_encodings: + seq_encoding = np.expand_dims(seq_encoding, axis=0) + seq_encoding = torch.from_numpy(seq_encoding).to(device) + + with torch.no_grad(): + rec_seq = decoder(seq_encoding) + + rec_seq = torch.squeeze(rec_seq) + rec_seq = rec_seq.view((-1, 4)) + rec_seq = nn.functional.normalize(rec_seq, p=2, dim=1) + rec_seq = rec_seq.view((-1, joint_count, joint_dim)) + + rec_sequences.append(rec_seq) + + rec_sequences = torch.cat(rec_sequences, dim=0) + rec_sequences = torch.unsqueeze(rec_sequences, dim=0) + + print("rec_sequences s ", rec_sequences.shape) + + zero_trajectory = torch.tensor(np.zeros((1, len(sequence_encodings) * sequence_length, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_sequence 
= skeleton.forward_kinematics(rec_sequences, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + + decoder.train() + +# create original sequence + +seq_index = 100 + +create_ref_sequence_anim(seq_index, "results/anims/orig_sequence_{}.gif".format(seq_index)) + +# recontruct original sequence + +seq_index = 100 + +create_rec_sequence_anim(seq_index, "results/anims/rec_sequence_{}.gif".format(seq_index)) + +# reconstruct original pose sequences + +start_seq_index = 1000 +end_seq_index = 2000 +seq_indices = [ seq_index for seq_index in range(start_seq_index, end_seq_index, sequence_length)] + +seq_encodings = encode_sequences(seq_indices) +decode_sequence_encodings(seq_encodings, "results/anims/rec_sequences_{}-{}.gif".format(start_seq_index, end_seq_index)) diff --git a/autoencoder/pose_autoencoder/.spyproject/config/backups/codestyle.ini.bak b/autoencoder/pose_autoencoder/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/backups/encoding.ini.bak b/autoencoder/pose_autoencoder/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/backups/vcs.ini.bak b/autoencoder/pose_autoencoder/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/backups/workspace.ini.bak b/autoencoder/pose_autoencoder/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..88f8728 --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,12 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['pose_autoencoder.py'] + +[main] +version = 0.2.0 +recent_files = ['..\\..\\..\\..\\..\\..\\..\\..\\..\\.spyder-py3\\temp.py', 'pose_autoencoder.py'] + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/codestyle.ini b/autoencoder/pose_autoencoder/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini 
b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/encoding.ini b/autoencoder/pose_autoencoder/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/vcs.ini b/autoencoder/pose_autoencoder/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/autoencoder/pose_autoencoder/.spyproject/config/workspace.ini b/autoencoder/pose_autoencoder/.spyproject/config/workspace.ini new file mode 100644 index 0000000..88f8728 --- /dev/null +++ b/autoencoder/pose_autoencoder/.spyproject/config/workspace.ini @@ -0,0 +1,12 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['pose_autoencoder.py'] + +[main] +version = 0.2.0 +recent_files = ['..\\..\\..\\..\\..\\..\\..\\..\\..\\.spyder-py3\\temp.py', 'pose_autoencoder.py'] + diff --git a/autoencoder/pose_autoencoder/pose_autoencoder.py b/autoencoder/pose_autoencoder/pose_autoencoder.py new file mode 100644 index 0000000..584b3ec --- /dev/null +++ b/autoencoder/pose_autoencoder/pose_autoencoder.py @@ -0,0 +1,755 @@ +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from torch import nn +from collections import OrderedDict + +import os, sys, time, subprocess +import numpy as np +sys.path.append("../..") + +from common import utils +from common.skeleton import Skeleton +from common.mocap_dataset import MocapDataset +from common.quaternion import qmul, qnormalize_np, slerp 
+from common.pose_renderer import PoseRenderer + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print('Using {} device'.format(device)) + + +# mocap settings +mocap_data_path = "../../../../Data/Mocap/Muriel_Nov_2021/MUR_Fluidity_Body_Take1_mb_proc_rh.p" +mocap_valid_frame_ranges = [ [ 500, 6500 ] ] +mocap_fps = 50 + +# model settings +latent_dim = 8 +ae_dense_layer_sizes = [ 64, 16 ] +prior_crit_dense_layer_sizes = [ 32, 32 ] + +save_models = False +save_tscript = False +save_weights = False + +# load model weights +load_weights = True +disc_prior_weights_file = "results/weights/disc_prior_weights_epoch_400" +encoder_weights_file = "results/weights/encoder_weights_epoch_400" +decoder_weights_file = "results/weights/decoder_weights_epoch_400" + +# training settings +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +dp_learning_rate = 5e-4 +ae_learning_rate = 1e-4 +ae_norm_loss_scale = 0.1 +ae_pos_loss_scale = 0.1 +ae_quat_loss_scale = 1.0 +ae_prior_loss_scale = 0.01 # weight for prior distribution loss +epochs = 400 +model_save_interval = 100 +save_history = False + +# visualization settings +view_ele = 0.0 +view_azi = 0.0 +view_line_width = 4.0 +view_size = 8.0 + +# load mocap data +mocap_data = MocapDataset(mocap_data_path, fps=mocap_fps) +if device == 'cuda': + mocap_data.cuda() +mocap_data.compute_positions() + +# gather skeleton info +skeleton = mocap_data.skeleton() +skeleton_joint_count = skeleton.num_joints() +skel_edge_list = utils.get_skeleton_edge_list(skeleton) + +# gather poses +subject = "S1" +action = "A1" +pose_sequence = mocap_data[subject][action]["rotations"] + +poses = [] +for valid_frame_range in mocap_valid_frame_ranges: + frame_range_start = valid_frame_range[0] + frame_range_end = valid_frame_range[1] + poses += [pose_sequence[frame_range_start:frame_range_end]] +poses = np.concatenate(poses, axis=0) + +pose_count = poses.shape[0] +joint_count = poses.shape[1] +joint_dim = poses.shape[2] +pose_dim = joint_count * joint_dim + +poses = np.reshape(poses, (-1, pose_dim)) + +# create dataset + +class PoseDataset(Dataset): + def __init__(self, poses): + self.poses = poses + + def __len__(self): + return self.poses.shape[0] + + def __getitem__(self, idx): + return self.poses[idx, ...] 
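+# added note (sketch): the dense layers below expect float32 input; depending on
+# how the rotations are stored in the mocap pickle, an explicit cast here avoids
+# dtype mismatches when batches are fed to the model.
+poses = poses.astype(np.float32)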
+ +full_dataset = PoseDataset(poses) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# create models + +# create discriminator model for prior distribution + +class DiscriminatorPrior(nn.Module): + def __init__(self, latent_dim, dense_layer_sizes): + super(DiscriminatorPrior, self).__init__() + + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + dense_layers = [] + + dense_layer_count = len(self.dense_layer_sizes) + + dense_layers.append(("disc_prior_dense_0", nn.Linear(latent_dim, dense_layer_sizes[0]))) + dense_layers.append(("disc_prior_elu_0", nn.ELU())) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("disc_prior_dense_{}".format(layer_index), nn.Linear(dense_layer_sizes[layer_index - 1], dense_layer_sizes[layer_index]))) + dense_layers.append(("disc_prior_elu_{}".format(layer_index), nn.ELU())) + + dense_layers.append(("disc_prior_dense_{}".format(dense_layer_count), nn.Linear(prior_crit_dense_layer_sizes[-1], 1))) + dense_layers.append(("disc_prior_sigmoid_{}".format(dense_layer_count), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + yhat = self.dense_layers(x) + return yhat + +disc_prior = DiscriminatorPrior(latent_dim, prior_crit_dense_layer_sizes).to(device) + +print(disc_prior) + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(disc_prior, "results/models/disc_prior.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(disc_prior, x, "results/models/disc_prior.onnx") + + disc_prior.train() + +if save_tscript == True: + disc_prior.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(disc_prior, x) + script_module.save("results/models/disc_prior.pt") + + disc_prior.train() + +if load_weights and disc_prior_weights_file: + disc_prior.load_state_dict(torch.load(disc_prior_weights_file)) + +# create encoder model + +class Encoder(nn.Module): + def __init__(self, pose_dim, latent_dim, dense_layer_sizes): + super(Encoder, self).__init__() + + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + + dense_layers = [] + + dense_layers.append(("encoder_dense_0", nn.Linear(self.pose_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_dense_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.latent_dim))) + dense_layers.append(("encoder_dense_relu_{}".format(len(self.dense_layer_sizes)), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x 1 ", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat ", yhat.shape) + + 
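+        # added note: yhat is the latent code; because the dense stack above ends
+        # with a ReLU after the final linear layer, every latent component is
+        # non-negative, whereas sample_normal() further below draws the prior
+        # reference from a standard normal distribution.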
return yhat + +encoder = Encoder(pose_dim, latent_dim, ae_dense_layer_sizes).to(device) + +print(encoder) + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(encoder, "results/models/encoder.pth") + + # save using onnx + x = torch.zeros((1, pose_dim)).to(device) + torch.onnx.export(encoder, x, "results/models/encoder.onnx") + + disc_prior.train() + +if save_tscript == True: + encoder.eval() + + # save using TochScript + x = torch.rand((1, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(encoder, x) + script_module.save("results/models/encoder.pt") + + encoder.train() + +if load_weights and encoder_weights_file: + encoder.load_state_dict(torch.load(encoder_weights_file)) + +# create decoder model + +class Decoder(nn.Module): + def __init__(self, pose_dim, latent_dim, dense_layer_sizes): + super(Decoder, self).__init__() + + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("decoder_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.pose_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + #print("x 1 ", x.size()) + + # dense layers + yhat = self.dense_layers(x) + #print("yhat ", yhat.size()) + + + return yhat + +ae_dense_layer_sizes_reversed = ae_dense_layer_sizes.copy() +ae_dense_layer_sizes_reversed.reverse() + +decoder = Decoder(pose_dim, latent_dim, ae_dense_layer_sizes_reversed).to(device) + +print(decoder) + +if save_models == True: + disc_prior.eval() + + # save using pickle + torch.save(decoder, "results/models/decoder.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(decoder, x, "results/models/decoder.onnx") + + disc_prior.train() + +if save_tscript == True: + decoder.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(decoder, x) + script_module.save("results/models/decoder.pt") + + decoder.train() + +if load_weights and decoder_weights_file: + decoder.load_state_dict(torch.load(decoder_weights_file)) + +# Training + +disc_optimizer = torch.optim.Adam(disc_prior.parameters(), lr=dp_learning_rate) +ae_optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=ae_learning_rate) + +cross_entropy = nn.BCELoss() + +# function returning normal distributed random data +# serves as reference for the discriminator to distinguish the encoders prior from +def sample_normal(shape): + return torch.tensor(np.random.normal(size=shape), dtype=torch.float32).to(device) + +# discriminator prior loss function +def disc_prior_loss(disc_real_output, disc_fake_output): + ones = torch.ones_like(disc_real_output).to(device) + zeros = torch.zeros_like(disc_fake_output).to(device) + + real_loss = cross_entropy(disc_real_output, ones) + fake_loss = cross_entropy(disc_fake_output, zeros) + + total_loss = (real_loss + 
fake_loss) * 0.5 + return total_loss + +# define AE Loss Functions + +def ae_norm_loss(yhat): + + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def ae_pos_loss(y, yhat): + # y and yhat shapes: batch_size, seq_length, pose_dim + + # normalize tensors + _yhat = yhat.view(-1, 4) + + _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1) + _y_rot = y.view((y.shape[0], 1, -1, 4)) + _yhat_rot = _yhat.view((y.shape[0], 1, -1, 4)) + + zero_trajectory = torch.zeros((y.shape[0], 1, 3), dtype=torch.float32, requires_grad=True).to(device) + + _y_pos = skeleton.forward_kinematics(_y_rot, zero_trajectory) + _yhat_pos = skeleton.forward_kinematics(_yhat_rot, zero_trajectory) + + _pos_diff = torch.norm((_y_pos - _yhat_pos), dim=3) + + _loss = torch.mean(_pos_diff) + + return _loss + +def ae_quat_loss(y, yhat): + # y and yhat shapes: batch_size, seq_length, pose_dim + + # normalize quaternion + + _y = y.view((-1, 4)) + _yhat = yhat.view((-1, 4)) + + _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1) + + # inverse of quaternion: https://www.mathworks.com/help/aeroblks/quaternioninverse.html + _yhat_inv = _yhat_norm * torch.tensor([[1.0, -1.0, -1.0, -1.0]], dtype=torch.float32).to(device) + + # calculate difference quaternion + _diff = qmul(_yhat_inv, _y) + # length of complex part + _len = torch.norm(_diff[:, 1:], dim=1) + # atan2 + _atan = torch.atan2(_len, _diff[:, 0]) + # abs + _abs = torch.abs(_atan) + _loss = torch.mean(_abs) + return _loss + +# autoencoder loss function +def ae_loss(y, yhat, disc_fake_output): + # function parameters + # y: encoder input + # yhat: decoder output (i.e. reconstructed encoder input) + # disc_fake_output: discriminator output for encoder generated prior + + _norm_loss = ae_norm_loss(yhat) + _pos_loss = ae_pos_loss(y, yhat) + _quat_loss = ae_quat_loss(y, yhat) + + + # discrimination loss + _fake_loss = cross_entropy(torch.zeros_like(disc_fake_output), disc_fake_output) + + _total_loss = 0.0 + _total_loss += _norm_loss * ae_norm_loss_scale + _total_loss += _pos_loss * ae_pos_loss_scale + _total_loss += _quat_loss * ae_quat_loss_scale + _total_loss += _fake_loss * ae_prior_loss_scale + + return _total_loss, _norm_loss, _pos_loss, _quat_loss, _fake_loss + +def disc_prior_train_step(target_poses): + # have normal distribution and encoder produce real and fake outputs, respectively + + with torch.no_grad(): + encoder_output = encoder(target_poses) + + real_output = sample_normal(encoder_output.shape) + + # let discriminator distinguish between real and fake outputs + disc_real_output = disc_prior(real_output) + disc_fake_output = disc_prior(encoder_output) + _disc_loss = disc_prior_loss(disc_real_output, disc_fake_output) + + # Backpropagation + disc_optimizer.zero_grad() + _disc_loss.backward() + disc_optimizer.step() + + return _disc_loss + +def ae_train_step(target_poses): + + #print("train step target_poses ", target_poses.shape) + + # let autoencoder preproduce target_poses (decoder output) and also return encoder output + encoder_output = encoder(target_poses) + pred_poses = decoder(encoder_output) + + # let discriminator output its fake assessment of the encoder ouput + with torch.no_grad(): + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output) + + #print("_ae_pos_loss ", _ae_pos_loss) + + # Backpropagation + ae_optimizer.zero_grad() + _ae_loss.backward() + 
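+    # added note: this step updates encoder and decoder parameters only (both were
+    # handed to ae_optimizer above); because disc_fake_output was computed inside
+    # torch.no_grad(), the prior term in _ae_loss carries no gradient back to the
+    # encoder, so only the norm, position and quaternion terms drive this update.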
ae_optimizer.step() + + return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss + +def ae_test_step(target_poses): + with torch.no_grad(): + # let autoencoder preproduce target_poses (decoder output) and also return encoder output + encoder_output = encoder(target_poses) + pred_poses = decoder(encoder_output) + + # let discriminator output its fake assessment of the encoder ouput + disc_fake_output = disc_prior(encoder_output) + + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_loss(target_poses, pred_poses, disc_fake_output) + + return _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ae train"] = [] + loss_history["ae test"] = [] + loss_history["ae norm"] = [] + loss_history["ae pos"] = [] + loss_history["ae quat"] = [] + loss_history["ae prior"] = [] + loss_history["disc prior"] = [] + + for epoch in range(epochs): + + start = time.time() + + ae_train_loss_per_epoch = [] + ae_norm_loss_per_epoch = [] + ae_pos_loss_per_epoch = [] + ae_quat_loss_per_epoch = [] + ae_prior_loss_per_epoch = [] + disc_prior_loss_per_epoch = [] + + for train_batch in train_dataloader: + train_batch = train_batch.to(device) + + # start with discriminator training + _disc_prior_train_loss = disc_prior_train_step(train_batch) + + _disc_prior_train_loss = _disc_prior_train_loss.detach().cpu().numpy() + + #print("_disc_prior_train_loss ", _disc_prior_train_loss) + + disc_prior_loss_per_epoch.append(_disc_prior_train_loss) + + # now train the autoencoder + _ae_loss, _ae_norm_loss, _ae_pos_loss, _ae_quat_loss, _ae_prior_loss = ae_train_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + _ae_norm_loss = _ae_norm_loss.detach().cpu().numpy() + _ae_pos_loss = _ae_pos_loss.detach().cpu().numpy() + _ae_quat_loss = _ae_quat_loss.detach().cpu().numpy() + _ae_prior_loss = _ae_prior_loss.detach().cpu().numpy() + + #print("_ae_prior_loss ", _ae_prior_loss) + + ae_train_loss_per_epoch.append(_ae_loss) + ae_norm_loss_per_epoch.append(_ae_norm_loss) + ae_pos_loss_per_epoch.append(_ae_pos_loss) + ae_quat_loss_per_epoch.append(_ae_quat_loss) + ae_prior_loss_per_epoch.append(_ae_prior_loss) + + ae_train_loss_per_epoch = np.mean(np.array(ae_train_loss_per_epoch)) + ae_norm_loss_per_epoch = np.mean(np.array(ae_norm_loss_per_epoch)) + ae_pos_loss_per_epoch = np.mean(np.array(ae_pos_loss_per_epoch)) + ae_quat_loss_per_epoch = np.mean(np.array(ae_quat_loss_per_epoch)) + ae_prior_loss_per_epoch = np.mean(np.array(ae_prior_loss_per_epoch)) + disc_prior_loss_per_epoch = np.mean(np.array(disc_prior_loss_per_epoch)) + + ae_test_loss_per_epoch = [] + + for test_batch in test_dataloader: + test_batch = test_batch.to(device) + + _ae_loss, _, _, _, _ = ae_test_step(train_batch) + + _ae_loss = _ae_loss.detach().cpu().numpy() + ae_test_loss_per_epoch.append(_ae_loss) + + ae_test_loss_per_epoch = np.mean(np.array(ae_test_loss_per_epoch)) + + if epoch % model_save_interval == 0 and save_weights == True: + disc_prior.save_weights("disc_prior_weights epoch_{}".format(epoch)) + encoder.save_weights("ae_encoder_weights epoch_{}".format(epoch)) + decoder.save_weights("ae_decoder_weights epoch_{}".format(epoch)) + + """ + if epoch % vis_save_interval == 0 and save_vis == True: + create_epoch_visualisations(epoch) + """ + + loss_history["ae train"].append(ae_train_loss_per_epoch) + loss_history["ae test"].append(ae_test_loss_per_epoch) + loss_history["ae 
norm"].append(ae_norm_loss_per_epoch) + loss_history["ae pos"].append(ae_pos_loss_per_epoch) + loss_history["ae quat"].append(ae_quat_loss_per_epoch) + loss_history["ae prior"].append(ae_prior_loss_per_epoch) + loss_history["disc prior"].append(disc_prior_loss_per_epoch) + + print ('epoch {} : ae train: {:01.4f} ae test: {:01.4f} disc prior {:01.4f} norm {:01.4f} pos {:01.4f} quat {:01.4f} prior {:01.4f} time {:01.2f}'.format(epoch + 1, ae_train_loss_per_epoch, ae_test_loss_per_epoch, disc_prior_loss_per_epoch, ae_norm_loss_per_epoch, ae_pos_loss_per_epoch, ae_quat_loss_per_epoch, ae_prior_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(disc_prior.state_dict(), "results/weights/disc_prior_weights_epoch_{}".format(epochs)) +torch.save(encoder.state_dict(), "results/weights/encoder_weights_epoch_{}".format(epochs)) +torch.save(decoder.state_dict(), "results/weights/decoder_weights_epoch_{}".format(epochs)) + +# inference and rendering + +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + +def create_ref_pose_image(pose_index, file_name): + pose = poses[pose_index] + pose = torch.tensor(np.reshape(pose, (1, 1, joint_count, joint_dim))).to(device) + zero_trajectory = torch.tensor(np.zeros((1, 1, 3), dtype=np.float32)).to(device) + skel_pose = skeleton.forward_kinematics(pose, zero_trajectory) + skel_pose = skel_pose.detach().cpu().numpy() + skel_pose = np.reshape(skel_pose, (joint_count, 3)) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_pose) + pose_image = poseRenderer.create_pose_image(skel_pose, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + pose_image.save(file_name, optimize=False) + +def create_rec_pose_image(pose_index, file_name): + encoder.eval() + decoder.eval() + + pose = poses[pose_index] + pose = torch.tensor(np.expand_dims(pose, axis=0)).to(device) + + with torch.no_grad(): + pose_enc = encoder(pose) + rec_pose = decoder(pose_enc) + + rec_pose = torch.squeeze(rec_pose) + rec_pose = rec_pose.view((-1, 4)) + rec_pose = nn.functional.normalize(rec_pose, p=2, dim=1) + rec_pose = rec_pose.view((1, 1, joint_count, joint_dim)) + + zero_trajectory = torch.tensor(np.zeros((1, 1, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_pose = skeleton.forward_kinematics(rec_pose, zero_trajectory) + + skel_pose = skel_pose.detach().cpu().numpy() + skel_pose = np.squeeze(skel_pose) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_pose) + pose_image = poseRenderer.create_pose_image(skel_pose, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + pose_image.save(file_name, optimize=False) + + encoder.train() + decoder.train() + +def encode_poses(pose_indices): + + encoder.eval() + + pose_encodings = [] + + for pose_index in pose_indices: + pose = poses[pose_index] + pose = np.expand_dims(pose, axis=0) + pose = torch.from_numpy(pose).to(device) + + with torch.no_grad(): + pose_enc = encoder(pose) + + pose_enc = torch.squeeze(pose_enc) + pose_enc = pose_enc.detach().cpu().numpy() + + pose_encodings.append(pose_enc) + + encoder.train() + + return pose_encodings + +def decode_pose_encodings(pose_encodings, 
file_name): + + decoder.eval() + + rec_poses = [] + + for pose_encoding in pose_encodings: + pose_encoding = np.expand_dims(pose_encoding, axis=0) + pose_encoding = torch.from_numpy(pose_encoding).to(device) + + with torch.no_grad(): + rec_pose = decoder(pose_encoding) + + rec_pose = torch.squeeze(rec_pose) + rec_pose = rec_pose.view((-1, 4)) + rec_pose = nn.functional.normalize(rec_pose, p=2, dim=1) + rec_pose = rec_pose.view((1, joint_count, joint_dim)) + + rec_poses.append(rec_pose) + + rec_poses = torch.cat(rec_poses, dim=0) + rec_poses = torch.unsqueeze(rec_poses, dim=0) + + zero_trajectory = torch.tensor(np.zeros((1, len(pose_encodings), 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_poses = skeleton.forward_kinematics(rec_poses, zero_trajectory) + + skel_poses = skel_poses.detach().cpu().numpy() + skel_poses = np.squeeze(skel_poses) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_poses) + pose_images = poseRenderer.create_pose_images(skel_poses, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + pose_images[0].save(file_name, save_all=True, append_images=pose_images[1:], optimize=False, duration=33.0, loop=0) + + decoder.train() + +# create single original pose + +pose_index = 100 + +create_ref_pose_image(pose_index, "results/images/orig_pose_{}.gif".format(pose_index)) + +# recontruct single pose + +pose_index = 100 + +create_rec_pose_image(pose_index, "results/images/rec_pose_{}.gif".format(pose_index)) + +# reconstruct original pose sequence + +start_pose_index = 100 +end_pose_index = 500 +pose_indices = [ pose_index for pose_index in range(start_pose_index, end_pose_index)] + +pose_encodings = encode_poses(pose_indices) +decode_pose_encodings(pose_encodings, "results/images/rec_pose_sequence_{}-{}.gif".format(start_pose_index, end_pose_index)) + +# random walk + +start_pose_index = 100 +pose_count = 500 + +pose_indices = [start_pose_index] + +pose_encodings = encode_poses(pose_indices) + +for index in range(0, pose_count - 1): + random_step = np.random.random((latent_dim)).astype(np.float32) * 2.0 + pose_encodings.append(pose_encodings[index] + random_step) + +decode_pose_encodings(pose_encodings, "results/images/rec_poses_randwalk_{}_{}.gif".format(start_pose_index, pose_count)) + +# pose sequence offset following + +start_pose_index = 100 +end_pose_index = 500 + +pose_indices = [ pose_index for pose_index in range(start_pose_index, end_pose_index)] + +pose_encodings = encode_poses(pose_indices) + +offset_pose_encodings = [] + +for index in range(len(pose_encodings)): + sin_value = np.sin(index / (len(pose_encodings) - 1) * np.pi * 4.0) + offset = np.ones(shape=(latent_dim), dtype=np.float32) * sin_value * 4.0 + offset_pose_encoding = pose_encodings[index] + offset + offset_pose_encodings.append(offset_pose_encoding) + +decode_pose_encodings(offset_pose_encodings, "results/images/rec_pose_sequence_offset_{}-{}.gif".format(start_pose_index, end_pose_index)) + +# interpolate two original pose sequences + +start_pose1_index = 100 +end_pose1_index = 500 + +start_pose2_index = 1100 +end_pose2_index = 1500 + +pose1_indices = [ pose_index for pose_index in range(start_pose1_index, end_pose1_index)] +pose2_indices = [ pose_index for pose_index in range(start_pose2_index, end_pose2_index)] + +pose1_encodings = encode_poses(pose1_indices) +pose2_encodings = encode_poses(pose2_indices) + +mixed_pose_encodings = [] + +for index in range(len(pose1_indices)): + mix_factor = index / (len(pose1_indices) - 1) + 
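+    # added note: mix_factor ramps linearly from 0 to 1 across the excerpt, so the
+    # decoded animation crossfades in latent space from the first pose sequence to
+    # the second one.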
mixed_pose_encoding = pose1_encodings[index] * (1.0 - mix_factor) + pose2_encodings[index] * mix_factor + mixed_pose_encodings.append(mixed_pose_encoding) + +decode_pose_encodings(mixed_pose_encodings, "results/images/rec_pose_sequence_mix_{}-{}_{}-{}.gif".format(start_pose1_index, end_pose2_index, start_pose2_index, end_pose2_index)) + + diff --git a/autoregression/rnn/.pylint.d/autoregression_rnn1.stats b/autoregression/rnn/.pylint.d/autoregression_rnn1.stats new file mode 100644 index 0000000000000000000000000000000000000000..7e698af5394bc68d16bb9e9d5f3e7a3eca4906bb GIT binary patch literal 1233 zcmZux%We}f6rH4EGqEYX-xZMd~U ztA$YtVT`hlI$m<+!pHL-916A4Axq$xX?2Gv#)j+xMux8_!wezo!*D~55^5c?8%Vh^ z5uHK0qK-=BdJBVC+pg;hXHqCmoYq9@$hm-tGgKhDCYwSyZY#=o$lk-W6eteil~7a? z#W#@}vL0NNUOG`pPDEL0<7@;^oByO$kiv9hutFO`Ds4q0KgCdWN2SP#q9rDL1y@}u zCaJX~*UUOoAE^(QhUXk5@SJGxxM@jKxU{~)YwOYxn%eL~0#hUvGM=-0j zP)?%rq^W{Go#|-jWjwRw4rxr66*a;}(+}WCX;SiX$@nN437mFu)H&3+pGqt1LiTc(YUf1)&pVz_aqe&{k{_#f9go&ub)mJm zMJyziI4hAb!4_y%4+qcp4m5p&@6Is%o6#Pm2jeGMWQpTIQ^ zC&Q8ack{M+6%zC&=M~2hQA{8yJ^(3RYF31=;dOEF4X*3e+&dxdJjV+?e}`X|yDs3x ze*->f8t0)+GHPP*g=v9Q`Lf!!D-Q# 0: + dense_layers.append(("autoreg_dense_0", nn.Linear(self.rnn_layer_size, self.dense_layer_sizes[0]))) + dense_layers.append(("autoregr_dense_relu_0", nn.ReLU())) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("autoreg_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("autoregr_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("autoregr_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.pose_dim))) + else: + dense_layers.append(("autoreg_dense_0", nn.Linear(self.rnn_layer_size, self.pose_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + #print("x 1 ", x.shape) + x, (_, _) = self.rnn_layers(x) + #print("x 2 ", x.shape) + x = x[:, -1, :] # only last time step + #print("x 3 ", x.shape) + yhat = self.dense_layers(x) + #print("yhat ", yhat.shape) + return yhat + +autoreg = AutoRegressor(pose_dim, ar_rnn_layer_count, ar_rnn_layer_size, ar_dense_layer_sizes).to(device) + +print(autoreg) + +""" +test_input = torch.zeros((1, sequence_length, pose_dim)).to(device) +test_output = autoreg(test_input) +""" + +if save_models == True: + autoreg.train() + + # save using pickle + torch.save(autoreg, "results/models/autoreg.pth") + + # save using onnx + x = torch.zeros((1, sequence_length, pose_dim)).to(device) + torch.onnx.export(autoreg, x, "results/models/autoreg.onnx") + + autoreg.test() + +if save_tscript == True: + autoreg.train() + + # save using TochScript + x = torch.rand((1, sequence_length, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(autoreg, x) + script_module.save("results/models/autoreg.pt") + + autoreg.test() + +if load_weights and autoreg_weights_file: + autoreg.load_state_dict(torch.load(autoreg_weights_file, map_location=device)) + +# Training +ar_optimizer = torch.optim.Adam(autoreg.parameters(), lr=ar_learning_rate) + +def ar_norm_loss(yhat): + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def ar_quat_loss(y, yhat): + # y and yhat shapes: batch_size, seq_length, pose_dim + + # normalize quaternion + + _y = y.view((-1, 4)) + _yhat = yhat.view((-1, 4)) + + _yhat_norm = nn.functional.normalize(_yhat, p=2, dim=1) + + # inverse 
of quaternion: https://www.mathworks.com/help/aeroblks/quaternioninverse.html + _yhat_inv = _yhat_norm * torch.tensor([[1.0, -1.0, -1.0, -1.0]], dtype=torch.float32).to(device) + + # calculate difference quaternion + _diff = qmul(_yhat_inv, _y) + # length of complex part + _len = torch.norm(_diff[:, 1:], dim=1) + # atan2 + _atan = torch.atan2(_len, _diff[:, 0]) + # abs + _abs = torch.abs(_atan) + _loss = torch.mean(_abs) + return _loss + +# autoencoder loss function +def ar_loss(y, yhat): + _norm_loss = ar_norm_loss(yhat) + _quat_loss = ar_quat_loss(y, yhat) + + _total_loss = 0.0 + _total_loss += _norm_loss * ar_norm_loss_scale + _total_loss += _quat_loss * ar_quat_loss_scale + + return _total_loss, _norm_loss, _quat_loss + +def ar_train_step(pose_sequences, target_poses): + + pred_poses = autoreg(pose_sequences) + + _ar_loss, _ar_norm_loss, _ar_quat_loss = ar_loss(target_poses, pred_poses) + + #print("_ae_pos_loss ", _ae_pos_loss) + + # Backpropagation + ar_optimizer.zero_grad() + _ar_loss.backward() + + ar_optimizer.step() + + return _ar_loss, _ar_norm_loss, _ar_quat_loss + +def ar_test_step(pose_sequences, target_poses): + + autoreg.eval() + + with torch.no_grad(): + pred_poses = autoreg(pose_sequences) + _ar_loss, _ar_norm_loss, _ar_quat_loss = ar_loss(target_poses, pred_poses) + + autoreg.train() + + return _ar_loss, _ar_norm_loss, _ar_quat_loss + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ar train"] = [] + loss_history["ar test"] = [] + loss_history["ar norm"] = [] + loss_history["ar quat"] = [] + + for epoch in range(epochs): + start = time.time() + + ar_train_loss_per_epoch = [] + ar_norm_loss_per_epoch = [] + ar_quat_loss_per_epoch = [] + + for train_batch in train_dataloader: + input_pose_sequences = train_batch[0].to(device) + target_poses = train_batch[1].to(device) + + _ar_loss, _ar_norm_loss, _ar_quat_loss = ar_train_step(input_pose_sequences, target_poses) + + _ar_loss = _ar_loss.detach().cpu().numpy() + _ar_norm_loss = _ar_norm_loss.detach().cpu().numpy() + _ar_quat_loss = _ar_quat_loss.detach().cpu().numpy() + + ar_train_loss_per_epoch.append(_ar_loss) + ar_norm_loss_per_epoch.append(_ar_norm_loss) + ar_quat_loss_per_epoch.append(_ar_quat_loss) + + ar_train_loss_per_epoch = np.mean(np.array(ar_train_loss_per_epoch)) + ar_norm_loss_per_epoch = np.mean(np.array(ar_norm_loss_per_epoch)) + ar_quat_loss_per_epoch = np.mean(np.array(ar_quat_loss_per_epoch)) + + ar_test_loss_per_epoch = [] + + for test_batch in test_dataloader: + input_pose_sequences = train_batch[0].to(device) + target_poses = train_batch[1].to(device) + + _ar_loss, _, _ = ar_train_step(input_pose_sequences, target_poses) + + _ar_loss = _ar_loss.detach().cpu().numpy() + + ar_test_loss_per_epoch.append(_ar_loss) + + ar_test_loss_per_epoch = np.mean(np.array(ar_test_loss_per_epoch)) + + if epoch % model_save_interval == 0 and save_weights == True: + autoreg.save_weights("results/weights/autoreg_weights_epoch_{}".format(epoch)) + + loss_history["ar train"].append(ar_train_loss_per_epoch) + loss_history["ar test"].append(ar_test_loss_per_epoch) + loss_history["ar norm"].append(ar_norm_loss_per_epoch) + loss_history["ar quat"].append(ar_quat_loss_per_epoch) + + print ('epoch {} : ar train: {:01.4f} ar test: {:01.4f} norm {:01.4f} quat {:01.4f} time {:01.2f}'.format(epoch + 1, ar_train_loss_per_epoch, ar_test_loss_per_epoch, ar_norm_loss_per_epoch, ar_quat_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, 
test_dataloader, epochs) + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(autoreg.state_dict(), "results/weights/autoreg_weights_epoch_{}".format(epochs)) + +# inference and rendering +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + +# create ref pose sequence +def create_ref_sequence_anim(start_pose_index, pose_count, file_name): + + start_pose_index = max(start_pose_index, sequence_length) + pose_count = min(pose_count, pose_sequence_length - start_pose_index) + + sequence_excerpt = pose_sequence[start_pose_index:start_pose_index + pose_count, :] + sequence_excerpt = np.reshape(sequence_excerpt, (pose_count, joint_count, joint_dim)) + + sequence_excerpt = torch.tensor(np.expand_dims(sequence_excerpt, axis=0)).to(device) + zero_trajectory = torch.tensor(np.zeros((1, pose_count, 3), dtype=np.float32)).to(device) + + skel_sequence = skeleton.forward_kinematics(sequence_excerpt, zero_trajectory) + + skel_sequence = np.squeeze(skel_sequence.cpu().numpy()) + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + + +def create_pred_sequence_anim(start_pose_index, pose_count, file_name): + autoreg.eval() + + start_pose_index = max(start_pose_index, sequence_length) + pose_count = min(pose_count, pose_sequence_length - start_pose_index) + + start_seq = pose_sequence[start_pose_index - sequence_length:start_pose_index, :] + start_seq = torch.from_numpy(start_seq).to(device) + + next_seq = start_seq + + pred_poses = [] + + for i in range(pose_count): + with torch.no_grad(): + pred_pose = autoreg(torch.unsqueeze(next_seq, axis=0)) + + # normalize pred pose + pred_pose = torch.squeeze(pred_pose) + pred_pose = pred_pose.view((-1, 4)) + pred_pose = nn.functional.normalize(pred_pose, p=2, dim=1) + pred_pose = pred_pose.view((1, pose_dim)) + + pred_poses.append(pred_pose) + + #print("next_seq s ", next_seq.shape) + #print("pred_pose s ", pred_pose.shape) + + next_seq = torch.cat([next_seq[1:,:], pred_pose], axis=0) + + print("predict time step ", i) + + pred_poses = torch.cat(pred_poses, dim=0) + pred_poses = pred_poses.view((1, pose_count, joint_count, joint_dim)) + + + zero_trajectory = torch.tensor(np.zeros((1, pose_count, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_poses = skeleton.forward_kinematics(pred_poses, zero_trajectory) + + skel_poses = skel_poses.detach().cpu().numpy() + skel_poses = np.squeeze(skel_poses) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_poses) + pose_images = poseRenderer.create_pose_images(skel_poses, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + pose_images[0].save(file_name, save_all=True, append_images=pose_images[1:], optimize=False, duration=33.0, loop=0) + + autoreg.train() + + +seq_start_pose_index = 1000 +seq_pose_count = 200 + +create_ref_sequence_anim(seq_start_pose_index, seq_pose_count, "ref_{}_{}.gif".format(seq_start_pose_index, seq_pose_count)) +create_pred_sequence_anim(seq_start_pose_index, seq_pose_count, "pred_{}_{}.gif".format(seq_start_pose_index, 
seq_pose_count)) diff --git a/autoregression/rnn_mdn/.pylint.d/autoregression_rnn1.stats b/autoregression/rnn_mdn/.pylint.d/autoregression_rnn1.stats new file mode 100644 index 0000000000000000000000000000000000000000..7e698af5394bc68d16bb9e9d5f3e7a3eca4906bb GIT binary patch literal 1233 zcmZux%We}f6rH4EGqEYX-xZMd~U ztA$YtVT`hlI$m<+!pHL-916A4Axq$xX?2Gv#)j+xMux8_!wezo!*D~55^5c?8%Vh^ z5uHK0qK-=BdJBVC+pg;hXHqCmoYq9@$hm-tGgKhDCYwSyZY#=o$lk-W6eteil~7a? z#W#@}vL0NNUOG`pPDEL0<7@;^oByO$kiv9hutFO`Ds4q0KgCdWN2SP#q9rDL1y@}u zCaJX~*UUOoAE^(QhUXk5@SJGxxM@jKxU{~)YwOYxn%eL~0#hUvGM=-0j zP)?%rq^W{Go#|-jWjwRw4rxr66*a;}(+}WCX;SiX$@nN437mFu)H&3+pGqt1LiTc(YUf1)&pVz_aqe&{k{_#f9go&ub)mJm zMJyziI4hAb!4_y%4+qcp4m5p&@6Is%o6#Pm2jeGMWQpTIQ^ zC&Q8ack{M+6%zC&=M~2hQA{8yJ^(3RYF31=;dOEF4X*3e+&dxdJjV+?e}`X|yDs3x ze*->f8t0)+GHPP*g=v9Q`Lf!!D-Q#g1tbl z&;xYcD|OYOEZK2eF9d7&-presM}6vl|JBc$-(!3YyVHkBWs0Cge2nmt1#hgFTj88E zdSbPnRE5SPL^{vBsD$?M{dEueQqN4J8SHVR?=gz_NS|P6#f)*x5@{cH7R+j?=aJq* zF04(_E68WeGlksXV36v&c~jv~NiB#shA5Lb=P>e?NsP|PqLf~^nsE^+h2ttHFKZ=; ztZHMui?jy^%m1XUlf(JKVxAU4YUAWmWR6gM&y*~PW)&uU0~eLVjuFfibB+{-J8v7| zTX1GYA!aD8ASQTWThb-mB&ikC4@7Dlj^=_ekF+VG8bzlo0JHOuM%RSTT&OY4Li&}o70s;R&!p1@hstAs}sH&xB7bZG20}3ugZP&&!Dv*3L18OM>@o~@TDmt%^`2L66qGan6e`6tn)1L z54%tc%HX(*tkY%Bm~Gk&4mw=gTC(ywmBEEZ`f8nO-E$vKI-b 0: + dense_layers.append(("autoreg_dense_0", nn.Linear(self.rnn_layer_size, self.dense_layer_sizes[0]))) + dense_layers.append(("autoreg_dense_relu_0", nn.ReLU())) + + for layer_index in range(1, dense_layer_count): + dense_layers.append(("autoreg_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("autoreg_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("autoregr_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.pose_dim))) + else: + dense_layers.append(("autoreg_dense_0", nn.Linear(self.rnn_layer_size, self.pose_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + # mdn mu layers + mdn_mu_layers = [] + mdn_mu_layers.append(("autoreg_mdn_mu_dense", nn.Linear(self.pose_dim, self.pose_dim * self.mix_count))) + self.mdn_mu_layers = nn.Sequential(OrderedDict(mdn_mu_layers)) + + # mdn sigma layers + mdn_sigma_layers = [] + mdn_sigma_layers.append(("autoreg_mdn_sigma_dense", nn.Linear(self.pose_dim, self.pose_dim * self.mix_count))) + self.mdn_sigma_layers = nn.Sequential(OrderedDict(mdn_sigma_layers)) + + # mdn alpha layers + mdn_alpha_layers = [] + mdn_alpha_layers.append(("autoreg_mdn_alpha_dense", nn.Linear(self.pose_dim, self.mix_count))) + mdn_alpha_layers.append(("autoreg_mdn_alpha_softmax", nn.Softmax(dim=1))) + self.mdn_alpha_layers = nn.Sequential(OrderedDict(mdn_alpha_layers)) + + def forward(self, x): + #print("x 1 ", x.shape) + x, (_, _) = self.rnn_layers(x) + #print("x 2 ", x.shape) + x = x[:, -1, :] # only last time step + #print("x 3 ", x.shape) + x = self.dense_layers(x) + #print("x ", x.shape) + mu = self.mdn_mu_layers(x) + mu = mu.view((-1, self.mix_count, self.pose_dim)) + #print("mus ", mus.shape) + sigma = self.mdn_sigma_layers(x) + sigma = torch.exp(sigma) + sigma = sigma.view((-1, self.mix_count, self.pose_dim)) + #print("sigmas ", sigmas.shape) + alpha = self.mdn_alpha_layers(x) + alpha = alpha.view((-1, self.mix_count)) + #print("alphas ", alphas.shape) + return mu, sigma, alpha + +autoreg = AutoRegressor(pose_dim, ar_rnn_layer_count, ar_rnn_layer_size, ar_dense_layer_sizes, ar_mdn_mix_count).to(device) + +print(autoreg) + 
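+# Shape note: given the view() calls in AutoRegressor.forward() above, the test forward pass below is expected to return mu and sigma with shape (batch, mix_count, pose_dim) and alpha with shape (batch, mix_count).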
+test_input = torch.zeros((1, sequence_length, pose_dim)).to(device) +mu, sigma, alpha = autoreg(test_input) + + +""" +test_input = torch.zeros((1, sequence_length, pose_dim)).to(device) +mu, sigma, alpha = autoreg(test_input) +""" + +if save_models == True: + autoreg.train() + + # save using pickle + torch.save(autoreg, "results/models/autoreg.pth") + + # save using onnx + x = torch.zeros((1, sequence_length, pose_dim)).to(device) + torch.onnx.export(autoreg, x, "results/models/autoreg.onnx") + + autoreg.test() + +if save_tscript == True: + autoreg.train() + + # save using TochScript + x = torch.rand((1, sequence_length, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(autoreg, x) + script_module.save("results/models/autoreg.pt") + + autoreg.test() + +if load_weights and autoreg_weights_file: + autoreg.load_state_dict(torch.load(autoreg_weights_file, map_location=device)) + +# Training +ar_optimizer = torch.optim.Adam(autoreg.parameters(), lr=ar_learning_rate) + +def ar_norm_loss(yhat): + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def mdn_loss(y, mu, sigma, alpha): + """Calculates the error, given the MoG parameters and the target + The loss is the negative log likelihood of the data given the MoG + parameters. + """ + + #normal = Normal(mu, sigma+1+1e-7) + normal = Normal(mu, sigma+1e-7) + loglik = normal.log_prob(y.expand_as(sigma)) + #loglik = torch.sum(loglik, dim=2) + loglik = torch.mean(loglik, dim=2) + loss = -torch.logsumexp(torch.log(alpha) + loglik, dim=1) + + return torch.mean(loss) + +# autoencoder loss function +def ar_loss(y, mu, sigma, alpha): + + _norm_loss = ar_norm_loss(mu) + _mdn_loss = mdn_loss(y, mu, sigma, alpha) + + #print("_mdn_loss ", _mdn_loss) + + _total_loss = 0.0 + _total_loss += _norm_loss * ar_norm_loss_scale + _total_loss += _mdn_loss * ar_mdn_loss_scale + + return _total_loss, _norm_loss, _mdn_loss + +def ar_train_step(pose_sequences, target_poses): + + mu, sigma, alpha = autoreg(pose_sequences) + + _ar_loss, _ar_norm_loss, _ar_mdn_loss = ar_loss(target_poses, mu, sigma, alpha) + + #print("_ae_pos_loss ", _ae_pos_loss) + + # Backpropagation + ar_optimizer.zero_grad() + _ar_loss.backward() + + ar_optimizer.step() + + return _ar_loss, _ar_norm_loss, _ar_mdn_loss + +def ar_test_step(pose_sequences, target_poses): + + autoreg.eval() + + with torch.no_grad(): + mu, sigma, alpha = autoreg(pose_sequences) + _ar_loss, _ar_norm_loss, _ar_mdn_loss = ar_loss(target_poses, mu, sigma, alpha) + + autoreg.train() + + return _ar_loss, _ar_norm_loss, _ar_mdn_loss + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["ar train"] = [] + loss_history["ar test"] = [] + loss_history["ar norm"] = [] + loss_history["ar mdn"] = [] + + for epoch in range(epochs): + start = time.time() + + ar_train_loss_per_epoch = [] + ar_norm_loss_per_epoch = [] + ar_mdn_loss_per_epoch = [] + + for train_batch in train_dataloader: + input_pose_sequences = train_batch[0].to(device) + target_poses = train_batch[1].to(device) + + _ar_loss, _ar_norm_loss, _ar_mdn_loss = ar_train_step(input_pose_sequences, target_poses) + + _ar_loss = _ar_loss.detach().cpu().numpy() + _ar_norm_loss = _ar_norm_loss.detach().cpu().numpy() + _ar_mdn_loss = _ar_mdn_loss.detach().cpu().numpy() + + ar_train_loss_per_epoch.append(_ar_loss) + ar_norm_loss_per_epoch.append(_ar_norm_loss) + ar_mdn_loss_per_epoch.append(_ar_mdn_loss) + + ar_train_loss_per_epoch = 
np.mean(np.array(ar_train_loss_per_epoch)) + ar_norm_loss_per_epoch = np.mean(np.array(ar_norm_loss_per_epoch)) + ar_mdn_loss_per_epoch = np.mean(np.array(ar_mdn_loss_per_epoch)) + + ar_test_loss_per_epoch = [] + + # evaluate on the test data (no gradient updates) + for test_batch in test_dataloader: + input_pose_sequences = test_batch[0].to(device) + target_poses = test_batch[1].to(device) + + _ar_loss, _, _ = ar_test_step(input_pose_sequences, target_poses) + + _ar_loss = _ar_loss.detach().cpu().numpy() + + ar_test_loss_per_epoch.append(_ar_loss) + + ar_test_loss_per_epoch = np.mean(np.array(ar_test_loss_per_epoch)) + + if epoch % model_save_interval == 0 and save_weights == True: + torch.save(autoreg.state_dict(), "results/weights/autoreg_weights_epoch_{}".format(epoch)) + + loss_history["ar train"].append(ar_train_loss_per_epoch) + loss_history["ar test"].append(ar_test_loss_per_epoch) + loss_history["ar norm"].append(ar_norm_loss_per_epoch) + loss_history["ar mdn"].append(ar_mdn_loss_per_epoch) + + print('epoch {} : ar train: {:01.4f} ar test: {:01.4f} norm {:01.4f} mdn {:01.4f} time {:01.2f}'.format(epoch + 1, ar_train_loss_per_epoch, ar_test_loss_per_epoch, ar_norm_loss_per_epoch, ar_mdn_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(autoreg.state_dict(), "results/weights/autoreg_weights_epoch_{}".format(epochs)) + +# inference and rendering +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + + +def sample(mu, sigma, alpha): + alpha_i = Categorical(alpha).sample() + return mu[:,alpha_i,:] + +def sample2(mu, sigma, alpha): + alpha_i = Categorical(alpha).sample() + normal = Normal(mu[:,alpha_i,:], sigma[:,alpha_i,:]+1e-7) + return normal.sample() + +# create ref pose sequence +def create_ref_sequence_anim(start_pose_index, pose_count, file_name): + + start_pose_index = max(start_pose_index, sequence_length) + pose_count = min(pose_count, pose_sequence_length - start_pose_index) + + sequence_excerpt = pose_sequence[start_pose_index:start_pose_index + pose_count, :] + sequence_excerpt = np.reshape(sequence_excerpt, (pose_count, joint_count, joint_dim)) + + sequence_excerpt = torch.tensor(np.expand_dims(sequence_excerpt, axis=0)).to(device) + zero_trajectory = torch.tensor(np.zeros((1, pose_count, 3), dtype=np.float32)).to(device) + + skel_sequence = skeleton.forward_kinematics(sequence_excerpt, zero_trajectory) + + skel_sequence = np.squeeze(skel_sequence.cpu().numpy()) + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + + +def create_pred_sequence_anim(start_pose_index, pose_count, file_name): + autoreg.eval() + + start_pose_index = max(start_pose_index, sequence_length) + pose_count = min(pose_count, pose_sequence_length - start_pose_index) + + start_seq = pose_sequence[start_pose_index - sequence_length:start_pose_index, :] + start_seq = torch.from_numpy(start_seq).to(device) + + next_seq = start_seq + + pred_poses = [] + + for i in range(pose_count): + with torch.no_grad(): + mu, sigma, alpha 
= autoreg(torch.unsqueeze(next_seq, axis=0)) + pred_pose = sample(mu, sigma, alpha) + + # normalize pred pose + pred_pose = torch.squeeze(pred_pose) + pred_pose = pred_pose.view((-1, 4)) + pred_pose = nn.functional.normalize(pred_pose, p=2, dim=1) + pred_pose = pred_pose.view((1, pose_dim)) + + pred_poses.append(pred_pose) + + #print("next_seq s ", next_seq.shape) + #print("pred_pose s ", pred_pose.shape) + + next_seq = torch.cat([next_seq[1:,:], pred_pose], axis=0) + + print("predict time step ", i) + + pred_poses = torch.cat(pred_poses, dim=0) + pred_poses = pred_poses.view((1, pose_count, joint_count, joint_dim)) + + + zero_trajectory = torch.tensor(np.zeros((1, pose_count, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_poses = skeleton.forward_kinematics(pred_poses, zero_trajectory) + + skel_poses = skel_poses.detach().cpu().numpy() + skel_poses = np.squeeze(skel_poses) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_poses) + pose_images = poseRenderer.create_pose_images(skel_poses, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + pose_images[0].save(file_name, save_all=True, append_images=pose_images[1:], optimize=False, duration=33.0, loop=0) + + autoreg.train() + + +seq_start_pose_index = 1000 +seq_pose_count = 200 + +create_ref_sequence_anim(seq_start_pose_index, seq_pose_count, "ref_{}_{}.gif".format(seq_start_pose_index, seq_pose_count)) +create_pred_sequence_anim(seq_start_pose_index, seq_pose_count, "pred_{}_{}.gif".format(seq_start_pose_index, seq_pose_count)) diff --git a/common/__init__.py b/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/common/mocap_dataset.py b/common/mocap_dataset.py new file mode 100644 index 0000000..1dbf847 --- /dev/null +++ b/common/mocap_dataset.py @@ -0,0 +1,167 @@ +# dataset for storing and processing motion capture data +# code adapted from original pyTorch implementation of Quaternet +# quaternet / common / mocap_dataset.py + +import numpy as np +import torch +from common.skeleton import Skeleton +from common.quaternion import qeuler_np, qfix + +class MocapDataset: + def __init__(self, path, fps): + skeleton, data = self._load(path) + + self._skeleton = skeleton + self._data = data + self._fps = fps + self._use_gpu = False + + def cuda(self): + self._use_gpu = True + self._skeleton.cuda() + return self + + # assumes that all subjects in the dataset possess the same skeleton + # also assumes that only one action is stored in the dataset + # mandatoy content in dataset: + # offsets: relative position offsets of joints with respect to parent joints + # parents: parent joints in hiearchical skeleton topology + # rot_local: relative rotations (as quaternions) of joints with respect to parent joints + # pos_world: absolute 3D positions of joints + # non-mandatory content in dataset: + # everyhing else: which will be just copied into the output dataset + def _load(self, path): + data = np.load(path, 'r', allow_pickle=True) + + non_copy_keys = ["offsets", "parents", "children", "rot_local", "rot_world", "pos_local", "names"] + + # create skeleton + subject = list(data.keys())[0] + skeleton_offsets = data[subject]["offsets"] + skeleton_parents = data[subject]["parents"] + + root_joint_index = skeleton_parents.index(-1) + + skeleton = Skeleton(offsets=skeleton_offsets, parents=skeleton_parents) + + # create mocap_data + mocap_data = {} + action_name = "A1" + + for subject in data.keys(): + subject_data = data[subject] + + rotations = 
np.copy(subject_data["rot_local"]) + positions = subject_data["pos_world"] + trajectory = np.copy(positions[:, root_joint_index, :]) + + mocap_data[subject] = {} + mocap_data[subject][action_name] = { + "rotations": rotations, + "trajectory": trajectory + } + + # add non-mandatory content to mocap data + for subject in data.keys(): + subject_data = data[subject] + + for key in list(subject_data.keys()): + + if key in non_copy_keys: + continue + + mocap_data[subject][action_name][key] = subject_data[key] + + return skeleton, mocap_data + + def downsample(self, factor, keep_strides=True): + """ + Downsample this dataset by an integer factor, keeping all strides of the data + if keep_strides is True. + The frame rate must be divisible by the given factor. + The sequences will be replaced by their downsampled versions, whose actions + will have '_d0', ... '_dn' appended to their names. + """ + assert self._fps % factor == 0 + + for subject in self._data.keys(): + new_actions = {} + for action in list(self._data[subject].keys()): + for idx in range(factor): + tup = {} + for k in self._data[subject][action].keys(): + tup[k] = self._data[subject][action][k][idx::factor] + new_actions[action + '_d' + str(idx)] = tup + if not keep_strides: + break + self._data[subject] = new_actions + + self._fps //= factor + + def compute_euler_angles(self, order): + for subject in self._data.values(): + for action in subject.values(): + action['rotations_euler'] = qeuler_np(action['rotations'], order) + + def compute_positions(self): + + """ + TODO: since tensorflow doesn't permit the assignment of values to tensors, I'm converting back and forth between numpy arrays and tensors. This is very slow. Maybe there is a better alternative? + """ + + for subject in self._data.values(): + for action in subject.values(): + rotations = torch.from_numpy(action['rotations'].astype('float32')).unsqueeze(0) + trajectory = torch.from_numpy(action['trajectory'].astype('float32')).unsqueeze(0) + + if self._use_gpu: + rotations = rotations.cuda() + trajectory = trajectory.cuda() + + action['positions_world'] = self._skeleton.forward_kinematics(rotations, trajectory).squeeze(0).cpu().numpy() + + # set root position to zero for calculating local joint positions + trajectory[:, :, :] = 0 + action['positions_local'] = self._skeleton.forward_kinematics(rotations, trajectory).squeeze(0).cpu().numpy() + + def compute_standardized_values(self, value_key): + + for subject in self._data.values(): + for action in subject.values(): + values = action[value_key] + + std = np.std(values, axis=0) + 1e-10 + mean = np.mean(values, axis=0) + std_values = (values - mean) / std + + action[value_key + "_std"] = std + action[value_key + "_mean"] = mean + action[value_key + "_standardized"] = std_values + + + def __getitem__(self, key): + return self._data[key] + + + def subjects(self): + return self._data.keys() + + + def subject_actions(self, subject): + return self._data[subject].keys() + + + def all_actions(self): + result = [] + for subject, actions in self._data.items(): + for action in actions.keys(): + result.append((subject, action)) + return result + + + def fps(self): + return self._fps + + + def skeleton(self): + return self._skeleton \ No newline at end of file diff --git a/common/pose_renderer.py b/common/pose_renderer.py new file mode 100644 index 0000000..2b80225 --- /dev/null +++ b/common/pose_renderer.py @@ -0,0 +1,193 @@ +import numpy as np +from matplotlib import pyplot as plt +import matplotlib.animation as animation +from 
matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +import mpl_toolkits.mplot3d as plt3d +from PIL import Image + + +class PoseRenderer: + def __init__(self, edge_data): + self.edge_data = edge_data + + def _fig2data (self, fig): + """ + @brief Convert a Matplotlib figure to a 4D numpy array with RGBA channels and return it + @param fig a matplotlib figure + @return a numpy 3D array of RGBA values + """ + # draw the renderer + fig.canvas.draw ( ) + + # Get the RGBA buffer from the figure + w,h = fig.canvas.get_width_height() + + #print("w ", w, " h ", h) + + buf = np.fromstring ( fig.canvas.tostring_argb(), dtype=np.uint8 ) + buf.shape = ( w, h,4 ) + + # canvas.tostring_argb give pixmap in ARGB mode. Roll the ALPHA channel to have it in RGBA mode + buf = np.roll ( buf, 3, axis = 2 ) + return buf + + def _fig2img (self, fig): + """ + @brief Convert a Matplotlib figure to a PIL Image in RGBA format and return it + @param fig a matplotlib figure + @return a Python Imaging Library ( PIL ) image + """ + # put the figure pixmap into a numpy array + buf = self._fig2data ( fig ) + w, h, d = buf.shape + + return Image.frombuffer( "RGBA", ( w ,h ), buf.tostring( ) ) + + def create_pose_image(self, pose, axis_min, axis_max, rot_elev, rot_azi, line_width, image_xinch, image_yinch): + point_data = np.array([pose[:,0], pose[:,1], pose[:,2]]) + lines_data = np.array([[pose[edge[0],:], pose[edge[1],:]] for edge in self.edge_data]) + + fig = plt.figure(figsize=(image_xinch,image_yinch)) + plt.axis("off") + fig.tight_layout() + + ax = plt3d.Axes3D(fig) + ax.view_init(elev=rot_elev, azim=rot_azi) + + ax.set_xlim(axis_min[0], axis_max[0]) + ax.set_ylim(axis_min[1], axis_max[1]) + ax.set_zlim(axis_min[2], axis_max[2]) + + # Make panes transparent + ax.xaxis.pane.fill = False # Left pane + ax.yaxis.pane.fill = False # Right pane + ax.zaxis.pane.fill = False # Right pane + + ax.grid(False) # Remove grid lines + + # Remove tick labels + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_zticklabels([]) + + # Transparent spines + ax.w_xaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + ax.w_yaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + ax.w_zaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + + # Transparent panes + ax.w_xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) + ax.w_yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) + + # No ticks + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_zticks([]) + + for line in lines_data: + ax.plot(line[:,0], line[:,1], zs=line[:,2], linewidth=line_width, color='cadetblue', alpha=0.5) + ax.scatter(point_data[0, :], point_data[1, :], point_data[2, :], s=line_width * 8, color='darkslateblue', alpha=0.5) + + fig.show() + + pose_image = self._fig2img ( fig ) + + plt.close() + + return pose_image + + def create_pose_images(self, poses, axis_min, axis_max, rot_elev, rot_azi, line_width, image_xinch, image_yinch): + pose_count = poses.shape[0] + pose_images = [] + + fig = plt.figure(figsize=(image_xinch,image_yinch)) + plt.axis("off") + fig.tight_layout() + + ax = plt3d.Axes3D(fig) + ax.view_init(elev=rot_elev, azim=rot_azi) + + ax.set_xlim(axis_min[0], axis_max[0]) + ax.set_ylim(axis_min[1], axis_max[1]) + ax.set_zlim(axis_min[2], axis_max[2]) + + # Make panes transparent + ax.xaxis.pane.fill = False # Left pane + ax.yaxis.pane.fill = False # Right pane + ax.zaxis.pane.fill = False # Right pane + + ax.grid(False) # Remove grid lines + + # Remove tick labels + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_zticklabels([]) + + # Transparent spines + 
ax.w_xaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + ax.w_yaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + ax.w_zaxis.line.set_color((1.0, 1.0, 1.0, 0.0)) + + # Transparent panes + ax.w_xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) + ax.w_yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) + + # No ticks + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_zticks([]) + + scatter_data = None + + fig.show() + + for pI in range(pose_count): + + # cleanup previous drawing + if scatter_data != None: + scatter_data.remove() + if len(ax.lines) > 0: + ax.lines.clear() + + point_data = np.array([poses[pI, :,0], poses[pI, :,1], poses[pI,:,2]]) + lines_data = np.array([[poses[pI, edge[0],:], poses[pI, edge[1],:]] for edge in self.edge_data]) + + for line in lines_data: + ax.plot(line[:,0], line[:,1], zs=line[:,2], linewidth=line_width, color='cadetblue', alpha=0.5) + scatter_data = ax.scatter(point_data[0, :], point_data[1, :], point_data[2, :], s=line_width*8.0, color='darkslateblue', alpha=0.5) + + im = self._fig2img ( fig ) + + pose_images.append(im) + + plt.close() + + return pose_images + + def create_grid_image(self, images, grid): + h_count = grid[0] + v_count = grid[1] + + fig = plt.figure(figsize=(h_count * 2, v_count * 2)) + + image_count = h_count * v_count + + for iI in range(image_count): + ax = fig.add_subplot(v_count, h_count, iI + 1) + ax.imshow(images[iI]) + ax.spines['top'].set_visible(False) + ax.spines['left'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.set_xticks([]) + ax.set_yticks([]) + fig.tight_layout() + + fig.show() + + grid_image = self._fig2img ( fig ) + + plt.close() + + return grid_image + \ No newline at end of file diff --git a/common/quaternion.py b/common/quaternion.py new file mode 100644 index 0000000..d190904 --- /dev/null +++ b/common/quaternion.py @@ -0,0 +1,238 @@ +# quaternion math implemented using tensorflow tensors +# code adapted from original pyTorch implementation of Quaternet +# quaternet / common / quaternion.py + +import torch +import numpy as np + +def qmul(q, r): + """ + Multiply quaternion(s) q with quaternion(s) s + Expects two equally-sized tensors of shape (*, 4), where * denotes any number of dimensions + Returns q*r as a tensor of shape (*, 4) + """ + + assert q.shape[-1] == 4 + assert r.shape[-1] == 4 + + original_shape = q.shape + + # Compute outer product + terms = torch.bmm(r.view(-1, 4, 1), q.view(-1, 1, 4)) + + w = terms[:, 0, 0] - terms[:, 1, 1] - terms[:, 2, 2] - terms[:, 3, 3] + x = terms[:, 0, 1] + terms[:, 1, 0] - terms[:, 2, 3] + terms[:, 3, 2] + y = terms[:, 0, 2] + terms[:, 1, 3] + terms[:, 2, 0] - terms[:, 3, 1] + z = terms[:, 0, 3] - terms[:, 1, 2] + terms[:, 2, 1] + terms[:, 3, 0] + return torch.stack((w, x, y, z), dim=1).view(original_shape) + +def qrot(q, v): + """ + Rotate vector(s) v about the rotation described by quaternion(s) q. + Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v, + where * denotes any number of dimensions. + Returns a tensor of shape (*, 3). + """ + assert q.shape[-1] == 4 + assert v.shape[-1] == 3 + assert q.shape[:-1] == v.shape[:-1] + + original_shape = list(v.shape) + q = q.view(-1, 4) + v = v.view(-1, 3) + + qvec = q[:, 1:] + + uv = torch.cross(qvec, v, dim=1) + uuv = torch.cross(qvec, uv, dim=1) + return (v + 2 * (q[:, :1] * uv + uuv)).view(original_shape) + +def qeuler(q, order, epsilon=0): + """ + Convert quaternion(s) q to Euler angles. + Expects a tensor of shape (*, 4), where * denotes any number of dimensions. 
+ Returns a tensor of shape (*, 3). + """ + assert q.shape[-1] == 4 + + original_shape = list(q.shape) + original_shape[-1] = 3 + q = q.view(-1, 4) + + q0 = q[:, 0] + q1 = q[:, 1] + q2 = q[:, 2] + q3 = q[:, 3] + + if order == 'xyz': + x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2*(q1 * q1 + q2 * q2)) + y = torch.asin(torch.clamp(2 * (q1 * q3 + q0 * q2), -1+epsilon, 1-epsilon)) + z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2*(q2 * q2 + q3 * q3)) + elif order == 'yzx': + x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2*(q1 * q1 + q3 * q3)) + y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2*(q2 * q2 + q3 * q3)) + z = torch.asin(torch.clamp(2 * (q1 * q2 + q0 * q3), -1+epsilon, 1-epsilon)) + elif order == 'zxy': + x = torch.asin(torch.clamp(2 * (q0 * q1 + q2 * q3), -1+epsilon, 1-epsilon)) + y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2*(q1 * q1 + q2 * q2)) + z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2*(q1 * q1 + q3 * q3)) + elif order == 'xzy': + x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2*(q1 * q1 + q3 * q3)) + y = torch.atan2(2 * (q0 * q2 + q1 * q3), 1 - 2*(q2 * q2 + q3 * q3)) + z = torch.asin(torch.clamp(2 * (q0 * q3 - q1 * q2), -1+epsilon, 1-epsilon)) + elif order == 'yxz': + x = torch.asin(torch.clamp(2 * (q0 * q1 - q2 * q3), -1+epsilon, 1-epsilon)) + y = torch.atan2(2 * (q1 * q3 + q0 * q2), 1 - 2*(q1 * q1 + q2 * q2)) + z = torch.atan2(2 * (q1 * q2 + q0 * q3), 1 - 2*(q1 * q1 + q3 * q3)) + elif order == 'zyx': + x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2*(q1 * q1 + q2 * q2)) + y = torch.asin(torch.clamp(2 * (q0 * q2 - q1 * q3), -1+epsilon, 1-epsilon)) + z = torch.atan2(2 * (q0 * q3 + q1 * q2), 1 - 2*(q2 * q2 + q3 * q3)) + else: + raise + + return torch.stack((x, y, z), dim=1).view(original_shape) + +# Numpy-backed implementations + +def qnormalize_np(q): + q_orig_shape = q.shape + q = np.reshape(q, (-1, 4)) + norm = np.linalg.norm(q, axis=1) + 0.000001 + norm = np.reshape(norm, (-1, 1)) + + q_norm = q / norm + q_norm = np.reshape(q_norm, q_orig_shape) + + return q_norm + +def qmul_np(q, r): + q = torch.from_numpy(q).contiguous() + r = torch.from_numpy(r).contiguous() + return qmul(q, r).numpy() + +def qrot_np(q, v): + q = torch.from_numpy(q).contiguous() + v = torch.from_numpy(v).contiguous() + return qrot(q, v).numpy() + +def qeuler_np(q, order, epsilon=0, use_gpu=False): + if use_gpu: + q = torch.from_numpy(q).cuda() + return qeuler(q, order, epsilon).cpu().numpy() + else: + q = torch.from_numpy(q).contiguous() + return qeuler(q, order, epsilon).numpy() + +def qfix(q): + """ + Enforce quaternion continuity across the time dimension by selecting + the representation (q or -q) with minimal distance (or, equivalently, maximal dot product) + between two consecutive frames. + + Expects a tensor of shape (L, J, 4), where L is the sequence length and J is the number of joints. + Returns a tensor of the same shape. + """ + assert len(q.shape) == 3 + assert q.shape[-1] == 4 + + result = q.copy() + dot_products = np.sum(q[1:]*q[:-1], axis=2) + mask = dot_products < 0 + mask = (np.cumsum(mask, axis=0)%2).astype(bool) + result[1:][mask] *= -1 + return result + +def expmap_to_quaternion(e): + """ + Convert axis-angle rotations (aka exponential maps) to quaternions. + Stable formula from "Practical Parameterization of Rotations Using the Exponential Map". + Expects a tensor of shape (*, 3), where * denotes any number of dimensions. + Returns a tensor of shape (*, 4). 
+ """ + assert e.shape[-1] == 3 + + original_shape = list(e.shape) + original_shape[-1] = 4 + e = e.reshape(-1, 3) + + theta = np.linalg.norm(e, axis=1).reshape(-1, 1) + w = np.cos(0.5*theta).reshape(-1, 1) + xyz = 0.5*np.sinc(0.5*theta/np.pi)*e + return np.concatenate((w, xyz), axis=1).reshape(original_shape) + +def euler_to_quaternion(e, order): + """ + Convert Euler angles to quaternions. + """ + assert e.shape[-1] == 3 + + original_shape = list(e.shape) + original_shape[-1] = 4 + + e = e.reshape(-1, 3) + + x = e[:, 0] + y = e[:, 1] + z = e[:, 2] + + rx = np.stack((np.cos(x/2), np.sin(x/2), np.zeros_like(x), np.zeros_like(x)), axis=1) + ry = np.stack((np.cos(y/2), np.zeros_like(y), np.sin(y/2), np.zeros_like(y)), axis=1) + rz = np.stack((np.cos(z/2), np.zeros_like(z), np.zeros_like(z), np.sin(z/2)), axis=1) + + result = None + for coord in order: + if coord == 'x': + r = rx + elif coord == 'y': + r = ry + elif coord == 'z': + r = rz + else: + raise + if result is None: + result = r + else: + result = qmul_np(result, r) + + # Reverse antipodal representation to have a non-negative "w" + if order in ['xyz', 'yzx', 'zxy']: + result *= -1 + + return result.reshape(original_shape) + +def slerp(q0, q1, amount=0.5): + # TODO: modify this code so it works for arrays of quaternions + + # Ensure quaternion inputs are unit quaternions and 0 <= amount <=1 + q0 = qnormalize_np(q0) + q1 = qnormalize_np(q1) + + amount = np.clip(amount, 0, 1) + + dot = np.dot(q0, q1) + + # If the dot product is negative, slerp won't take the shorter path. + # Note that v1 and -v1 are equivalent when the negation is applied to all four components. + # Fix by reversing one quaternion + if dot < 0.0: + q0 = -q0 + dot = -dot + + # sin_theta_0 can not be zero + if dot > 0.9995: + qr = q0 + amount * (q1 - q0) + qr = qnormalize_np(qr) + return qr + + theta_0 = np.arccos(dot) # Since dot is in range [0, 0.9995], np.arccos() is safe + sin_theta_0 = np.sin(theta_0) + + theta = theta_0 * amount + sin_theta = np.sin(theta) + + s0 = np.cos(theta) - dot * sin_theta / sin_theta_0 + s1 = sin_theta / sin_theta_0 + qr = (s0 * q0) + (s1 * q1) + qr = qnormalize_np(qr) + return qr diff --git a/common/skeleton.py b/common/skeleton.py new file mode 100644 index 0000000..e972a59 --- /dev/null +++ b/common/skeleton.py @@ -0,0 +1,115 @@ +# skeleton implementation with functions to remove joints and to do forward kinematics +# code adapted from original pyTorch implementation of Quaternet +# quaternet / common / skeleton.py + +# TODO: verify correctness of code, in particular the forward kinematics function still contains torch specific code such as expand + +import torch +import numpy as np +from common.quaternion import qmul_np, qmul, qrot + +class Skeleton: + def __init__(self, offsets, parents): + assert len(offsets) == len(parents) + + self._offsets = torch.FloatTensor(offsets) + self._parents = np.array(parents) + self._compute_metadata() + + def cuda(self): + self._offsets = self._offsets.cuda() + return self + + def num_joints(self): + return self._offsets.shape[0] + + def offsets(self): + return self._offsets + + def parents(self): + return self._parents + + def has_children(self): + return self._has_children + + def children(self): + return self._children + + def remove_joints(self, joints_to_remove, dataset): + """ + Remove the joints specified in 'joints_to_remove', both from the + skeleton definition and from the dataset (which is modified in place). + The rotations of removed joints are propagated along the kinematic chain. 
+ """ + valid_joints = [] + for joint in range(len(self._parents)): + if joint not in joints_to_remove: + valid_joints.append(joint) + + # Update all transformations in the dataset + for subject in dataset.subjects(): + for action in dataset[subject].keys(): + rotations = dataset[subject][action]['rotations'] + for joint in joints_to_remove: + for child in self._children[joint]: + rotations[:, child] = qmul_np(rotations[:, joint], rotations[:, child]) + rotations[:, joint] = [1, 0, 0, 0] # Identity + dataset[subject][action]['rotations'] = rotations[:, valid_joints] + + index_offsets = np.zeros(len(self._parents), dtype=int) + new_parents = [] + for i, parent in enumerate(self._parents): + if i not in joints_to_remove: + new_parents.append(parent - index_offsets[parent]) + else: + index_offsets[i:] += 1 + self._parents = np.array(new_parents) + + self._offsets = self._offsets[valid_joints] + + self._compute_metadata() + + def forward_kinematics(self, rotations, root_positions): + """ + Perform forward kinematics using the given trajectory and local rotations. + Arguments (where N = batch size, L = sequence length, J = number of joints): + -- rotations: (N, L, J, 4) tensor of unit quaternions describing the local rotations of each joint. + -- root_positions: (N, L, 3) tensor describing the root joint positions. + """ + assert len(rotations.shape) == 4 + assert rotations.shape[-1] == 4 + + positions_world = [] + rotations_world = [] + + expanded_offsets = self._offsets.expand(rotations.shape[0], rotations.shape[1], + self._offsets.shape[0], self._offsets.shape[1]) + + # Parallelize along the batch and time dimensions + for i in range(self._offsets.shape[0]): + if self._parents[i] == -1: + positions_world.append(root_positions) + rotations_world.append(rotations[:, :, 0]) + else: + positions_world.append(qrot(rotations_world[self._parents[i]], expanded_offsets[:, :, i]) \ + + positions_world[self._parents[i]]) + if self._has_children[i]: + rotations_world.append(qmul(rotations_world[self._parents[i]], rotations[:, :, i])) + else: + # This joint is a terminal node -> it would be useless to compute the transformation + rotations_world.append(None) + + return torch.stack(positions_world, dim=3).permute(0, 1, 3, 2) + + def _compute_metadata(self): + self._has_children = np.zeros(len(self._parents)).astype(bool) + for i, parent in enumerate(self._parents): + if parent != -1: + self._has_children[parent] = True + + self._children = [] + for i, parent in enumerate(self._parents): + self._children.append([]) + for i, parent in enumerate(self._parents): + if parent != -1: + self._children[parent].append(i) \ No newline at end of file diff --git a/common/utils.py b/common/utils.py new file mode 100644 index 0000000..d0ed861 --- /dev/null +++ b/common/utils.py @@ -0,0 +1,160 @@ +import torch +import numpy as np +from matplotlib import pyplot as plt +import csv + +def save_loss_as_csv(loss_history, csv_file_name): + with open(csv_file_name, 'w') as csv_file: + csv_columns = list(loss_history.keys()) + csv_row_count = len(loss_history[csv_columns[0]]) + + + csv_writer = csv.DictWriter(csv_file, fieldnames=csv_columns, delimiter=',', lineterminator='\n') + csv_writer.writeheader() + + for row in range(csv_row_count): + + csv_row = {} + + for key in loss_history.keys(): + csv_row[key] = loss_history[key][row] + + csv_writer.writerow(csv_row) + +def save_loss_as_image(loss_history, image_file_name): + keys = list(loss_history.keys()) + epochs = len(loss_history[keys[0]]) + + for key in keys: + 
plt.plot(range(epochs), loss_history[key], label=key) + + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.show() + plt.savefig(image_file_name) + +def get_skeleton_edge_list(skeleton): + skel_edge_list = [] + + skeleton_children = skeleton.children() + for parent_joint_index in range(len(skeleton_children)): + for child_joint_index in skeleton_children[parent_joint_index]: + skel_edge_list.append([parent_joint_index, child_joint_index]) + + return skel_edge_list + +def get_equal_mix_max_positions(poses): + + min_pos = np.min(np.reshape(poses, (-1, 3)), axis=0) + max_pos = np.max(np.reshape(poses, (-1, 3)), axis=0) + min_pos = np.min(min_pos, axis=0) + max_pos = np.max(max_pos, axis=0) + + _min_pos = [min_pos, min_pos, min_pos] + _max_pos = [max_pos, max_pos, max_pos] + + min_pos = _min_pos + max_pos = _max_pos + + return min_pos, max_pos + +def create_ref_pose_sequence(ref_poses, start_frame, frame_count): + _ref_poses = ref_poses[start_frame:start_frame + frame_count] + + return _ref_poses + +def create_pred_pose_sequence(ref_poses, start_frame, frame_count, encoder, decoder, batch_size = 32): + pred_poses = [] + + for i in range(start_frame, start_frame + frame_count, batch_size): + target_poses = [] + + for bI in range(batch_size): + target_poses.append(ref_poses[i + bI]) + + target_poses = torch.stack(target_poses) + _pred_poses = decoder.predict(encoder.predict(target_poses)) + pred_poses.append(_pred_poses) + + pred_poses = np.array(pred_poses) + + pred_poses = np.reshape(pred_poses, (-1, pred_poses.shape[-1])) + pred_poses = pred_poses[:frame_count, :] + + return pred_poses + +def create_2_pose_interpolation(ref_poses, frame1, frame2, interpolation_count, encoder, decoder): + start_pose = ref_poses[frame1] + end_pose = ref_poses[frame2] + + start_pose = np.expand_dims(start_pose, axis=0) + end_pose = np.expand_dims(end_pose, axis=0) + + start_enc = encoder.predict(start_pose) + end_enc = encoder.predict(end_pose) + + inter_poses = [] + + for i in range(interpolation_count): + inter_enc = start_enc + (end_enc - start_enc) * i / (interpolation_count - 1.0) + inter_pose = decoder.predict(inter_enc) + inter_poses.append(torch.squeeze(inter_pose, 0)) + + inter_poses = np.array(inter_poses) + + return inter_poses + +def create_3_pose_interpolation(ref_poses, frame1, frame2, frame3, interpolation_count, encoder, decoder): + inter_poses = [] + + ref_pose1 = ref_poses[frame1] + ref_pose2 = ref_poses[frame2] + ref_pose3 = ref_poses[frame3] + + ref_pose1 = np.expand_dims(ref_pose1, axis=0) + ref_pose2 = np.expand_dims(ref_pose2, axis=0) + ref_pose3 = np.expand_dims(ref_pose3, axis=0) + + ref_enc1 = encoder.predict(ref_pose1) + ref_enc2 = encoder.predict(ref_pose2) + ref_enc3 = encoder(ref_pose3, training=False) + + for hI in range(interpolation_count[0]): + h_mix = hI / (interpolation_count[0] - 1) + h_mix_enc12 = ref_enc1 * (1.0 - h_mix) + ref_enc2 * h_mix + + for vI in range(interpolation_count[1]): + v_mix = vI / (interpolation_count[1] - 1) + v_mix_enc13 = ref_enc1 * (1.0 - v_mix) + ref_enc3 * v_mix + f_mix_enc = h_mix_enc12 + v_mix_enc13 - ref_enc1 + + f_mix_pose = decoder.predict(f_mix_enc) + + inter_poses.append(f_mix_pose) + + inter_poses = np.array(inter_poses) + + return inter_poses + +def create_pose_deviation(ref_poses, frame, latent_dim, deviation_range, deviation_count, encoder, decoder): + deviation_poses = [] + + ref_pose = ref_poses[frame] + ref_pose = np.expand_dims(ref_pose, axis=0) + ref_enc = encoder.predict(ref_pose) + + for lI in range(latent_dim): + + 
deviation_vec = np.zeros(shape=ref_enc.shape) + + for dI in range(-deviation_count, deviation_count + 1): + + deviation_vec[0, lI] = deviation_range * dI / (deviation_count - 1) + deviation_pose = decoder.predict(ref_enc + deviation_vec) + + deviation_poses.append(deviation_pose) + + deviation_poses = np.array(deviation_poses) + + return deviation_poses \ No newline at end of file diff --git a/flickr/flickr_scrape.py b/flickr/flickr_scrape.py new file mode 100644 index 0000000..15bd2f5 --- /dev/null +++ b/flickr/flickr_scrape.py @@ -0,0 +1,61 @@ +""" +How to install flickr-api + +https://github.com/ultralytics/flickr_scraper/issues/5 + +Warning: create a new environment before conducting these steps + +!git clone https://github.com/ultralytics/flickr_scraper +%cd flickr_scraper +%pip install -qr requirements.txt + +How to get a flickr API key +http://www.cmssupport.utoronto.ca/help/Creating_a_Flickr_API_key.htm +""" + +""" +extras argument: +url_sq : s small square 75x75 +url_q : q large square 150x150 +url_t : t thumbnail, 100 on longest side +url_s : m small, 240 on longest side +url_n : n small, 320 on longest side +url_m : - medium, 500 on longest side +url_z : z medium 640, 640 on longest side +url_c : c medium 800, 800 on longest side† +url_l : b large, 1024 on longest side* +url_o : o original image, either a jpg, gif or png, depending on source format +""" + +import flickrapi +import urllib.request + +api_key = '------' +secret = '------' + +image_save_directory = 'images/dancer/img_' +image_save_count = 5000 # 5000 +extras = "url_q" + +print(flickrapi.__version__) + +flickr = flickrapi.FlickrAPI(api_key,secret) +count = 0 + +for photo in flickr.walk(tag_mode='all', + text ='dancer, dance, contemporary, solo', + media='photos', + sort='relevance', + extras=extras): + try: + photo_url = photo.get(extras) + filename = image_save_directory+('{:0>10}'.format(count))+'.jpg' + urllib.request.urlretrieve(photo_url, filename) + count += 1 + + print("get photo index {} name {} ".format(count, filename) ) + except: + pass + + if count > image_save_count: + break diff --git a/gan/image_gan/.spyproject/config/backups/codestyle.ini.bak b/gan/image_gan/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/image_gan/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/backups/encoding.ini.bak b/gan/image_gan/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/image_gan/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/backups/vcs.ini.bak b/gan/image_gan/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/image_gan/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/backups/workspace.ini.bak b/gan/image_gan/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..e8f364a --- /dev/null +++ b/gan/image_gan/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + 
+[main] +version = 0.2.0 +recent_files = ['image_gan.py'] + diff --git a/gan/image_gan/.spyproject/config/codestyle.ini b/gan/image_gan/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/image_gan/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/gan/image_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/gan/image_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/gan/image_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/gan/image_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/gan/image_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/gan/image_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/gan/image_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/gan/image_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/gan/image_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/gan/image_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/gan/image_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/gan/image_gan/.spyproject/config/encoding.ini b/gan/image_gan/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/image_gan/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/vcs.ini b/gan/image_gan/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/image_gan/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/.spyproject/config/workspace.ini b/gan/image_gan/.spyproject/config/workspace.ini new file mode 100644 index 0000000..5df243f --- /dev/null +++ b/gan/image_gan/.spyproject/config/workspace.ini @@ -0,0 +1,11 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['image_gan.py'] + +[main] +version = 0.2.0 + diff --git a/gan/image_gan/image_gan.py b/gan/image_gan/image_gan.py new file mode 100644 index 0000000..f03b672 --- /dev/null +++ b/gan/image_gan/image_gan.py @@ -0,0 +1,544 @@ +""" +Introduction Convolutional Neural Networks: + https://www.analyticsvidhya.com/blog/2021/05/convolutional-neural-networks-cnn/ + https://towardsdatascience.com/pytorch-basics-how-to-train-your-neural-net-intro-to-cnn-26a14c2ea29 + +Introduction Adversarial Networks: + Generative Adversarial Networks: https://wiki.pathmind.com/generative-adversarial-network-gan + Adversarial Autoencoder: 
https://medium.com/vitrox-publication/adversarial-auto-encoder-aae-a3fc86f71758 +""" + +import numpy as np +import torch +import torchvision +from pytorch_model_summary import summary +from torch.utils.data import DataLoader +from torch import nn +from torch import optim +from collections import OrderedDict +import matplotlib.pyplot as plt +import time +import pickle +import math + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# image settings +image_data_path = "../../../../Data/Images" +image_size = 128 +image_channels = 3 + +# model settings +latent_dim = 64 +gen_conv_channel_counts = [ 512, 128, 32, 8 ] +gen_conv_kernel_size = 5 +gen_dense_layer_sizes = [ 128 ] + +crit_conv_channel_counts = [ 8, 32, 128, 512 ] +crit_conv_kernel_size = 5 +crit_dense_layer_sizes = [ 128 ] + +save_models = False +save_tscript = False +save_weights = True + +# load model weights +load_weights = False +generator_weights_file = "results/weights/generator_weights_epoch_400" +critique_weights_file = "results/weights/critique_weights_epoch_400" + +# training settings +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +gen_learning_rate = 1e-4 +crit_learning_rate = 1e-4 +epochs = 1000 +weight_save_interval = 10 +save_history = False + +# create dataset +transform = torchvision.transforms.Compose([torchvision.transforms.Resize(image_size), + torchvision.transforms.ToTensor()]) + +full_dataset = torchvision.datasets.ImageFolder(image_data_path, transform=transform) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# Create Models + +# Critique +class Critique(nn.Module): + def __init__(self, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + + conv_layers.append(("critique_conv_0", nn.Conv2d(image_channels, self.conv_channel_counts[0], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("critique_lrelu_0", nn.LeakyReLU(0.2))) + conv_layers.append(("critique_bnorm_0", nn.BatchNorm2d(self.conv_channel_counts[0]))) + + conv_layer_count = len(conv_channel_counts) + + for layer_index in range(1, conv_layer_count): + conv_layers.append(("critique_conv_{}".format(layer_index), nn.Conv2d(self.conv_channel_counts[layer_index-1], self.conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding))) + conv_layers.append(("critique_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + conv_layers.append(("critique_bnorm_{}".format(layer_index), nn.BatchNorm2d(self.conv_channel_counts[layer_index]))) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + self.flatten = nn.Flatten(start_dim=1) + + # create dense layers + dense_layers = [] + + last_conv_layer_size = image_size // np.power(2, len(conv_channel_counts)) + + #print("last_conv_layer_size ", last_conv_layer_size) + + 
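# Note: with conv_kernel_size = 5, stride = (5 - 1) // 2 = 2, so each convolution halves the spatial resolution; after len(conv_channel_counts) stride-2 layers an image_size x image_size input shrinks to last_conv_layer_size per side, hence the flattened feature count below. +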
dense_layer_input_size = conv_channel_counts[-1] * last_conv_layer_size * last_conv_layer_size + + #print("dense_layer_input_size ", dense_layer_input_size) + + dense_layers.append(("critique_dense_0", nn.Linear(dense_layer_input_size, self.dense_layer_sizes[0]))) + dense_layers.append(("critique_dense_lrelu_0", nn.LeakyReLU(0.2))) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("critique_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("critique_dense_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], 1))) + dense_layers.append(("encoder_dense_sigmoid_{}".format(len(self.dense_layer_sizes)), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x1 s", x.shape) + + x = self.conv_layers(x) + + #print("x2 s", x.shape) + + x = self.flatten(x) + + #print("x3 s", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat s", yhat.shape) + + return yhat + +critique = Critique(image_size, image_channels, crit_conv_channel_counts, crit_conv_kernel_size, crit_dense_layer_sizes).to(device) + +print(critique) + +""" +test_input = torch.zeros((1, image_channels, image_size, image_size)).to(device) +test_output = critique(test_input) +""" + +if save_models == True: + critique.eval() + + # save using pickle + torch.save(critique, "results/models/critique.pth") + + # save using onnx + x = torch.zeros((1, image_channels, image_size, image_size)).to(device) + torch.onnx.export(critique, x, "results/models/critique.onnx") + + critique.train() + +if save_tscript == True: + critique.eval() + + # save using TochScript + x = torch.rand((1, image_channels, image_size, image_size), dtype=torch.float32).to(device) + script_module = torch.jit.trace(critique, x) + script_module.save("results/models/critique.pt") + + critique.train() + +if load_weights and critique_weights_file: + critique.load_state_dict(torch.load(critique_weights_file)) + +# Generator +class Generator(nn.Module): + + def __init__(self, latent_dim, image_size, image_channels, conv_channel_counts, conv_kernel_size, dense_layer_sizes): + super().__init__() + + self.latent_dim = latent_dim + self.image_size = image_size + self.image_channels = image_channels + self.conv_channel_counts = conv_channel_counts + self.conv_kernel_size = conv_kernel_size + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("generator_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("generator_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("generator_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("generator_dense_relu_{}".format(layer_index), nn.ReLU())) + + last_conv_layer_size = int(image_size // np.power(2, len(conv_channel_counts))) + preflattened_size = [conv_channel_counts[0], last_conv_layer_size, last_conv_layer_size] + dense_layer_output_size = conv_channel_counts[0] * last_conv_layer_size * last_conv_layer_size + + print("preflattened_size ", preflattened_size) + + dense_layers.append(("generator_dense_{}".format(len(self.dense_layer_sizes)), 
nn.Linear(self.dense_layer_sizes[-1], dense_layer_output_size))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + self.unflatten = nn.Unflatten(dim=1, unflattened_size=preflattened_size) + + # create convolutional layers + conv_layers = [] + + stride = (self.conv_kernel_size - 1) // 2 + padding = stride + output_padding = 1 + + conv_layer_count = len(conv_channel_counts) + for layer_index in range(1, conv_layer_count): + conv_layers.append(("generator_bnorm_{}".format(layer_index), nn.BatchNorm2d(conv_channel_counts[layer_index-1]))) + conv_layers.append(("generator_conv_{}".format(layer_index), nn.ConvTranspose2d(conv_channel_counts[layer_index-1], conv_channel_counts[layer_index], self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + conv_layers.append(("generator_lrelu_{}".format(layer_index), nn.LeakyReLU(0.2))) + + conv_layers.append(("generator_bnorm_{}".format(conv_layer_count), nn.BatchNorm2d(conv_channel_counts[-1]))) + conv_layers.append(("generator_conv_{}".format(conv_layer_count), nn.ConvTranspose2d(conv_channel_counts[-1], self.image_channels, self.conv_kernel_size, stride=stride, padding=padding, output_padding=output_padding))) + conv_layers.append(("generator_sigmoid_{}".format(conv_layer_count), nn.Sigmoid())) + + self.conv_layers = nn.Sequential(OrderedDict(conv_layers)) + + def forward(self, x): + + #print("x1 s ", x.shape) + + x = self.dense_layers(x) + + #print("x2 s ", x.shape) + + x = self.unflatten(x) + + #print("x3 s ", x.shape) + + yhat = self.conv_layers(x) + + #print("yhat s ", yhat.shape) + + return yhat + +generator = Generator(latent_dim, image_size, image_channels, gen_conv_channel_counts, gen_conv_kernel_size, gen_dense_layer_sizes).to(device) + +print(generator) + +""" +test_input = torch.zeros((1, latent_dim)).to(device) +test_output = generator(test_input) +""" + +if save_models == True: + generator.eval() + + # save using pickle + torch.save(generator, "results/models/generator.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(generator, x, "results/models/generator.onnx") + + generator.train() + +if save_tscript == True: + generator.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(critique, x) + script_module.save("results/models/generator.pt") + + generator.train() + +if load_weights and generator_weights_file: + generator.load_state_dict(torch.load(generator_weights_file)) + +#Training + +critique_optimizer = torch.optim.Adam(critique.parameters(), lr=crit_learning_rate) +generator_optimizer = torch.optim.Adam(generator.parameters(), lr=gen_learning_rate) + +bce_loss = nn.BCELoss() + +# crictique loss function +def crit_loss(crit_real_output, crit_fake_output): + _real_loss = bce_loss(crit_real_output, torch.ones_like(crit_real_output).to(device)) + _fake_loss = bce_loss(crit_fake_output, torch.zeros_like(crit_fake_output).to(device)) + + _loss = (_real_loss + _fake_loss) * 0.5 + return _loss + +# generator loss +def gen_crit_loss(crit_fake_output): + _loss = bce_loss(crit_fake_output, torch.ones_like(crit_fake_output).to(device)) + return _loss + +def gen_loss(crit_fake_output): + _loss = gen_crit_loss(crit_fake_output) + return _loss + +def crit_train_step(real_poses, random_encodings): + + critique_optimizer.zero_grad() + + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + 
crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + _crit_loss.backward() + critique_optimizer.step() + + return _crit_loss + +def crit_test_step(real_poses, random_encodings): + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + return _crit_loss + +def gen_train_step(random_encodings): + + generator_optimizer.zero_grad() + + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss = gen_loss(crit_fake_output) + + _gen_loss.backward() + generator_optimizer.step() + + return _gen_loss + +def gen_test_step(random_encodings): + with torch.no_grad(): + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss = gen_loss(crit_fake_output) + + return _gen_loss + +def plot_gan_outputs(decoder, epoch, n=5): + + generator.eval() + + plt.figure(figsize=(10,4.5)) + for i in range(n): + ax = plt.subplot(1,n,i+1) + + decoder.eval() + with torch.no_grad(): + random_encoding = torch.randn((1, latent_dim)).to(device) + gen_img = decoder(random_encoding) + decoder.train() + + gen_img = gen_img.cpu().squeeze().numpy() + gen_img = np.clip(gen_img, 0.0, 1.0) + gen_img = np.moveaxis(gen_img, 0, 2) + + plt.imshow(gen_img) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == 0: + ax.set_title("Epoch {}: Generated Images".format(epoch)) + plt.show() + + generator.train() + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["gen train"] = [] + loss_history["gen test"] = [] + loss_history["crit train"] = [] + loss_history["crit test"] = [] + + for epoch in range(epochs): + + start = time.time() + + crit_train_loss_per_epoch = [] + gen_train_loss_per_epoch = [] + + for train_batch, _ in train_dataloader: + train_batch = train_batch.to(device) + + random_encodings = torch.randn((train_batch.shape[0], latent_dim)).to(device) + + # start with critique training + _crit_train_loss = crit_train_step(train_batch, random_encodings) + + _crit_train_loss = _crit_train_loss.detach().cpu().numpy() + + crit_train_loss_per_epoch.append(_crit_train_loss) + + # now train the generator + for iter in range(2): + _gen_loss = gen_train_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + + gen_train_loss_per_epoch.append(_gen_loss) + + crit_train_loss_per_epoch = np.mean(np.array(crit_train_loss_per_epoch)) + gen_train_loss_per_epoch = np.mean(np.array(gen_train_loss_per_epoch)) + + crit_test_loss_per_epoch = [] + gen_test_loss_per_epoch = [] + + for test_batch, _ in test_dataloader: + test_batch = test_batch.to(device) + + random_encodings = torch.randn((test_batch.shape[0], latent_dim)).to(device) + + # start with critique testing + _crit_test_loss = crit_test_step(test_batch, random_encodings) + + _crit_test_loss = _crit_test_loss.detach().cpu().numpy() + + crit_test_loss_per_epoch.append(_crit_test_loss) + + # now test the generator + _gen_loss = gen_test_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + + gen_test_loss_per_epoch.append(_gen_loss) + + crit_test_loss_per_epoch = np.mean(np.array(crit_test_loss_per_epoch)) + gen_test_loss_per_epoch = np.mean(np.array(gen_test_loss_per_epoch)) + + if epoch % weight_save_interval == 0 and save_weights == True: 
torch.save(critique.state_dict(), "results/weights/critique_weights_epoch_{}".format(epoch)) + torch.save(generator.state_dict(), "results/weights/generator_weights_epoch_{}".format(epoch)) + + plot_gan_outputs(generator, epoch, n=5) + + + loss_history["gen train"].append(gen_train_loss_per_epoch) + loss_history["gen test"].append(gen_test_loss_per_epoch) + loss_history["crit train"].append(crit_train_loss_per_epoch) + loss_history["crit test"].append(crit_test_loss_per_epoch) + + print ('epoch {} : gen train: {:01.4f} gen test: {:01.4f} crit train {:01.4f} crit test {:01.4f} time {:01.2f}'.format(epoch + 1, gen_train_loss_per_epoch, gen_test_loss_per_epoch, crit_train_loss_per_epoch, crit_test_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + + + + +""" +epochs = 2000 + +# gan only training loop +# outer loop over the training epochs +for epoch in range(epochs): + + critique_epoch_loss = 0 + generator_epoch_loss = 0 + + tick = time.time() + + for batch_features, _ in train_dataloader: + + batch_features = batch_features.to(device) + + # image prior train step + critique_optimizer.zero_grad() + + random_encodings = torch.randn((batch_features.shape[0], latent_dim)).to(device) + + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = batch_features + + critique_real_output = critique(real_output) + critique_fake_output = critique(fake_output) + + critique_loss = crit_loss(critique_real_output, critique_fake_output) + + critique_loss.backward() + critique_optimizer.step() + + critique_epoch_loss += critique_loss.item() + + for iter in range(2): + + # generator train step + generator_optimizer.zero_grad() + + generated_images = generator(random_encodings) + + critique_fake_output = critique(generated_images) + + generator_loss = gen_loss(critique_fake_output) + + generator_loss.backward() + generator_optimizer.step() + + generator_epoch_loss += generator_loss.item() + + # compute the epoch training loss + critique_epoch_loss = critique_epoch_loss / len(train_dataloader) + generator_epoch_loss = generator_epoch_loss / len(train_dataloader) + + tock = time.time() + + # display the epoch training loss + #plot_ae_outputs(encoder,decoder, epoch, n=5) + plot_gan_outputs(generator, epoch, n=5) + print("epoch : {}/{}, di_loss = {:.6f} dec_loss = {:.6f}, time = {:.2f}".format(epoch + 1, epochs, critique_epoch_loss, generator_epoch_loss, (tock - tick))) +""" + diff --git a/gan/motion_gan/.spyproject/config/backups/codestyle.ini.bak b/gan/motion_gan/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/motion_gan/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/backups/encoding.ini.bak b/gan/motion_gan/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/motion_gan/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/backups/vcs.ini.bak b/gan/motion_gan/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/motion_gan/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] 
+version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/backups/workspace.ini.bak b/gan/motion_gan/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..5ef0d13 --- /dev/null +++ b/gan/motion_gan/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['motion_gan.py', '..\\pose_gan\\pose_gan.py'] + diff --git a/gan/motion_gan/.spyproject/config/codestyle.ini b/gan/motion_gan/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/motion_gan/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/gan/motion_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/gan/motion_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/gan/motion_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/gan/motion_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/gan/motion_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/gan/motion_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/gan/motion_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/gan/motion_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/gan/motion_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/gan/motion_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ b/gan/motion_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/gan/motion_gan/.spyproject/config/encoding.ini b/gan/motion_gan/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/motion_gan/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/vcs.ini b/gan/motion_gan/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/motion_gan/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/gan/motion_gan/.spyproject/config/workspace.ini b/gan/motion_gan/.spyproject/config/workspace.ini new file mode 100644 index 0000000..5ef0d13 --- /dev/null +++ b/gan/motion_gan/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['motion_gan.py', '..\\pose_gan\\pose_gan.py'] + diff --git a/gan/motion_gan/motion_gan.py b/gan/motion_gan/motion_gan.py new file mode 100644 index 
0000000..b97d4c2 --- /dev/null +++ b/gan/motion_gan/motion_gan.py @@ -0,0 +1,543 @@ +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from torch import nn +from collections import OrderedDict + +import os, sys, time, subprocess +import numpy as np +sys.path.append("../..") + +from common import utils +from common.skeleton import Skeleton +from common.mocap_dataset import MocapDataset +from common.quaternion import qmul, qnormalize_np, slerp +from common.pose_renderer import PoseRenderer + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print('Using {} device'.format(device)) + +# mocap settings +mocap_data_path = "../../../../Data/Mocap/Muriel_Nov_2021/MUR_PolytopiaMovement_Take2_mb_proc_rh.p" +mocap_valid_frame_ranges = [ [ 860, 9500 ] ] +mocap_fps = 50 + +# model settings +latent_dim = 64 +sequence_length = 128 +gen_rnn_layer_count = 2 +gen_rnn_layer_size = 512 +gen_dense_layer_sizes = [ 512 ] +crit_rnn_layer_count = 2 +crit_rnn_layer_size = 512 +crit_dense_layer_sizes = [ 512 ] + +save_models = False +save_tscript = False +save_weights = True + +# load model weights +load_weights = True +generator_weights_file = "results/weights/generator_weights_epoch_150" +critique_weights_file = "results/weights/critique_weights_epoch_150" + +# training settings +sequence_offset = 2 # when creating sequence excerpts, each excerpt is offset from the previous one by this value +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +gen_learning_rate = 1e-4 +crit_learning_rate = 1e-4 +gen_norm_loss_scale = 0.1 +gen_crit_loss_scale = 1.0 +epochs = 500 +weight_save_interval = 10 +save_history = False + +# visualization settings +view_ele = 0.0 +view_azi = 0.0 +view_line_width = 4.0 +view_size = 8.0 + +# load mocap data +mocap_data = MocapDataset(mocap_data_path, fps=mocap_fps) +if device == 'cuda': + mocap_data.cuda() +mocap_data.compute_positions() + +# gather skeleton info +skeleton = mocap_data.skeleton() +skeleton_joint_count = skeleton.num_joints() +skel_edge_list = utils.get_skeleton_edge_list(skeleton) + +# obtain pose sequence +subject = "S1" +action = "A1" +pose_sequence = mocap_data[subject][action]["rotations"] + +pose_sequence_length = pose_sequence.shape[0] +joint_count = pose_sequence.shape[1] +joint_dim = pose_sequence.shape[2] +pose_dim = joint_count * joint_dim +pose_sequence = np.reshape(pose_sequence, (-1, pose_dim)) + +# gather pose sequence excerpts +pose_sequence_excerpts = [] + +for valid_frame_range in mocap_valid_frame_ranges: + frame_range_start = valid_frame_range[0] + frame_range_end = valid_frame_range[1] + + for seq_excerpt_start in np.arange(frame_range_start, frame_range_end - sequence_length, sequence_offset): + #print("valid: start ", frame_range_start, " end ", frame_range_end, " exc: start ", seq_excerpt_start, " end ", (seq_excerpt_start + sequence_length) ) + pose_sequence_excerpt = pose_sequence[seq_excerpt_start:seq_excerpt_start + sequence_length] + pose_sequence_excerpts.append(pose_sequence_excerpt) + +pose_sequence_excerpts = np.array(pose_sequence_excerpts) + +# create dataset + +sequence_excerpts_count = pose_sequence_excerpts.shape[0] + +class SequenceDataset(Dataset): + def __init__(self, sequence_excerpts): + self.sequence_excerpts = sequence_excerpts + + def __len__(self): + return self.sequence_excerpts.shape[0] + + def __getitem__(self, idx): + return self.sequence_excerpts[idx, ...] 
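+ +# each dataset item is one motion excerpt of shape (sequence_length, pose_dim), i.e. sequence_length frames +# of flattened joint rotations; a DataLoader batch therefore has shape (batch_size, sequence_length, pose_dim). +# illustrative sanity check (commented out): +#print("excerpts shape ", pose_sequence_excerpts.shape)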
+ + +full_dataset = SequenceDataset(pose_sequence_excerpts) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +# create models + +# create critique model + +class Critique(nn.Module): + def __init__(self, sequence_length, pose_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes): + super(Critique, self).__init__() + + self.sequence_length = sequence_length + self.pose_dim = pose_dim + self.rnn_layer_count = rnn_layer_count + self.rnn_layer_size = rnn_layer_size + self.dense_layer_sizes = dense_layer_sizes + + # create recurrent layers + rnn_layers = [] + rnn_layers.append(("critique_rnn_0", nn.LSTM(self.pose_dim, self.rnn_layer_size, self.rnn_layer_count, batch_first=True))) + + self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers)) + + # create dense layers + + dense_layers = [] + + dense_layers.append(("critique_dense_0", nn.Linear(self.rnn_layer_size, self.dense_layer_sizes[0]))) + dense_layers.append(("critique_dense_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("critique_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("critique_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("critique_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], 1))) + dense_layers.append(("critique_dense_sigmoid_{}".format(len(self.dense_layer_sizes)), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + + #print("x 1 ", x.shape) + + x, (_, _) = self.rnn_layers(x) + + #print("x 2 ", x.shape) + + x = x[:, -1, :] # only last time step + + #print("x 3 ", x.shape) + + yhat = self.dense_layers(x) + + #print("yhat ", yhat.shape) + + return yhat + +critique = Critique(sequence_length, pose_dim, crit_rnn_layer_count, crit_rnn_layer_size, crit_dense_layer_sizes).to(device) + +print(critique) + +if save_models == True: + critique.eval() + + # save using pickle + torch.save(critique, "results/models/critique.pth") + + # save using onnx + x = torch.zeros((1, sequence_length, pose_dim)).to(device) + torch.onnx.export(critique, x, "results/models/critique.onnx") + + critique.train() + +if save_tscript == True: + critique.eval() + + # save using TorchScript + x = torch.rand((1, sequence_length, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(critique, x) + script_module.save("results/models/critique.pt") + + critique.train() + +if load_weights and critique_weights_file: + critique.load_state_dict(torch.load(critique_weights_file, map_location=device)) + +# create generator model + +class Generator(nn.Module): + def __init__(self, sequence_length, pose_dim, latent_dim, rnn_layer_count, rnn_layer_size, dense_layer_sizes): + super(Generator, self).__init__() + + self.sequence_length = sequence_length + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.rnn_layer_size = rnn_layer_size + self.rnn_layer_count = rnn_layer_count + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + +
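# the generator acts as a sequence decoder: the dense layers below expand the latent vector, forward() + # then repeats it sequence_length times along the time axis, passes it through the LSTM, and a final + # time-distributed linear layer maps every time step back to pose_dim +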
dense_layers.append(("decoder_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("decoder_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("decoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("decoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + # create rnn layers + rnn_layers = [] + + rnn_layers.append(("decoder_rnn_0", nn.LSTM(self.dense_layer_sizes[-1], self.rnn_layer_size, self.rnn_layer_count, batch_first=True))) + + self.rnn_layers = nn.Sequential(OrderedDict(rnn_layers)) + + # final output dense layer + final_layers = [] + + final_layers.append(("decoder_dense_{}".format(dense_layer_count), nn.Linear(self.rnn_layer_size, self.pose_dim))) + + self.final_layers = nn.Sequential(OrderedDict(final_layers)) + + def forward(self, x): + #print("x 1 ", x.size()) + + # dense layers + x = self.dense_layers(x) + #print("x 2 ", x.size()) + + # repeat vector + x = torch.unsqueeze(x, dim=1) + x = x.repeat(1, sequence_length, 1) + #print("x 3 ", x.size()) + + # rnn layers + x, (_, _) = self.rnn_layers(x) + #print("x 4 ", x.size()) + + # final time distributed dense layer + x_reshaped = x.contiguous().view(-1, self.rnn_layer_size) # (batch_size * sequence, input_size) + #print("x 5 ", x_reshaped.size()) + + yhat = self.final_layers(x_reshaped) + #print("yhat 1 ", yhat.size()) + + yhat = yhat.contiguous().view(-1, self.sequence_length, self.pose_dim) + #print("yhat 2 ", yhat.size()) + + return yhat + +generator = Generator(sequence_length, pose_dim, latent_dim, gen_rnn_layer_count, gen_rnn_layer_size, gen_dense_layer_sizes).to(device) + +print(generator) + +if save_models == True: + generator.eval() + + # save using pickle + torch.save(generator, "results/models/generator_weights.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(generator, x, "results/models/generator.onnx") + + generator.train() + +if save_tscript == True: + generator.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(generator, x) + script_module.save("results/models/generator.pt") + + generator.train() + +if load_weights and generator_weights_file: + generator.load_state_dict(torch.load(generator_weights_file, map_location=device)) + +# Training + +critique_optimizer = torch.optim.Adam(critique.parameters(), lr=crit_learning_rate) +generator_optimizer = torch.optim.Adam(generator.parameters(), lr=gen_learning_rate) + +bce_loss = nn.BCELoss() + +# crictique loss function +def crit_loss(crit_real_output, crit_fake_output): + _real_loss = bce_loss(crit_real_output, torch.ones_like(crit_real_output).to(device)) + _fake_loss = bce_loss(crit_fake_output, torch.zeros_like(crit_fake_output).to(device)) + + _loss = (_real_loss + _fake_loss) * 0.5 + return _loss + +# generator loss +def gen_crit_loss(crit_fake_output): + _loss = bce_loss(crit_fake_output, torch.ones_like(crit_fake_output).to(device)) + return _loss + +def gen_norm_loss(yhat): + + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def gen_loss(yhat, crit_fake_output): + _norm_loss = gen_norm_loss(yhat) + _crit_loss = gen_crit_loss(crit_fake_output) + + _total_loss = 0.0 + 
_total_loss += _norm_loss * gen_norm_loss_scale + _total_loss += _crit_loss * gen_crit_loss_scale + + return _total_loss, _norm_loss, _crit_loss + +def crit_train_step(real_poses, random_encodings): + + critique_optimizer.zero_grad() + + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + _crit_loss.backward() + critique_optimizer.step() + + return _crit_loss + +def crit_test_step(real_poses, random_encodings): + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + return _crit_loss + +def gen_train_step(random_encodings): + + generator_optimizer.zero_grad() + + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss, _norm_loss, _crit_loss = gen_loss(generated_poses, crit_fake_output) + + _gen_loss.backward() + generator_optimizer.step() + + return _gen_loss, _norm_loss, _crit_loss + +def gen_test_step(random_encodings): + with torch.no_grad(): + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss, _norm_loss, _crit_loss = gen_loss(generated_poses, crit_fake_output) + + return _gen_loss, _norm_loss, _crit_loss + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["gen train"] = [] + loss_history["gen test"] = [] + loss_history["crit train"] = [] + loss_history["crit test"] = [] + loss_history["gen crit"] = [] + loss_history["gen norm"] = [] + + for epoch in range(epochs): + + start = time.time() + + crit_train_loss_per_epoch = [] + gen_train_loss_per_epoch = [] + gen_norm_loss_per_epoch = [] + gen_crit_loss_per_epoch = [] + + for train_batch in train_dataloader: + train_batch = train_batch.to(device) + + random_encodings = torch.randn((train_batch.shape[0], latent_dim)).to(device) + + # start with critique training + _crit_train_loss = crit_train_step(train_batch, random_encodings) + + _crit_train_loss = _crit_train_loss.detach().cpu().numpy() + + crit_train_loss_per_epoch.append(_crit_train_loss) + + # now train the generator + for iter in range(2): + _gen_loss, _gen_norm_loss, _gen_crit_loss = gen_train_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + _gen_norm_loss = _gen_norm_loss.detach().cpu().numpy() + _gen_crit_loss = _gen_crit_loss.detach().cpu().numpy() + + gen_train_loss_per_epoch.append(_gen_loss) + gen_norm_loss_per_epoch.append(_gen_norm_loss) + gen_crit_loss_per_epoch.append(_gen_crit_loss) + + + crit_train_loss_per_epoch = np.mean(np.array(crit_train_loss_per_epoch)) + gen_train_loss_per_epoch = np.mean(np.array(gen_train_loss_per_epoch)) + gen_norm_loss_per_epoch = np.mean(np.array(gen_norm_loss_per_epoch)) + gen_crit_loss_per_epoch = np.mean(np.array(gen_crit_loss_per_epoch)) + + crit_test_loss_per_epoch = [] + gen_test_loss_per_epoch = [] + + for test_batch in test_dataloader: + test_batch = test_batch.to(device) + + random_encodings = torch.randn((test_batch.shape[0], latent_dim)).to(device) + + # start with critique testing + _crit_test_loss = crit_test_step(test_batch, random_encodings) + + _crit_test_loss = _crit_test_loss.detach().cpu().numpy() + + crit_test_loss_per_epoch.append(_crit_test_loss) + + # now
test the generator + _gen_loss, _, _ = gen_test_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + + gen_test_loss_per_epoch.append(_gen_loss) + + crit_test_loss_per_epoch = np.mean(np.array(crit_test_loss_per_epoch)) + gen_test_loss_per_epoch = np.mean(np.array(gen_test_loss_per_epoch)) + + if epoch % weight_save_interval == 0 and save_weights == True: + torch.save(critique.state_dict(), "results/weights/critique_weights_epoch_{}".format(epoch)) + torch.save(generator.state_dict(), "results/weights/generator_weights_epoch_{}".format(epoch)) + + loss_history["gen train"].append(gen_train_loss_per_epoch) + loss_history["gen test"].append(gen_test_loss_per_epoch) + loss_history["crit train"].append(crit_train_loss_per_epoch) + loss_history["crit test"].append(crit_test_loss_per_epoch) + loss_history["gen crit"].append(gen_crit_loss_per_epoch) + loss_history["gen norm"].append(gen_norm_loss_per_epoch) + + print ('epoch {} : gen train: {:01.4f} gen test: {:01.4f} crit train {:01.4f} crit test {:01.4f} gen norm {:01.4f} gen crit {:01.4f} time {:01.2f}'.format(epoch + 1, gen_train_loss_per_epoch, gen_test_loss_per_epoch, crit_train_loss_per_epoch, crit_test_loss_per_epoch, gen_norm_loss_per_epoch, gen_crit_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(critique.state_dict(), "results/weights/critique_weights_epoch_{}".format(epochs)) +torch.save(generator.state_dict(), "results/weights/generator_weights_epoch_{}".format(epochs)) + +# inference and rendering +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + +def create_ref_sequence_anim(seq_index, file_name): + sequence_excerpt = pose_sequence_excerpts[seq_index] + sequence_excerpt = np.reshape(sequence_excerpt, (sequence_length, joint_count, joint_dim)) + + sequence_excerpt = torch.tensor(np.expand_dims(sequence_excerpt, axis=0)) + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)) + + skel_sequence = skeleton.forward_kinematics(sequence_excerpt, zero_trajectory) + skel_sequence = np.squeeze(skel_sequence.numpy()) + view_min, view_max = utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + +def create_gen_sequence_anim(file_name): + generator.eval() + + random_encoding = torch.randn((1, latent_dim)).to(device) + + with torch.no_grad(): + gen_sequence = generator(random_encoding) + + gen_sequence = torch.squeeze(gen_sequence) + gen_sequence = gen_sequence.view((-1, 4)) + gen_sequence = nn.functional.normalize(gen_sequence, p=2, dim=1) + gen_sequence = gen_sequence.view((1, sequence_length, joint_count, joint_dim)) + + zero_trajectory = torch.tensor(np.zeros((1, sequence_length, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_sequence = skeleton.forward_kinematics(gen_sequence, zero_trajectory) + + skel_sequence = skel_sequence.detach().cpu().numpy() + skel_sequence = np.squeeze(skel_sequence) + + view_min, view_max = 
utils.get_equal_mix_max_positions(skel_sequence) + skel_images = poseRenderer.create_pose_images(skel_sequence, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + skel_images[0].save(file_name, save_all=True, append_images=skel_images[1:], optimize=False, duration=33.0, loop=0) + + generator.train() + +create_gen_sequence_anim("test.gif") diff --git a/gan/pose_gan/.spyproject/config/backups/codestyle.ini.bak b/gan/pose_gan/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/pose_gan/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/backups/encoding.ini.bak b/gan/pose_gan/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/pose_gan/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/backups/vcs.ini.bak b/gan/pose_gan/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/pose_gan/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/backups/workspace.ini.bak b/gan/pose_gan/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..c3d9b60 --- /dev/null +++ b/gan/pose_gan/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['pose_gan.py'] + diff --git a/gan/pose_gan/.spyproject/config/codestyle.ini b/gan/pose_gan/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/gan/pose_gan/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/gan/pose_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/gan/pose_gan/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/gan/pose_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/gan/pose_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/gan/pose_gan/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/gan/pose_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/gan/pose_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/gan/pose_gan/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/gan/pose_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/gan/pose_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ 
b/gan/pose_gan/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/gan/pose_gan/.spyproject/config/encoding.ini b/gan/pose_gan/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/gan/pose_gan/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/vcs.ini b/gan/pose_gan/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/gan/pose_gan/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/gan/pose_gan/.spyproject/config/workspace.ini b/gan/pose_gan/.spyproject/config/workspace.ini new file mode 100644 index 0000000..c3d9b60 --- /dev/null +++ b/gan/pose_gan/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['pose_gan.py'] + diff --git a/gan/pose_gan/pose_gan.py b/gan/pose_gan/pose_gan.py new file mode 100644 index 0000000..e4a5b2f --- /dev/null +++ b/gan/pose_gan/pose_gan.py @@ -0,0 +1,566 @@ +import torch +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from torch import nn +from collections import OrderedDict + +import os, sys, time, subprocess +import numpy as np +sys.path.append("../..") + +from common import utils +from common.skeleton import Skeleton +from common.mocap_dataset import MocapDataset +from common.quaternion import qmul, qnormalize_np, slerp +from common.pose_renderer import PoseRenderer +import matplotlib.pyplot as plt + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +print('Using {} device'.format(device)) + + +# mocap settings +mocap_data_path = "../../../../Data/Mocap/Muriel_Nov_2021/MUR_PolytopiaMovement_Take2_mb_proc_rh.p" +mocap_valid_frame_ranges = [ [ 500, 6500 ] ] +mocap_fps = 50 + +# model settings +latent_dim = 8 +gen_dense_layer_sizes = [ 16, 64, 128 ] +crit_dense_layer_sizes = [ 128, 64, 16 ] + +save_models = False +save_tscript = False +save_weights = False + +# load model weights +load_weights = False +generator_weights_file = "results/weights/generator_weights_epoch_400" +critique_weights_file = "results/weights/critique_weights_epoch_400" + +# training settings +batch_size = 16 +train_percentage = 0.8 # train / test split +test_percentage = 0.2 +gen_learning_rate = 1e-4 +crit_learning_rate = 1e-4 +gen_norm_loss_scale = 0.1 +gen_crit_loss_scale = 1.0 +epochs = 200 +weight_save_interval = 1 +save_history = False + +# visualization settings +view_ele = 0.0 +view_azi = 0.0 +view_line_width = 4.0 +view_size = 8.0 + + +# load mocap data +mocap_data = MocapDataset(mocap_data_path, fps=mocap_fps) +if device == 'cuda': + mocap_data.cuda() +mocap_data.compute_positions() + +# gather skeleton info +skeleton = mocap_data.skeleton() +skeleton_joint_count = skeleton.num_joints() +skel_edge_list = utils.get_skeleton_edge_list(skeleton) + +# inference and rendering +skel_edge_list = utils.get_skeleton_edge_list(skeleton) +poseRenderer = PoseRenderer(skel_edge_list) + +# gather poses +subject = "S1" +action = "A1" +pose_sequence = mocap_data[subject][action]["rotations"] + +poses = [] +for valid_frame_range in mocap_valid_frame_ranges: + 
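# only frames inside the valid ranges are used; each range is sliced from the full rotation sequence + # and the slices are concatenated below (the single default range [500, 6500] yields 6000 poses) +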
frame_range_start = valid_frame_range[0] + frame_range_end = valid_frame_range[1] + poses += [pose_sequence[frame_range_start:frame_range_end]] +poses = np.concatenate(poses, axis=0) + +pose_count = poses.shape[0] +joint_count = poses.shape[1] +joint_dim = poses.shape[2] +pose_dim = joint_count * joint_dim + +poses = np.reshape(poses, (-1, pose_dim)) + +# create dataset + +class PoseDataset(Dataset): + def __init__(self, poses): + self.poses = poses + + def __len__(self): + return self.poses.shape[0] + + def __getitem__(self, idx): + return self.poses[idx, ...] + +full_dataset = PoseDataset(poses) +dataset_size = len(full_dataset) + +test_size = int(test_percentage * dataset_size) +train_size = dataset_size - test_size + +train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size]) + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + + +# create models + +class Critique(nn.Module): + def __init__(self, pose_dim, dense_layer_sizes): + super().__init__() + + self.pose_dim = pose_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("encoder_dense_0", nn.Linear(self.pose_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("encoder_dense_relu_0", nn.ReLU())) + + dense_layer_count = len(dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + dense_layers.append(("encoder_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("encoder_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("encoder_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], 1))) + dense_layers.append(("encoder_dense_sigmoid_{}".format(len(self.dense_layer_sizes)), nn.Sigmoid())) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + #print("x 1 ", x.shape + yhat = self.dense_layers(x) + #print("yhat ", yhat.shape) + return yhat + +critique = Critique(pose_dim, crit_dense_layer_sizes).to(device) + +print(critique) + +""" +test_input = torch.zeros((1, pose_dim)).to(device) +test_output = critique(test_input) +""" + +if save_models == True: + critique.eval() + + # save using pickle + torch.save(critique, "results/models/critique.pth") + + # save using onnx + x = torch.zeros((1, pose_dim)).to(device) + torch.onnx.export(critique, x, "results/models/critique.onnx") + + critique.train() + +if save_tscript == True: + critique.eval() + + # save using TochScript + x = torch.rand((1, pose_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(critique, x) + script_module.save("results/models/critique.pt") + + critique.train() + +if load_weights and critique_weights_file: + critique.load_state_dict(torch.load(critique_weights_file)) + +# create generator model + +class Generator(nn.Module): + def __init__(self, pose_dim, latent_dim, dense_layer_sizes): + super(Generator, self).__init__() + + self.pose_dim = pose_dim + self.latent_dim = latent_dim + self.dense_layer_sizes = dense_layer_sizes + + # create dense layers + dense_layers = [] + + dense_layers.append(("generator_dense_0", nn.Linear(latent_dim, self.dense_layer_sizes[0]))) + dense_layers.append(("generator_relu_0", nn.ReLU())) + + dense_layer_count = len(self.dense_layer_sizes) + for layer_index in range(1, dense_layer_count): + 
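# hidden layers; with the default settings above this builds an MLP of the form + # latent_dim -> 16 -> 64 -> 128 -> pose_dim (the final projection to pose_dim is appended after this loop) +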
dense_layers.append(("generator_dense_{}".format(layer_index), nn.Linear(self.dense_layer_sizes[layer_index-1], self.dense_layer_sizes[layer_index]))) + dense_layers.append(("generator_dense_relu_{}".format(layer_index), nn.ReLU())) + + dense_layers.append(("generator_dense_{}".format(len(self.dense_layer_sizes)), nn.Linear(self.dense_layer_sizes[-1], self.pose_dim))) + + self.dense_layers = nn.Sequential(OrderedDict(dense_layers)) + + def forward(self, x): + #print("x 1 ", x.size()) + + # dense layers + yhat = self.dense_layers(x) + #print("yhat ", yhat.size()) + + + return yhat + +generator = Generator(pose_dim, latent_dim, gen_dense_layer_sizes).to(device) + +print(generator) + +if save_models == True: + generator.eval() + + # save using pickle + torch.save(generator, "results/models/generator.pth") + + # save using onnx + x = torch.zeros((1, latent_dim)).to(device) + torch.onnx.export(generator, x, "results/models/generator.onnx") + + generator.train() + +if save_tscript == True: + generator.eval() + + # save using TochScript + x = torch.rand((1, latent_dim), dtype=torch.float32).to(device) + script_module = torch.jit.trace(generator, x) + script_module.save("results/models/generator.pt") + + generator.train() + +if load_weights and generator_weights_file: + generator.load_state_dict(torch.load(generator_weights_file)) + +# Training + +critique_optimizer = torch.optim.Adam(critique.parameters(), lr=crit_learning_rate) +generator_optimizer = torch.optim.Adam(generator.parameters(), lr=gen_learning_rate) + +bce_loss = nn.BCELoss() + +# crictique loss function +def crit_loss(crit_real_output, crit_fake_output): + _real_loss = bce_loss(crit_real_output, torch.ones_like(crit_real_output).to(device)) + _fake_loss = bce_loss(crit_fake_output, torch.zeros_like(crit_fake_output).to(device)) + + _loss = (_real_loss + _fake_loss) * 0.5 + return _loss + +# generator loss +def gen_crit_loss(crit_fake_output): + _loss = bce_loss(crit_fake_output, torch.ones_like(crit_fake_output).to(device)) + return _loss + +def gen_norm_loss(yhat): + + _yhat = yhat.view(-1, 4) + _norm = torch.norm(_yhat, dim=1) + _diff = (_norm - 1.0) ** 2 + _loss = torch.mean(_diff) + return _loss + +def gen_loss(yhat, crit_fake_output): + _norm_loss = gen_norm_loss(yhat) + _crit_loss = gen_crit_loss(crit_fake_output) + + _total_loss = 0.0 + _total_loss += _norm_loss * gen_norm_loss_scale + _total_loss += _crit_loss * gen_crit_loss_scale + + return _total_loss, _norm_loss, _crit_loss + +def crit_train_step(real_poses, random_encodings): + + critique_optimizer.zero_grad() + + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + _crit_loss.backward() + critique_optimizer.step() + + return _crit_loss + +def crit_test_step(real_poses, random_encodings): + with torch.no_grad(): + fake_output = generator(random_encodings) + real_output = real_poses + + crit_real_output = critique(real_output) + crit_fake_output = critique(fake_output) + + _crit_loss = crit_loss(crit_real_output, crit_fake_output) + + return _crit_loss + +def gen_train_step(random_encodings): + + generator_optimizer.zero_grad() + + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss, _norm_loss, _crit_loss = gen_loss(generated_poses, crit_fake_output) + + _gen_loss.backward() + generator_optimizer.step() + + return 
_gen_loss, _norm_loss, _crit_loss + +def gen_test_step(random_encodings): + with torch.no_grad(): + generated_poses = generator(random_encodings) + + crit_fake_output = critique(generated_poses) + + _gen_loss, _norm_loss, _crit_loss = gen_loss(generated_poses, crit_fake_output) + + return _gen_loss, _norm_loss, _crit_loss + +def plot_gan_outputs(generator, epoch, n=5): + generator.eval() + + plt.figure(figsize=(10,4.5)) + + zero_trajectory = torch.tensor(np.zeros((1, 1, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + for i in range(n): + ax = plt.subplot(1,n,i+1) + + random_encoding = torch.randn((1, latent_dim)).to(device) + + with torch.no_grad(): + gen_pose = generator(random_encoding) + + gen_pose = torch.squeeze(gen_pose) + gen_pose = gen_pose.view((-1, 4)) + gen_pose = nn.functional.normalize(gen_pose, p=2, dim=1) + gen_pose = gen_pose.view((1, 1, joint_count, joint_dim)) + + skel_pose = skeleton.forward_kinematics(gen_pose, zero_trajectory) + skel_pose = skel_pose.detach().cpu().numpy() + skel_pose = np.reshape(skel_pose, (1, joint_count, 3)) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_pose) + pose_image = poseRenderer.create_pose_images(skel_pose, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + plt.imshow(pose_image[0]) + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + if i == 0: + ax.set_title("Epoch {}: Generated Images".format(epoch)) + + plt.show() + + generator.train() + +def train(train_dataloader, test_dataloader, epochs): + + loss_history = {} + loss_history["gen train"] = [] + loss_history["gen test"] = [] + loss_history["crit train"] = [] + loss_history["crit test"] = [] + loss_history["gen crit"] = [] + loss_history["gen norm"] = [] + + for epoch in range(epochs): + + start = time.time() + + crit_train_loss_per_epoch = [] + gen_train_loss_per_epoch = [] + gen_norm_loss_per_epoch = [] + gen_crit_loss_per_epoch = [] + + for train_batch in train_dataloader: + train_batch = train_batch.to(device) + + random_encodings = torch.randn((train_batch.shape[0], latent_dim)).to(device) + + # start with critique training + _crit_train_loss = crit_train_step(train_batch, random_encodings) + + _crit_train_loss = _crit_train_loss.detach().cpu().numpy() + + crit_train_loss_per_epoch.append(_crit_train_loss) + + # now train the generator + for iter in range(2): + _gen_loss, _gen_norm_loss, _gen_crit_loss = gen_train_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + _gen_norm_loss = _gen_norm_loss.detach().cpu().numpy() + _gen_crit_loss = _gen_crit_loss.detach().cpu().numpy() + + gen_train_loss_per_epoch.append(_gen_loss) + gen_norm_loss_per_epoch.append(_gen_norm_loss) + gen_crit_loss_per_epoch.append(_gen_crit_loss) + + + crit_train_loss_per_epoch = np.mean(np.array(crit_train_loss_per_epoch)) + gen_train_loss_per_epoch = np.mean(np.array(gen_train_loss_per_epoch)) + gen_norm_loss_per_epoch = np.mean(np.array(gen_norm_loss_per_epoch)) + gen_crit_loss_per_epoch = np.mean(np.array(gen_crit_loss_per_epoch)) + + crit_test_loss_per_epoch = [] + gen_test_loss_per_epoch = [] + + for test_batch in test_dataloader: + test_batch = test_batch.to(device) + + random_encodings = torch.randn((test_batch.shape[0], latent_dim)).to(device) + + # start with critique testing + _crit_test_loss = crit_test_step(test_batch, random_encodings) + + _crit_test_loss = _crit_test_loss.detach().cpu().numpy() + + crit_test_loss_per_epoch.append(_crit_test_loss) + + # now test the generator
+ _gen_loss, _, _ = gen_test_step(random_encodings) + + _gen_loss = _gen_loss.detach().cpu().numpy() + + gen_test_loss_per_epoch.append(_gen_loss) + + crit_test_loss_per_epoch = np.mean(np.array(crit_test_loss_per_epoch)) + gen_test_loss_per_epoch = np.mean(np.array(gen_test_loss_per_epoch)) + + if epoch % weight_save_interval == 0 and save_weights == True: + torch.save(critique.state_dict(), "results/weights/critique_weights_epoch_{}".format(epoch)) + torch.save(generator.state_dict(), "results/weights/generator_weights_epoch_{}".format(epoch)) + + plot_gan_outputs(generator, epoch, n=5) + + loss_history["gen train"].append(gen_train_loss_per_epoch) + loss_history["gen test"].append(gen_test_loss_per_epoch) + loss_history["crit train"].append(crit_train_loss_per_epoch) + loss_history["crit test"].append(crit_test_loss_per_epoch) + loss_history["gen crit"].append(gen_crit_loss_per_epoch) + loss_history["gen norm"].append(gen_norm_loss_per_epoch) + + print ('epoch {} : gen train: {:01.4f} gen test: {:01.4f} crit train {:01.4f} crit test {:01.4f} gen norm {:01.4f} gen crit {:01.4f} time {:01.2f}'.format(epoch + 1, gen_train_loss_per_epoch, gen_test_loss_per_epoch, crit_train_loss_per_epoch, crit_test_loss_per_epoch, gen_norm_loss_per_epoch, gen_crit_loss_per_epoch, time.time()-start)) + + return loss_history + +# fit model +loss_history = train(train_dataloader, test_dataloader, epochs) + +epochs = 400 + +# save history +utils.save_loss_as_csv(loss_history, "results/histories/history_{}.csv".format(epochs)) +utils.save_loss_as_image(loss_history, "results/histories/history_{}.png".format(epochs)) + +# save model weights +torch.save(critique.state_dict(), "results/weights/critique_weights_epoch_{}".format(epochs)) +torch.save(generator.state_dict(), "results/weights/generator_weights_epoch_{}".format(epochs)) + +def create_ref_pose_image(pose_index, file_name): + pose = poses[pose_index] + pose = torch.tensor(np.reshape(pose, (1, 1, joint_count, joint_dim))).to(device) + zero_trajectory = torch.tensor(np.zeros((1, 1, 3), dtype=np.float32)).to(device) + skel_pose = skeleton.forward_kinematics(pose, zero_trajectory) + skel_pose = skel_pose.detach().cpu().numpy() + skel_pose = np.reshape(skel_pose, (joint_count, 3)) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_pose) + pose_image = poseRenderer.create_pose_image(skel_pose, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + pose_image.save(file_name, optimize=False) + +def create_gen_pose_image(file_name): + generator.eval() + + random_encoding = torch.randn((1, latent_dim)).to(device) + + with torch.no_grad(): + gen_pose = generator(random_encoding) + + gen_pose = torch.squeeze(gen_pose) + gen_pose = gen_pose.view((-1, 4)) + gen_pose = nn.functional.normalize(gen_pose, p=2, dim=1) + gen_pose = gen_pose.view((1, 1, joint_count, joint_dim)) + + zero_trajectory = torch.tensor(np.zeros((1, 1, 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_pose = skeleton.forward_kinematics(gen_pose, zero_trajectory) + + skel_pose = skel_pose.detach().cpu().numpy() + skel_pose = np.squeeze(skel_pose) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_pose) + pose_image = poseRenderer.create_pose_image(skel_pose, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + pose_image.save(file_name, optimize=False) + + generator.train() + +def decode_pose_encodings(pose_encodings, file_name): + + generator.eval() + + gen_poses = [] + + for pose_encoding in 
pose_encodings: + pose_encoding = np.expand_dims(pose_encoding, axis=0) + pose_encoding = torch.from_numpy(pose_encoding).to(device) + + with torch.no_grad(): + gen_pose = generator(pose_encoding) + + gen_pose = torch.squeeze(gen_pose) + gen_pose = gen_pose.view((-1, 4)) + gen_pose = nn.functional.normalize(gen_pose, p=2, dim=1) + gen_pose = gen_pose.view((1, joint_count, joint_dim)) + + gen_poses.append(gen_pose) + + gen_poses = torch.cat(gen_poses, dim=0) + gen_poses = torch.unsqueeze(gen_poses, dim=0) + + zero_trajectory = torch.tensor(np.zeros((1, len(pose_encodings), 3), dtype=np.float32)) + zero_trajectory = zero_trajectory.to(device) + + skel_poses = skeleton.forward_kinematics(gen_poses, zero_trajectory) + + skel_poses = skel_poses.detach().cpu().numpy() + skel_poses = np.squeeze(skel_poses) + + view_min, view_max = utils.get_equal_mix_max_positions(skel_poses) + pose_images = poseRenderer.create_pose_images(skel_poses, view_min, view_max, view_ele, view_azi, view_line_width, view_size, view_size) + + pose_images[0].save(file_name, save_all=True, append_images=pose_images[1:], optimize=False, duration=33.0, loop=0) + + generator.train() + +# create single original pose image + +pose_index = 100 + +create_ref_pose_image(pose_index, "results/images/orig_pose_{}.gif".format(pose_index)) + +# generate single pose image +create_gen_pose_image("results/images/gen_pose_2.gif") diff --git a/other/Dataset_Tutorial.py b/other/Dataset_Tutorial.py new file mode 100644 index 0000000..882da57 --- /dev/null +++ b/other/Dataset_Tutorial.py @@ -0,0 +1,63 @@ +import torch +from torch.utils.data import Dataset, DataLoader + +# create two random tensors representing dummy data for the dataset +data_count = 100 # number of data instances +data_dim = 8 # number of features per instance +label_count = 4 # number of class labels + +dummy_features = torch.rand([data_count, data_dim], dtype=torch.float32) +dummy_labels = torch.randint(0, label_count, [data_count], dtype=torch.int32) + +# Create class for a simple customised Dataset by subclassing Dataset +class CustomDataset(Dataset): + def __init__(self, features, labels): + self.features = features + self.labels = labels + def __len__(self): + return len(self.features) + def __getitem__(self, idx): + feature = self.features[idx] + label = self.labels[idx] + return feature, label + +# Create an instance of the customised dataset +customDataset = CustomDataset(dummy_features, dummy_labels) + +#print length of dataset +print(len(customDataset)) + +#iterate over all data instances contained in dataset +for instance in iter(customDataset): + print(instance) + +# split full dataset into train and test dataset +train_test_ratio = 0.8 # 80% of data goes into training set, 20% into test set +train_size = int(len(customDataset) * train_test_ratio) +test_size = len(customDataset) - train_size + +train_dataset, test_dataset = torch.utils.data.random_split(customDataset, [train_size, test_size]) + +print("train dataset size: ", len(train_dataset)) +print("test dataset size: ", len(test_dataset)) + +# Instantiate Dataloaders + +batch_size = 16 + +train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) +test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) + +batch = next(iter(train_dataloader)) + +batch_features = batch[0] +batch_labels = batch[1] + +print("batch features shape ", batch_features.shape) +print("batch labels shape", batch_labels.shape) + +# iterate over DataLoader for test dataset + +for (idx, batch)
in enumerate(test_dataloader): + print("batch ", idx, " features: ", batch[0]) + print("batch ", idx, " labels: ", batch[1]) \ No newline at end of file diff --git a/utils/bvh_conversion/.spyproject/config/backups/codestyle.ini.bak b/utils/bvh_conversion/.spyproject/config/backups/codestyle.ini.bak new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/backups/codestyle.ini.bak @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/backups/encoding.ini.bak b/utils/bvh_conversion/.spyproject/config/backups/encoding.ini.bak new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/backups/encoding.ini.bak @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/backups/vcs.ini.bak b/utils/bvh_conversion/.spyproject/config/backups/vcs.ini.bak new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/backups/vcs.ini.bak @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/backups/workspace.ini.bak b/utils/bvh_conversion/.spyproject/config/backups/workspace.ini.bak new file mode 100644 index 0000000..d3c9740 --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/backups/workspace.ini.bak @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['main_bvhconv.py', 'dataset_tools.py', 'bvh_tools.py'] + diff --git a/utils/bvh_conversion/.spyproject/config/codestyle.ini b/utils/bvh_conversion/.spyproject/config/codestyle.ini new file mode 100644 index 0000000..0f54b4c --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/utils/bvh_conversion/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 0000000..0b95e5c --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/utils/bvh_conversion/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/utils/bvh_conversion/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 0000000..0ce193c --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/utils/bvh_conversion/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/utils/bvh_conversion/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 0000000..ee25483 --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/utils/bvh_conversion/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/utils/bvh_conversion/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 0000000..2a73ab7 --- /dev/null +++ 
b/utils/bvh_conversion/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/utils/bvh_conversion/.spyproject/config/encoding.ini b/utils/bvh_conversion/.spyproject/config/encoding.ini new file mode 100644 index 0000000..a17aced --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/vcs.ini b/utils/bvh_conversion/.spyproject/config/vcs.ini new file mode 100644 index 0000000..fd66eae --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/utils/bvh_conversion/.spyproject/config/workspace.ini b/utils/bvh_conversion/.spyproject/config/workspace.ini new file mode 100644 index 0000000..d3c9740 --- /dev/null +++ b/utils/bvh_conversion/.spyproject/config/workspace.ini @@ -0,0 +1,10 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + +[main] +version = 0.2.0 +recent_files = ['main_bvhconv.py', 'dataset_tools.py', 'bvh_tools.py'] + diff --git a/utils/bvh_conversion/bvh_data.py b/utils/bvh_conversion/bvh_data.py new file mode 100644 index 0000000..bf46b60 --- /dev/null +++ b/utils/bvh_conversion/bvh_data.py @@ -0,0 +1,53 @@ +import numpy as np + +class BVH_Joint(): + def __init__(self, name, parent=None, children=None): + self.name = name + self.parent = parent + self.children = children + +class BVH_MocapData(): + def __init__(self): + self.skeleton = {} + self.values = None + self.channel_names = [] + self.framerate = 0.0 + self.root_name = '' + + def traverse(self, j=None): + stack = [self.root_name] + while stack: + joint = stack.pop() + yield joint + for c in self.skeleton[joint]['children']: + stack.append(c) + + def clone(self): + import copy + new_data = BVH_MocapData() + new_data.skeleton = copy.copy(self.skeleton) + new_data.values = copy.copy(self.values) + new_data.channel_names = copy.copy(self.channel_names) + new_data.root_name = copy.copy(self.root_name) + new_data.framerate = copy.copy(self.framerate) + return new_data + + def get_all_channels(self): + '''Returns all of the channels parsed from the file as a 2D numpy array''' + + frames = [f[1] for f in self.values] + return np.asarray([[channel[2] for channel in frame] for frame in frames]) + + def get_skeleton_tree(self): + tree = [] + root_key = [j for j in self.skeleton if self.skeleton[j]['parent']==None][0] + + root_joint = BVH_Joint(root_key) + + def get_empty_channels(self): + #TODO + pass + + def get_constant_channels(self): + #TODO + pass diff --git a/utils/bvh_conversion/bvh_parsers.py b/utils/bvh_conversion/bvh_parsers.py new file mode 100644 index 0000000..3756e29 --- /dev/null +++ b/utils/bvh_conversion/bvh_parsers.py @@ -0,0 +1,242 @@ +''' +BVH Parser Class + +By Omid Alemi +Created: June 12, 2017 + +Based on: https://gist.github.com/johnfredcee/2007503 + +''' +import re +import numpy as np +from bvh_data import BVH_Joint, BVH_MocapData + +class BVH_Scanner(): + ''' + A wrapper class for re.Scanner + ''' + def __init__(self): + + def identifier(scanner, token): + return 'IDENT', token + + def operator(scanner, token): + return 'OPERATOR', token + + def digit(scanner, token): + return 'DIGIT', token + + def 
open_brace(scanner, token): + return 'OPEN_BRACE', token + + def close_brace(scanner, token): + return 'CLOSE_BRACE', token + + self.scanner = re.Scanner([ + (r'[a-zA-Z_]\w*', identifier), + #(r'-*[0-9]+(\.[0-9]+)?', digit), # won't work for .34 + #(r'[-+]?[0-9]*\.?[0-9]+', digit), # won't work for 4.56e-2 + #(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', digit), + (r'-*[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', digit), + (r'}', close_brace), + (r'}', close_brace), + (r'{', open_brace), + (r':', None), + (r'\s+', None) + ]) + + def scan(self, stuff): + return self.scanner.scan(stuff) + + + +class BVH_Parser(): + ''' + A class to parse a BVH file. + + Extracts the skeleton and channel values + ''' + def __init__(self, filename=None): + self.reset() + + def reset(self): + self._skeleton = {} + self.bone_context = [] + self._motion_channels = [] + self._motions = [] + self.current_token = 0 + self.framerate = 0.0 + self.root_name = '' + + self.scanner = BVH_Scanner() + + self.data = BVH_MocapData() + + + def parse(self, filename): + self.reset() + + with open(filename, 'r') as bvh_file: + raw_contents = bvh_file.read() + tokens, remainder = self.scanner.scan(raw_contents) + self._parse_hierarchy(tokens) + self.current_token = self.current_token + 1 + self._parse_motion(tokens) + + self.data.skeleton = self._skeleton + self.data.channel_names = self._motion_channels + self.data.values = self._to_DataFrame() + self.data.root_name = self.root_name + self.data.framerate = self.framerate + + return self.data + + def _to_DataFrame(self): + '''Returns all of the channels parsed from the file as a pandas DataFrame''' + + import pandas as pd + time_index = pd.to_timedelta([f[0] for f in self._motions], unit='s') + frames = [f[1] for f in self._motions] + channels = np.asarray([[channel[2] for channel in frame] for frame in frames]) + column_names = ['%s_%s'%(c[0], c[1]) for c in self._motion_channels] + + return pd.DataFrame(data=channels, index=time_index, columns=column_names) + + + def _new_bone(self, parent, name): + bone = {'parent': parent, 'channels': [], 'offsets': [],'children': []} + return bone + + def _push_bone_context(self,name): + self.bone_context.append(name) + + def _get_bone_context(self): + return self.bone_context[len(self.bone_context)-1] + + def _pop_bone_context(self): + self.bone_context = self.bone_context[:-1] + return self.bone_context[len(self.bone_context)-1] + + def _read_offset(self, bvh, token_index): + if bvh[token_index] != ('IDENT', 'OFFSET'): + return None, None + token_index = token_index + 1 + offsets = [0.0] * 3 + for i in range(3): + offsets[i] = float(bvh[token_index][1]) + token_index = token_index + 1 + return offsets, token_index + + def _read_channels(self, bvh, token_index): + if bvh[token_index] != ('IDENT', 'CHANNELS'): + return None, None + token_index = token_index + 1 + channel_count = int(bvh[token_index][1]) + token_index = token_index + 1 + channels = [""] * channel_count + for i in range(channel_count): + channels[i] = bvh[token_index][1] + token_index = token_index + 1 + return channels, token_index + + def _parse_joint(self, bvh, token_index): + end_site = False + joint_id = bvh[token_index][1] + token_index = token_index + 1 + joint_name = bvh[token_index][1] + token_index = token_index + 1 + + parent_name = self._get_bone_context() + + if (joint_id == "End"): + joint_name = parent_name+ '_Nub' + end_site = True + joint = self._new_bone(parent_name, joint_name) + if bvh[token_index][0] != 'OPEN_BRACE': + print('Was expecting brance, got ', 
bvh[token_index]) + return None + token_index = token_index + 1 + offsets, token_index = self._read_offset(bvh, token_index) + joint['offsets'] = offsets + if not end_site: + channels, token_index = self._read_channels(bvh, token_index) + joint['channels'] = channels + for channel in channels: + self._motion_channels.append((joint_name, channel)) + + self._skeleton[joint_name] = joint + self._skeleton[parent_name]['children'].append(joint_name) + + while (bvh[token_index][0] == 'IDENT' and bvh[token_index][1] == 'JOINT') or (bvh[token_index][0] == 'IDENT' and bvh[token_index][1] == 'End'): + self._push_bone_context(joint_name) + token_index = self._parse_joint(bvh, token_index) + self._pop_bone_context() + + if bvh[token_index][0] == 'CLOSE_BRACE': + return token_index + 1 + + print('Unexpected token ', bvh[token_index]) + + def _parse_hierarchy(self, bvh): + self.current_token = 0 + if bvh[self.current_token] != ('IDENT', 'HIERARCHY'): + return None + self.current_token = self.current_token + 1 + if bvh[self.current_token] != ('IDENT', 'ROOT'): + return None + self.current_token = self.current_token + 1 + if bvh[self.current_token][0] != 'IDENT': + return None + + root_name = bvh[self.current_token][1] + root_bone = self._new_bone(None, root_name) + self.current_token = self.current_token + 2 #skipping open brace + offsets, self.current_token = self._read_offset(bvh, self.current_token) + channels, self.current_token = self._read_channels(bvh, self.current_token) + root_bone['offsets'] = offsets + root_bone['channels'] = channels + self._skeleton[root_name] = root_bone + self._push_bone_context(root_name) + + for channel in channels: + self._motion_channels.append((root_name, channel)) + + while bvh[self.current_token][1] == 'JOINT': + self.current_token = self._parse_joint(bvh, self.current_token) + + self.root_name = root_name + + def _parse_motion(self, bvh): + if bvh[self.current_token][0] != 'IDENT': + print('Unexpected text') + return None + if bvh[self.current_token][1] != 'MOTION': + print('No motion section') + return None + self.current_token = self.current_token + 1 + if bvh[self.current_token][1] != 'Frames': + return None + self.current_token = self.current_token + 1 + frame_count = int(bvh[self.current_token][1]) + self.current_token = self.current_token + 1 + if bvh[self.current_token][1] != 'Frame': + return None + self.current_token = self.current_token + 1 + if bvh[self.current_token][1] != 'Time': + return None + self.current_token = self.current_token + 1 + frame_rate = float(bvh[self.current_token][1]) + + self.framerate = frame_rate + + self.current_token = self.current_token + 1 + + frame_time = 0.0 + self._motions = [()] * frame_count + for i in range(frame_count): + channel_values = [] + for channel in self._motion_channels: + channel_values.append((channel[0], channel[1], float(bvh[self.current_token][1]))) + self.current_token = self.current_token + 1 + self._motions[i] = (frame_time, channel_values) + frame_time = frame_time + frame_rate diff --git a/utils/bvh_conversion/bvh_tools.py b/utils/bvh_conversion/bvh_tools.py new file mode 100644 index 0000000..85be137 --- /dev/null +++ b/utils/bvh_conversion/bvh_tools.py @@ -0,0 +1,371 @@ +""" +important note: rotation conversion to quaternion currently only workss correctly +for the euler rotation sequence: xrot, yrot, zrot +""" + +from bvh_parsers import BVH_Parser +import pandas +import math +import numpy as np +import transforms3d as t3d + +class SkeletonJoint: + + def __init__(self, name, offset): + self.name = 
name + self.local_offset = offset + self.local_translation = np.array([0, 0, 0]) + self.local_rotation = t3d.quaternions.qeye() + + self.local_transformation = np.identity(4) + self.world_transformation = np.identity(4) + + self.world_rotation = t3d.quaternions.qeye() + self.world_position = np.array([0, 0, 0]) + + self.parent = None + self.children = list() + +class Skeleton: + + def __init__(self): + self.root_joint = None + self.joints = list() + +class BVH_Tools: + def __init__(self): + self.parser = BVH_Parser() + self.bvh_data = None + self.skeletons = [] + self.skeletons_frames = [] + self.euler_sequence = [0, 1, 2] # xyz + + # gather all root joint names + # each root joint corresponds to a skeleton + def _get_root_joint_names(self): + bvh_skeleton = self.bvh_data.skeleton + root_joint_names = list() + for joint_name in bvh_skeleton: + if bvh_skeleton[joint_name]["parent"] == None: + root_joint_names.append(joint_name) + return root_joint_names + + # traverse joint hiararchy + def _traverse_create_joint_hierarchy(self, parent_joint_name, joint_hierarchy): + bvh_skeleton = self.bvh_data.skeleton + children_joint_names = bvh_skeleton[parent_joint_name]["children"] + joint_hierarchy[parent_joint_name] = children_joint_names + + for child_joint_name in children_joint_names: + self._traverse_create_joint_hierarchy(child_joint_name, joint_hierarchy) + + return joint_hierarchy + + # create joint hierarchy + def _create_joint_hierarchy(self, root_joint_name): + joint_names_hierarchy = dict() + self._traverse_create_joint_hierarchy(root_joint_name, joint_names_hierarchy) + return joint_names_hierarchy + + def _traverse_create_skeleton(self, skel_parent_joint, joint_hierarchy, skeleton): + bvh_skeleton = self.bvh_data.skeleton + children_joint_names = joint_hierarchy[skel_parent_joint.name] + + for child_joint_name in children_joint_names: + + children_joint_offset = np.array(bvh_skeleton[child_joint_name]["offsets"]) + skel_child_joint = SkeletonJoint(child_joint_name, children_joint_offset) + + skel_parent_joint.children.append(skel_child_joint) + skel_child_joint.parent = skel_parent_joint + + skeleton.joints.append(skel_child_joint) + + self._traverse_create_skeleton(skel_child_joint, joint_hierarchy, skeleton) + + def _create_skeleton(self, root_joint_name, joint_hierarchy): + bvh_skeleton = self.bvh_data.skeleton + skeleton = Skeleton() + + root_joint_offset = np.array(bvh_skeleton[root_joint_name]["offsets"]) + skel_root_joint = SkeletonJoint(root_joint_name, root_joint_offset) + + skeleton.root_joint = skel_root_joint + skeleton.joints.append(skel_root_joint) + + self._traverse_create_skeleton(skel_root_joint, joint_hierarchy, skeleton) + + return skeleton + + def _get_skeleton_frames(self, skeleton): + bvh_frames = self.bvh_data.values + bvh_frames_column_names = [ column for column in self.bvh_data.values.columns ] + bvh_framecount = bvh_frames.shape[0] + bvh_channels = set(self.bvh_data.channel_names) + bvh_channel_joint_names = set([channel[0] for channel in bvh_channels]) + bvh_channel_value_names = ["Xposition", "Yposition", "Zposition", "Xrotation", "Yrotation", "Zrotation"] + + joint_frames = list() + + for joint in skeleton.joints: + joint_name = joint.name + if joint_name in bvh_channel_joint_names: + joint_frames_combined = [] + + for i, value_name in enumerate(bvh_channel_value_names): + column_name = joint.name + "_" + value_name + + if column_name in bvh_frames_column_names: + joint_frames_combined.append(np.array(bvh_frames[column_name])) + + #print("colname ", 
column_name, " values ", np.array(bvh_frames[column_name])[0]) + + else: + joint_frames_combined.append(np.zeros(bvh_framecount)) + + + joint_translations = joint_frames_combined[:3] + joint_rotations = joint_frames_combined[3:] + + joint_translations = np.array(joint_translations) + joint_rotations = np.array(joint_rotations) + + joint_translations = np.transpose(joint_translations) + joint_rotations = np.transpose(joint_rotations) + + joint_frames.append( [joint_name, joint_translations, joint_rotations] ) + else: + joint_frames.append( [joint_name] ) + + return joint_frames + + def _skeleton_traverse_transformations(self, joint, parent_joint): + + # calculate local translation vector and rotation matrix + _trans = joint.local_offset + joint.local_translation + _rot = t3d.quaternions.quat2mat(joint.local_rotation) + + # create local transformation matrix + joint.local_transformation = np.identity(4) + joint.local_transformation[0:3, 0:3] = _rot + joint.local_transformation[0:3, 3] = _trans + + # calculate world transformation matrix + joint.world_transformation = np.matmul(parent_joint.world_transformation, joint.local_transformation) + + # calculate absolute joint position + joint.world_position = np.matmul(joint.world_transformation, np.array([0, 0, 0, 1])) + joint.world_position = joint.world_position[:3] + + # calculate abolute joint rotation + joint.world_rotation = t3d.quaternions.mat2quat(joint.world_transformation[0:3, 0:3]) + + #print("joint ", joint.name ," wpos ", joint.world_position) + + for child_joint in joint.children: + self._skeleton_traverse_transformations(child_joint, joint) + + def _skeleton_update_transformations(self, skeleton): + joint = skeleton.root_joint + + # calculate local translation vector and rotation matrix + _trans = joint.local_offset + joint.local_translation + _rot = t3d.quaternions.quat2mat(joint.local_rotation) + + # create local transformation matrix + joint.local_transformation = np.identity(4) + joint.local_transformation[0:3, 0:3] = _rot + joint.local_transformation[0:3, 3] = _trans + + # for root node, local and world transformation matrix are identical + joint.world_transformation = np.copy(joint.local_transformation) + + # calculate absolute joint position + joint.world_position = np.matmul(joint.world_transformation, np.array([0, 0, 0, 1])) + joint.world_position = joint.world_position[:3] + + # calculate abolute joint rotation + joint.world_rotation = t3d.quaternions.mat2quat(joint.world_transformation[0:3, 0:3]) + + + #print("joint ", joint.name ," wpos ", joint.world_position) + + for child_joint in joint.children: + self._skeleton_traverse_transformations(child_joint, joint) + + def _skeleton_set_frame(self, skeleton, skeleton_frame, frame_index): + for joint_index, joint in enumerate(skeleton.joints): + if len(skeleton_frame[joint_index]) > 1: # check if the frame contains transfomation info + #print("joint ", joint.name, " trans ", joint.local_translation) + + # get local translation + joint.local_translation = np.copy(skeleton_frame[joint_index][1][frame_index]) + + # get local rotation in euler angles and degrees + rel_rotation_euler = np.copy(skeleton_frame[joint_index][2][frame_index]) + + # convert degrees to radians + rel_rotation_euler[0] = rel_rotation_euler[0]/180.0 * math.pi; + rel_rotation_euler[1] = rel_rotation_euler[1]/180.0 * math.pi; + rel_rotation_euler[2] = rel_rotation_euler[2]/180.0 * math.pi; + + # convert euler rotation to quaternion + joint.local_rotation = t3d.quaternions.qeye() + + quat_x = 
t3d.quaternions.axangle2quat([1, 0, 0], rel_rotation_euler[0]) + quat_y = t3d.quaternions.axangle2quat([0, 1, 0], rel_rotation_euler[1]) + quat_z = t3d.quaternions.axangle2quat([0, 0, 1], rel_rotation_euler[2]) + + rotations = [quat_x, quat_y, quat_z] + for rot_index in self.euler_sequence: + joint.local_rotation = t3d.quaternions.qmult(joint.local_rotation, rotations[rot_index]) + + """ + print("update joint ", joint.name, " rel quat\n", joint.local_rotation) + """ + + def parse_bvh_file(self, file_name): + parser = BVH_Parser() + self.bvh_data = parser.parse(file_name) + bvh_root_joint_names = self._get_root_joint_names() + + for root_joint_name in bvh_root_joint_names: + bvh_joint_hierarchy = self._create_joint_hierarchy(root_joint_name) + skeleton = self._create_skeleton(bvh_root_joint_names[0], bvh_joint_hierarchy) + + self.skeletons.append(skeleton) + + for skeleton in self.skeletons: + skeleton_frames = self._get_skeleton_frames(skeleton) + self.skeletons_frames.append(skeleton_frames) + + return self.skeletons, self.skeletons_frames + + def write_bvh_file(self, skeleton, frames, fps, file_name): + + with open(file_name, "w") as file: + file.write("HIERARCHY\n") + self._write_bvh_hierarchy(skeleton.root_joint, indent="", file=file) + file.write("MOTION\n") + file.write("Frames: {}\n".format(frames[0][1].shape[0])) + file.write("Frame Time: {}\n".format(1.0 / fps)) + self._write_bvh_frames(frames, file=file) + + def _write_bvh_hierarchy(self, joint, indent, file): + if joint.parent == None: + file.write("{}ROOT {}\n".format(indent, joint.name)) + elif len(joint.children) > 0: + file.write("{}JOINT {}\n".format(indent, joint.name)) + else: + file.write("{}End Site\n".format(indent)) + + file.write("{}".format(indent) + "{\n") + file.write(" {}OFFSET {} {} {}\n".format(indent, joint.local_offset[0], joint.local_offset[1], joint.local_offset[2])) + + if len(joint.children) > 0: + file.write(" {}CHANNELS 6 Xposition Yposition Zposition Zrotation Xrotation Yrotation\n".format(indent)) + + for child in joint.children: + self._write_bvh_hierarchy(child, "{} ".format(indent), file) + + file.write("{}".format(indent) + "}\n") + + def _write_bvh_frames(self, frames, file): + jointcount = len(frames) + framecount = frames[0][1].shape[0] + + for frame in range(framecount): + for joint in range(jointcount): + if len(frames[joint]) == 1: # Nub + continue + joint_rotations = frames[joint][1] + joint_positions = frames[joint][2] + + joint_rotation = joint_rotations[frame] + joint_position = joint_positions[frame] + + file.write("{} {} {} ".format(joint_rotation[0], joint_rotation[1], joint_rotation[2])) + file.write("{} {} {} ".format(joint_position[self.euler_sequence[0]], joint_position[self.euler_sequence[1]], joint_position[self.euler_sequence[2]])) + + file.write("\n") + + def set_frame(self, frame_index): + for skeleton, skeleton_frames in zip(self.skeletons, self.skeletons_frames): + self._skeleton_set_frame(skeleton, skeleton_frames, frame_index) + self._skeleton_update_transformations(skeleton) + + def create_datasets(self, start_frame_index=-1, end_frame_index=-1): + + if start_frame_index == -1: + start_frame_index = 0 + if end_frame_index == -1: + end_frame_index = self.bvh_data.values.shape[0] + + frameCount = end_frame_index - start_frame_index + + datasets = dict() + + for skeleton_index in range(len(self.skeletons)): + + dataset = dict() + datasets["S{}".format(skeleton_index + 1)] = dataset + + skeleton = self.skeletons[skeleton_index] + joint_count = len(skeleton.joints) + + 
joint_names = list() + joint_parents = list() + joint_children = list() + joints_offsets = np.zeros((joint_count, 3), dtype=np.float32) + + joint_index_map = dict() + for joint_index, joint in enumerate(skeleton.joints): + joint_index_map[joint] = joint_index + + for joint_index, joint in enumerate(skeleton.joints): + + joint_names.append(joint.name) + joints_offsets[joint_index] = joint.local_offset + + if joint.parent: + joint_parent_index = joint_index_map[joint.parent] + joint_parents.append(joint_parent_index) + else: + joint_parents.append(-1) + + joint_children.append(list()) + + for joint_child in joint.children: + joint_child_index = joint_index_map[joint_child] + + joint_children[joint_index].append(joint_child_index) + + dataset["names"] = joint_names + dataset["offsets"] = joints_offsets + dataset["parents"] = joint_parents + dataset["children"] = joint_children + + skeleton_frames = self.skeletons_frames[skeleton_index] + joints_pos_local = np.zeros((frameCount, joint_count, 3), dtype=np.float32) + joints_pos_world = np.zeros((frameCount, joint_count, 3), dtype=np.float32) + joints_rot_local = np.zeros((frameCount, joint_count, 4), dtype=np.float32) + joints_rot_world = np.zeros((frameCount, joint_count, 4), dtype=np.float32) + + for frame_index in range(start_frame_index, end_frame_index): + self.set_frame(frame_index) + + rel_frame_index = frame_index - start_frame_index + + for joint_index, joint in enumerate(skeleton.joints): + + joints_pos_local[rel_frame_index][joint_index][:] = joint.local_offset + joint.local_translation + joints_pos_world[rel_frame_index][joint_index][:] = joint.world_position + joints_rot_local[rel_frame_index][joint_index][:] = joint.local_rotation + joints_rot_world[rel_frame_index][joint_index][:] = joint.world_rotation + + dataset["pos_local"] = joints_pos_local + dataset["pos_world"] = joints_pos_world + dataset["rot_local"] = joints_rot_local + dataset["rot_world"] = joints_rot_world + + return datasets \ No newline at end of file diff --git a/utils/bvh_conversion/dataset_tools.py b/utils/bvh_conversion/dataset_tools.py new file mode 100644 index 0000000..c29181d --- /dev/null +++ b/utils/bvh_conversion/dataset_tools.py @@ -0,0 +1,206 @@ +import pickle +import json +import copy +import numpy as np +from numpy.core.umath_tests import inner1d + +class DatasetTools: + def __init__(self): + self.dataset = None + + def load_dataset(self, file_path): + if file_path.endswith(".p"): + self._load_pickle(file_path) + elif file_path.endswith(".json"): + self._load_json(file_path) + else: + print("file type not recognized") + + def _load_pickle(self, file_path): + with open(file_path, 'rb') as file: + self.dataset = pickle.load(file) + + def _load_json(self, file_path): + with open(file_path, 'r') as file: + conv_dataset = json.load(file) + self.dataset = self._convert_list_to_np(conv_dataset) + + def save_dataset(self, file_path): + if file_path.endswith(".p"): + self._save_pickle(file_path) + elif file_path.endswith(".json"): + self._save_json(file_path) + else: + print("file type not recognized") + + def _save_pickle(self, file_path): + with open(file_path, 'wb') as file: + pickle.dump( self.dataset, file ) + + def _save_json(self, file_path): + conv_dataset = self._convert_np_to_list() + with open(file_path, 'w') as file: + json.dump(conv_dataset, file) + + def _convert_np_to_list(self): + conv_dataset = copy.deepcopy(self.dataset) + for subject_name, subject_dict in conv_dataset.items(): + for data_name, data in subject_dict.items(): + if 
isinstance(data, np.ndarray): + subject_dict[data_name] = data.tolist() + return conv_dataset + + def _convert_list_to_np(self, dataset): + conv_dataset = copy.deepcopy(dataset) + # go through all data and check if data is a list + # if yes, then check if first innermost value of list is a float + # if yes, assume all values in the list are floats and convert list to numpy array + for subject_name, subject_dict in conv_dataset.items(): + for data_name, data in subject_dict.items(): + if isinstance(data, list): + list_value = data + while( isinstance(list_value, list)): + list_value = list_value[0] + if isinstance(list_value, float): + #print("convert data ", data_name, " to array") + subject_dict[data_name] = np.array(data) + + return conv_dataset + + # calculate relative positions of all joints with respect to the position of a reference joint at a particular reference frame + # arg ref_joint_name: name of the reference joint + # arg ref_frame: index of the refefence frame (typically 0) + # arg abs_pos_data_name: name of data containing absolute joint positions + # arg rel_pos_data_name: name of data where the relative joint positions will be written to + def remove_ref_position(self, ref_joint_name, ref_frame, abs_pos_data_name, rel_pos_data_name): + assert(self.dataset != None) + + for subject_name, subject_dict in self.dataset.items(): + ref_joint_index = subject_dict["names"].index(ref_joint_name) + #print("ref_joint_index ", ref_joint_index) + abs_pos_data = subject_dict[abs_pos_data_name] + abs_ref_pos = abs_pos_data[ref_frame, ref_joint_index, :] + #print("abs_ref_pos ", abs_ref_pos) + + rel_pos_data = np.copy(abs_pos_data) + rel_pos_data -= abs_ref_pos + + subject_dict[rel_pos_data_name] = rel_pos_data + + # calculate relative positions of all joints with respect to the directions of three reference joints at a particular reference frame + # arg ref_joint_names: three joint names, typically this is: Hips, LeftUpLeg, Spine + # arg ref_frame: index of the reference frame (typically 0) + # arg abs_pos_data_name: name of data containing absolute joint positions + # arg relrot_pos_data_name: name of data where the rotated joint positions will be written to + + def remove_ref_orientation(self, ref_joint_names, ref_frame, abs_pos_data_name, relrot_pos_data_name): + assert(self.dataset != None) + assert(len(ref_joint_names) == 3) + + for subject_name, subject_dict in self.dataset.items(): + ref1_joint_index = subject_dict["names"].index(ref_joint_names[0]) + ref2_joint_index = subject_dict["names"].index(ref_joint_names[1]) + ref3_joint_index = subject_dict["names"].index(ref_joint_names[2]) + + abs_pos_data = subject_dict[abs_pos_data_name] + abs_ref1_pos = abs_pos_data[ref_frame, ref1_joint_index, :] + abs_ref2_pos = abs_pos_data[ref_frame, ref2_joint_index, :] + abs_ref3_pos = abs_pos_data[ref_frame, ref3_joint_index, :] + + vecX = abs_ref2_pos - abs_ref1_pos + vecY = abs_ref3_pos - abs_ref1_pos + vecX /= np.linalg.norm(vecX) + vecY /= np.linalg.norm(vecY) + vecZ = np.cross(vecX, vecY) + + ref_matrix = np.zeros(shape=(3, 3), dtype=np.float32) + ref_matrix[0, :] = vecX + ref_matrix[1, :] = vecY + ref_matrix[2, :] = vecZ + + inv_matrix = np.linalg.inv(ref_matrix) + + relrot_pos_data = np.copy(abs_pos_data) + relrot_pos_data = np.matmul(relrot_pos_data, inv_matrix) + + subject_dict[relrot_pos_data_name] = relrot_pos_data + + # calculate the angle between joints + # for three joints: angle between (j1 - j2) and (j3 - j2) + # for four joints: angle between (j1 - j2) and (j4 - j3) + # arg 
joint_names : three or four joint names + # arg pos_data_name : name of data containing joint positions + # arg angle_data_name : name of data to write angles to + + def _calc_angle_j3(self, joint_names, pos_data_name, angle_data_name): + + for subject_name, subject_dict in self.dataset.items(): + joint1_index = subject_dict["names"].index(joint_names[0]) + joint2_index = subject_dict["names"].index(joint_names[1]) + joint3_index = subject_dict["names"].index(joint_names[2]) + + pos_data = subject_dict[pos_data_name] + joint1_pos_data = pos_data[:, joint1_index, :].copy() + joint2_pos_data = pos_data[:, joint2_index, :].copy() + joint3_pos_data = pos_data[:, joint3_index, :].copy() + + joint21_dir = joint1_pos_data - joint2_pos_data + joint23_dir = joint3_pos_data - joint2_pos_data + + joint21_len = np.expand_dims(np.linalg.norm(joint21_dir, axis=1), axis=1) + joint21_dir /= joint21_len + joint23_len = np.expand_dims(np.linalg.norm(joint23_dir, axis=1), axis=1) + joint23_dir /= joint23_len + + joint_angle = inner1d(joint21_dir, joint23_dir) + + subject_dict[angle_data_name] = joint_angle + + def _calc_angle_j4(self, joint_names, pos_data_name, angle_data_name): + + for subject_name, subject_dict in self.dataset.items(): + joint1_index = subject_dict["names"].index(joint_names[0]) + joint2_index = subject_dict["names"].index(joint_names[1]) + joint3_index = subject_dict["names"].index(joint_names[2]) + joint4_index = subject_dict["names"].index(joint_names[3]) + + pos_data = subject_dict[pos_data_name] + joint1_pos_data = pos_data[:, joint1_index, :].copy() + joint2_pos_data = pos_data[:, joint2_index, :].copy() + joint3_pos_data = pos_data[:, joint3_index, :].copy() + joint4_pos_data = pos_data[:, joint4_index, :].copy() + + joint21_dir = joint1_pos_data - joint2_pos_data + joint43_dir = joint4_pos_data - joint3_pos_data + + joint21_len = np.expand_dims(np.linalg.norm(joint21_dir, axis=1), axis=1) + joint21_dir /= joint21_len + joint43_len = np.expand_dims(np.linalg.norm(joint43_dir, axis=1), axis=1) + joint43_dir /= joint43_len + + joint_angle = inner1d(joint21_dir, joint43_dir) + + subject_dict[angle_data_name] = joint_angle + + def calc_angle(self, joint_names, pos_data_name, angle_data_name): + assert(self.dataset != None) + assert(len(joint_names) == 3 or len(joint_names) == 4) + + if len(joint_names) == 3: + self._calc_angle_j3(joint_names, pos_data_name, angle_data_name) + else: + self._calc_angle_j4(joint_names, pos_data_name, angle_data_name) + + + + + + + + + + + + + + \ No newline at end of file diff --git a/utils/bvh_conversion/main_bvhconv.py b/utils/bvh_conversion/main_bvhconv.py new file mode 100644 index 0000000..c940de0 --- /dev/null +++ b/utils/bvh_conversion/main_bvhconv.py @@ -0,0 +1,53 @@ +""" +convert bvh file into a dataset for further processing by the mocap_dataset class +this is a pre-requisite for training any machine learning systems +currently, the code is not able to automatically determine the correct order of euler rotations +used in the bvh file +for this reason, this order needs to be specified by the user +for bvh data exported from Captury Studio the order is x, y, z +for bvh data exported from MotionBuilder the order is z, x, y +""" + +import argparse +from bvh_parsers import BVH_Parser +from bvh_tools import * +from dataset_tools import DatasetTools +import pickle + + +parser = argparse.ArgumentParser(description='convert bvh file into mocap file') + +parser.add_argument('--input', type=str, nargs='+', + help='input bvh file') 
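+# the extension of the output file selects the storage format: ".p" (pickle) or ".json"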
+parser.add_argument('--output', type=str, nargs='+', + help='output mocap file') + +args = parser.parse_args() + +input_file_name = args.input[0] +output_file_name = args.output[0] + + +bvh_tools = BVH_Tools() + +#captury euler rotation sequence +#bvh_tools.euler_sequence = [0, 1, 2] # x, y, z + +#motion builder euler rotation sequence +bvh_tools.euler_sequence = [2, 0, 1] # z, x, y + +#Rokoko Suit euler rotation sequence +#bvh_tools.euler_sequence = [1, 0, 2] # y, x, z + +skeletons, skeleton_frames = bvh_tools.parse_bvh_file(input_file_name) +datasets = bvh_tools.create_datasets() + +# store as pickle file +if output_file_name.endswith(".p"): + pickle.dump( datasets, open( output_file_name, "wb" ) ) + +# store as json file +if output_file_name.endswith(".json"): + datatools = DatasetTools() + datatools.dataset = datasets + datatools.save_dataset(output_file_name)
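
A few usage sketches for the utilities above follow; file names such as dance.bvh are placeholders. Driven on its own, BVH_Parser turns a BVH file into a BVH_MocapData object whose values are a pandas DataFrame:

from bvh_parsers import BVH_Parser

parser = BVH_Parser()
mocap = parser.parse("dance.bvh")    # placeholder input file

print(mocap.root_name)               # name of the ROOT joint
print(mocap.framerate)               # the "Frame Time:" value, i.e. seconds per frame
print(list(mocap.skeleton))          # all joints, including the generated "<parent>_Nub" end sites
print(mocap.values.shape)            # (frame_count, channel_count) pandas DataFrame
print(mocap.values.columns[0])       # columns are named "<joint>_<channel>", e.g. "Hips_Xrotation"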
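
The warning at the top of bvh_tools.py about the Euler rotation order exists because _skeleton_set_frame builds one axis-angle quaternion per axis and multiplies them in the order given by euler_sequence. A minimal sketch of that composition, with arbitrary example angles:

import math

import transforms3d as t3d


def euler_deg_to_quat(euler_deg_xyz, euler_sequence):
    # one quaternion per axis, composed in the order given by euler_sequence,
    # mirroring BVH_Tools._skeleton_set_frame
    rad = [a / 180.0 * math.pi for a in euler_deg_xyz]
    axis_quats = [
        t3d.quaternions.axangle2quat([1, 0, 0], rad[0]),   # rotation about x
        t3d.quaternions.axangle2quat([0, 1, 0], rad[1]),   # rotation about y
        t3d.quaternions.axangle2quat([0, 0, 1], rad[2]),   # rotation about z
    ]
    quat = t3d.quaternions.qeye()
    for axis_index in euler_sequence:
        quat = t3d.quaternions.qmult(quat, axis_quats[axis_index])
    return quat


# MotionBuilder-style z, x, y sequence, as selected in main_bvhconv.py
print(euler_deg_to_quat([90.0, 0.0, 45.0], euler_sequence=[2, 0, 1]))   # (w, x, y, z)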
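
_skeleton_update_transformations and _skeleton_traverse_transformations propagate 4x4 homogeneous transforms from the root joint down to its descendants. A small worked sketch of that chain with made-up offsets and rotations:

import numpy as np
import transforms3d as t3d


def local_transformation(offset, translation, rotation_quat):
    # rotation in the upper-left 3x3 block, offset + translation in the last column,
    # matching how BVH_Tools assembles joint.local_transformation
    m = np.identity(4)
    m[0:3, 0:3] = t3d.quaternions.quat2mat(rotation_quat)
    m[0:3, 3] = np.asarray(offset) + np.asarray(translation)
    return m


root_local = local_transformation([0, 0, 0], [0, 1, 0],
                                  t3d.quaternions.axangle2quat([0, 0, 1], np.pi / 2))
child_local = local_transformation([2, 0, 0], [0, 0, 0], t3d.quaternions.qeye())

# world transform of the child = world transform of the parent times local transform of the child
child_world = np.matmul(root_local, child_local)

# world position: transform the local origin and drop the homogeneous coordinate
print(np.matmul(child_world, np.array([0, 0, 0, 1]))[:3])   # ~[0, 3, 0]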
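
Used directly, BVH_Tools parses a BVH file into Skeleton objects plus per-joint frame arrays and can write them back out; the euler_sequence has to match the exporting software, as noted in main_bvhconv.py. A sketch, in which the fps value and file names are assumptions:

from bvh_tools import BVH_Tools

bvh_tools = BVH_Tools()
bvh_tools.euler_sequence = [2, 0, 1]   # MotionBuilder-style z, x, y order

skeletons, skeletons_frames = bvh_tools.parse_bvh_file("dance.bvh")   # placeholder file

# pose all skeletons at frame 0 and read a world-space joint position
bvh_tools.set_frame(0)
print(skeletons[0].root_joint.world_position)

# write the first skeleton back out; fps is the inverse of the BVH frame time
bvh_tools.write_bvh_file(skeletons[0], skeletons_frames[0], fps=50, file_name="dance_rewritten.bvh")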
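
End to end, the conversion is driven through main_bvhconv.py, and the resulting pickle holds one dictionary per skeleton. A sketch of converting a file and inspecting the output (file names are placeholders):

# shell: python main_bvhconv.py --input dance.bvh --output dance.p

import pickle

with open("dance.p", "rb") as f:
    datasets = pickle.load(f)

skeleton_data = datasets["S1"]             # one entry per skeleton: "S1", "S2", ...
print(skeleton_data["names"])              # joint names
print(skeleton_data["parents"])            # parent joint index per joint, -1 for the root
print(skeleton_data["offsets"].shape)      # (joint_count, 3) local joint offsets
print(skeleton_data["pos_world"].shape)    # (frame_count, joint_count, 3) world positions
print(skeleton_data["rot_local"].shape)    # (frame_count, joint_count, 4) local quaternions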
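
DatasetTools post-processes a converted dataset: it can remove the global position and orientation of a reference frame and derive joint angles. A sketch of a typical chain; the reference joints Hips / LeftUpLeg / Spine follow the example given in the comments, while LeftLeg, LeftFoot and the derived data names are assumptions:

from dataset_tools import DatasetTools

tools = DatasetTools()
tools.load_dataset("dance.p")   # placeholder file produced by main_bvhconv.py

# world positions relative to the Hips position at frame 0
tools.remove_ref_position("Hips", 0, "pos_world", "pos_world_rel")

# remove the global orientation spanned by Hips / LeftUpLeg / Spine at frame 0
tools.remove_ref_orientation(["Hips", "LeftUpLeg", "Spine"], 0,
                             "pos_world_rel", "pos_world_relrot")

# cosine of the knee angle from three joints (assumed joint names)
tools.calc_angle(["LeftUpLeg", "LeftLeg", "LeftFoot"], "pos_world", "left_knee_angle")

tools.save_dataset("dance_processed.json")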
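
dataset_tools.py imports inner1d from numpy.core.umath_tests, a private NumPy module that has long been deprecated and may not be importable in recent NumPy releases. If that import fails, an equivalent row-wise inner product can be defined with einsum:

import numpy as np


def inner1d(a, b):
    # row-wise inner product over the last axis, matching how
    # DatasetTools._calc_angle_j3/_calc_angle_j4 apply inner1d to (frames, 3) arrays
    return np.einsum('ij,ij->i', np.asarray(a), np.asarray(b))


a = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
b = np.array([[0.0, 1.0, 0.0], [0.0, 1.0, 0.0]])
print(inner1d(a, b))   # [0. 1.] -> cosine of the angle between paired direction vectors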