zirobtc committed
Commit adeebb7 · verified · 1 Parent(s): 60b86d7

Initial upload of MotionStreamer code, excluding large extracted data and output folders.

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. Causal_TAE/net_last.pth +3 -0
  2. Causal_TAE_t2m_babel/net_last.pth +3 -0
  3. Evaluator_272/.DS_Store +0 -0
  4. Evaluator_272/configs/assets.yaml +13 -0
  5. Evaluator_272/configs/base.yaml +92 -0
  6. Evaluator_272/configs/configs_evaluator_272/H3D-TMR.yaml +95 -0
  7. Evaluator_272/configs/modules/denoiser.yaml +22 -0
  8. Evaluator_272/configs/modules/evaluators.yaml +20 -0
  9. Evaluator_272/configs/modules/motion_vae.yaml +15 -0
  10. Evaluator_272/configs/modules/scheduler.yaml +25 -0
  11. Evaluator_272/configs/modules/text_encoder.yaml +8 -0
  12. Evaluator_272/configs/modules_temos/motiondecoder.yaml +11 -0
  13. Evaluator_272/configs/modules_temos/motionencoder.yaml +12 -0
  14. Evaluator_272/configs/modules_temos/text_encoder.yaml +13 -0
  15. Evaluator_272/datasets/__init__.py +0 -0
  16. Evaluator_272/mld/__init__.py +0 -0
  17. Evaluator_272/mld/callback/__init__.py +1 -0
  18. Evaluator_272/mld/callback/progress.py +54 -0
  19. Evaluator_272/mld/config.py +104 -0
  20. Evaluator_272/mld/data/HumanML3D_272.py +131 -0
  21. Evaluator_272/mld/data/__init__.py +0 -0
  22. Evaluator_272/mld/data/base.py +105 -0
  23. Evaluator_272/mld/data/get_data.py +183 -0
  24. Evaluator_272/mld/data/humanml/__init__.py +0 -0
  25. Evaluator_272/mld/data/humanml/common/quaternion.py +423 -0
  26. Evaluator_272/mld/data/humanml/common/skeleton.py +199 -0
  27. Evaluator_272/mld/data/humanml/data/__init__.py +0 -0
  28. Evaluator_272/mld/data/humanml/data/dataset.py +227 -0
  29. Evaluator_272/mld/data/humanml/scripts/motion_process.py +576 -0
  30. Evaluator_272/mld/data/humanml/utils/__init__.py +0 -0
  31. Evaluator_272/mld/data/humanml/utils/metrics.py +142 -0
  32. Evaluator_272/mld/data/humanml/utils/paramUtil.py +63 -0
  33. Evaluator_272/mld/data/humanml/utils/plot_script.py +103 -0
  34. Evaluator_272/mld/data/humanml/utils/utils.py +163 -0
  35. Evaluator_272/mld/data/humanml/utils/word_vectorizer.py +143 -0
  36. Evaluator_272/mld/data/sampling/__init__.py +2 -0
  37. Evaluator_272/mld/data/sampling/base.py +41 -0
  38. Evaluator_272/mld/data/sampling/framerate.py +32 -0
  39. Evaluator_272/mld/data/sampling/frames.py +58 -0
  40. Evaluator_272/mld/data/utils.py +38 -0
  41. Evaluator_272/mld/launch/__init__.py +0 -0
  42. Evaluator_272/mld/launch/blender.py +23 -0
  43. Evaluator_272/mld/launch/prepare.py +66 -0
  44. Evaluator_272/mld/launch/tools.py +9 -0
  45. Evaluator_272/mld/models/__init__.py +0 -0
  46. Evaluator_272/mld/models/architectures/__init__.py +0 -0
  47. Evaluator_272/mld/models/architectures/actor_vae.py +258 -0
  48. Evaluator_272/mld/models/architectures/fc.py +100 -0
  49. Evaluator_272/mld/models/architectures/gpt/clip.py +90 -0
  50. Evaluator_272/mld/models/architectures/gpt/pos_encoding.py +43 -0
Causal_TAE/net_last.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8becaeebbd0588d7080ea3baf19ca036fe06851035c8b5f214dac1a5cf23949c
+ size 304843534
Causal_TAE_t2m_babel/net_last.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d4cf982269fed7887c45076852fe44be3611ac3c7761caaa5c849a8725ae3c6
+ size 304843534
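
Both checkpoint entries are Git LFS pointer files: only the version, oid and size metadata is committed, while the ~305 MB weight files themselves live in LFS storage. As an illustrative check (not part of this commit), a fetched checkpoint can be verified against its pointer in Python:

import hashlib

def verify_lfs_object(path, expected_oid, expected_size):
    # Compare a downloaded file against the sha256 oid and byte size from its LFS pointer.
    sha, size = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size

# verify_lfs_object("Causal_TAE/net_last.pth",
#                   "8becaeebbd0588d7080ea3baf19ca036fe06851035c8b5f214dac1a5cf23949c",
#                   304843534)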
Evaluator_272/.DS_Store ADDED
Binary file (6.15 kB).
 
Evaluator_272/configs/assets.yaml ADDED
@@ -0,0 +1,13 @@
+ FOLDER: './experiments' # Experiment files saving path
+
+ TEST:
+   FOLDER: './results' # Testing files saving path
+
+ DATASET:
+   HUMANML3D_272:
+     ROOT: './datasets/humanml3d_272' # HumanML3D_272 directory
+     SPLIT_ROOT: './datasets/humanml3d_272/split' # HumanML3D_272 splits directory
+
+ model:
+   bert_path: './deps/distilbert-base-uncased'
+
Evaluator_272/configs/base.yaml ADDED
@@ -0,0 +1,92 @@
+ SEED_VALUE: 1234
+ DEBUG: True
+ TRAIN:
+   SPLIT: 'train'
+   NUM_WORKERS: 2 # Number of workers
+   BATCH_SIZE: 4 # Size of batches
+   START_EPOCH: 0 # Start epoch
+   END_EPOCH: 400 # End epoch
+   RESUME: '' # Experiment path to resume training from
+   PRETRAINED_VAE: ''
+   PRETRAINED: '' # Pretrained model path
+
+   OPTIM:
+     OPTIM.TYPE: 'AdamW' # Optimizer type
+     OPTIM.LR: 1e-4 # Learning rate
+
+   ABLATION:
+     VAE_TYPE: 'actor' # vae ablation: actor or mcross
+     VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture
+     PE_TYPE: 'actor' # mdiffusion mld or actor
+     DIFF_PE_TYPE: 'actor' # mdiffusion mld or actor
+     SKIP_CONNECT: False # skip connection for denoiser va
+     # use linear to expand mean and std rather than expanding token nums
+     MLP_DIST: False
+     IS_DIST: False # Mcross distribution kl
+     PREDICT_EPSILON: True # noise or motion
+
+ EVAL:
+   SPLIT: 'gtest'
+   BATCH_SIZE: 1 # Evaluating batch size
+   NUM_WORKERS: 12 # Number of workers for evaluation
+
+ TEST:
+   TEST_DIR: ''
+   CHECKPOINTS: '' # Pretrained model path
+   SPLIT: 'gtest'
+   BATCH_SIZE: 1 # Testing batch size
+   NUM_WORKERS: 12 # Number of workers for testing
+   SAVE_PREDICTIONS: False # Whether to save predictions
+   COUNT_TIME: False # Whether to count time during test
+   REPLICATION_TIMES: 20 # Number of times to replicate the test
+   MM_NUM_SAMPLES: 100 # Number of samples for multimodal test
+   MM_NUM_REPEATS: 30 # Number of repeats for multimodal test
+   MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test
+   DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test
+   REP_I: 0
+ model:
+   target: 'modules'
+   t2m_textencoder:
+     dim_word: 300
+     dim_pos_ohot: 15
+     dim_text_hidden: 512
+     dim_coemb_hidden: 512
+
+   t2m_motionencoder:
+     dim_move_hidden: 512
+     dim_move_latent: 512
+     dim_motion_hidden: 1024
+     dim_motion_latent: 512
+ LOSS:
+   LAMBDA_LATENT: 1e-5 # Lambda for latent losses
+   LAMBDA_KL: 1e-5 # Lambda for kl losses
+   LAMBDA_REC: 1.0 # Lambda for reconstruction losses
+   LAMBDA_JOINT: 1.0 # Lambda for joint losses
+   LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses
+   LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses
+   LAMBDA_CYCLE: 1.0 # Lambda for cycle losses
+   LAMBDA_PRIOR: 0.0
+   DIST_SYNC_ON_STEP: True
+ METRIC:
+   FORCE_IN_METER: True
+   DIST_SYNC_ON_STEP: True
+ DATASET:
+   NCLASSES: 10
+   SAMPLER:
+     MAX_SQE: -1
+     MAX_LEN: 196
+     MIN_LEN: 40
+     MAX_TEXT_LEN: 20
+   HUMANML3D_272:
+     UNIT_LEN: 4
+
+
+ LOGGER:
+   SACE_CHECKPOINT_EPOCH: 1
+   LOG_EVERY_STEPS: 1
+   VAL_EVERY_STEPS: 10
+   TENSORBOARD: true
+   WANDB:
+     OFFLINE: false
+     PROJECT: null
+     RESUME_ID: null
Evaluator_272/configs/configs_evaluator_272/H3D-TMR.yaml ADDED
@@ -0,0 +1,95 @@
+ NAME: EXP1 # Experiment name
+ DEBUG: False # Debug mode
+ ACCELERATOR: 'gpu' # Device options: "cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto"
+ DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3]
+ # DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3]
+
+ # Training configuration
+ TRAIN:
+   #---------------------------------
+   STAGE: temos # stage "vae" or "diffusion", "vae_diffusion"
+   #---------------------------------
+   DATASETS: ['humanml3d_272'] # Training datasets
+   NUM_WORKERS: 11 # Number of workers
+   BATCH_SIZE: 256 # Size of batches
+   START_EPOCH: 0 # Start epoch
+   END_EPOCH: 100 # End epoch
+   RESUME: '' # Resume training from this path
+   OPTIM:
+     TYPE: AdamW # Optimizer type
+     LR: 1e-4 # Learning rate
+   PRETRAINED_MLD: False
+
+ # Evaluating Configuration
+ EVAL:
+   DATASETS: ['humanml3d_272'] # Evaluating datasets
+   BATCH_SIZE: 32 # Evaluating batch size
+   SPLIT: test
+   eval_self_on_gt: True
+
+ # Test Configuration
+ TEST:
+   PRETRAINED_CHECKPOINTS_VAE: ''
+   SAVE_PREDICTIONS: False
+   CHECKPOINTS: '' # Pretrained model path
+   DATASETS: ['humanml3d_272'] # Test datasets
+   SPLIT: test
+   BATCH_SIZE: 32 # Test batch size
+   MEAN: False
+   NUM_SAMPLES: 1
+   FACT: 1
+   inference_vq_code: False
+   # REPLICATION_TIM
+
+ # Datasets Configuration
+ DATASET:
+   JOINT_TYPE: 'humanml3d_v3' # joint type
+   VERSION: ''
+   MOTION_TYPE: ''
+ METRIC:
+   TYPE: ['TMR_TM2TMetrics']
+ # Losses Configuration
+ LOSS:
+   TYPE: temos # Losses type
+   USE_INFONCE: True
+   USE_INFONCE_FILTER: True
+   LAMBDA_LATENT: 1.0e-5 # Lambda for latent losses
+   LAMBDA_KL: 1.0e-5 # Lambda for kl losses
+   LAMBDA_REC: 1.0 # Lambda for reconstruction losses
+   LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses
+   LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses
+   LAMBDA_CYCLE: 0.0 # Lambda for cycle losses
+   LAMBDA_PRIOR: 0.0
+   LAMBDA_INFONCE: 0.1 # Lambda for InfoNCE loss
+   INFONCE_TEMP: 0.1
+   DIST_SYNC_ON_STEP: False # Sync losses on step when distributed training
+   USE_RECLIPLOSS: False
+   SYNC: False
+   TRAIN_TMR: False
+
+ # Model Configuration
+ model:
+   vae: true # whether vae model
+   model_type: temos # model type
+   condition: 'text'
+   target: modules_temos
+   #####
+   latent_dim: 256 # latent dimension
+   ff_size: 1024 #
+   num_layers: 4 # number of layers
+   num_head: 6 # number of head layers
+   dropout: 0.1 # dropout rate
+   activation: gelu # activation type
+   eval_text_encode_way: given_glove
+   eval_text_source: token
+
+ # Logger configuration
+ LOGGER:
+   SAVE_CHECKPOINT_EPOCH: 10
+   LOG_EVERY_STEPS: 1
+   VAL_EVERY_STEPS: 5
+   TENSORBOARD: True
+   WANDB:
+     PROJECT: null
+     OFFLINE: False
+     RESUME_ID: null
Evaluator_272/configs/modules/denoiser.yaml ADDED
@@ -0,0 +1,22 @@
+ denoiser:
+   target: mld.models.architectures.mld_denoiser.MldDenoiser
+   params:
+     text_encoded_dim: 768
+     ff_size: 1024
+     num_layers: 9
+     num_heads: 4
+     dropout: 0.1
+     normalize_before: False
+     activation: 'gelu'
+     flip_sin_to_cos: True
+     return_intermediate_dec: False
+     position_embedding: 'learned'
+     arch: trans_enc
+     freq_shift: 0
+     condition: ${model.condition}
+     latent_dim: ${model.latent_dim}
+     guidance_scale: ${model.guidance_scale}
+     guidance_uncondp: ${model.guidance_uncondp}
+     nfeats: ${DATASET.NFEATS}
+     nclasses: ${DATASET.NCLASSES}
+     ablation: ${TRAIN.ABLATION}
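
Each module config in this folder follows the target/params convention consumed by instantiate_from_config in Evaluator_272/mld/config.py: target is a dotted import path and params are the constructor keyword arguments, with ${...} references resolved by OmegaConf once the module files are merged into the experiment config. A rough sketch of that mechanism, using torch.nn.Linear as a stand-in target purely for illustration:

from omegaconf import OmegaConf
from mld.config import instantiate_from_config

# Toy block with torch.nn.Linear as the target, just to show the mechanism;
# the denoiser/motion_vae/scheduler blocks above are consumed the same way
# after their ${...} interpolations have been resolved.
toy_cfg = OmegaConf.create({
    "target": "torch.nn.Linear",
    "params": {"in_features": 256, "out_features": 272},
})
layer = instantiate_from_config(toy_cfg)   # -> torch.nn.Linear(256, 272)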
Evaluator_272/configs/modules/evaluators.yaml ADDED
@@ -0,0 +1,20 @@
+ t2m_textencoder:
+   target: mld.models.architectures.t2m_textenc.TextEncoderBiGRUCo
+   params:
+     word_size: 300
+     pos_size: 15
+     hidden_size: 512
+     output_size: 512
+
+ t2m_moveencoder:
+   target: mld.models.architectures.t2m_textenc.MovementConvEncoder
+   params:
+     hidden_size: 512
+     output_size: 512
+
+ t2m_motionencoder:
+   target: mld.models.architectures.t2m_motionenc.MotionEncoder
+   params:
+     input_size: ${model.t2m_moveencoder.output_size}
+     hidden_size: 1024
+     output_size: 512
Evaluator_272/configs/modules/motion_vae.yaml ADDED
@@ -0,0 +1,15 @@
+ motion_vae:
+   # Optional: mld_vae, vposert_vae
+   target: mld.models.architectures.mld_vae.MldVae
+   params:
+     arch: 'encoder_decoder'
+     ff_size: 1024
+     num_layers: 9
+     num_heads: 4
+     dropout: 0.1
+     normalize_before: false
+     activation: 'gelu'
+     position_embedding: 'learned'
+     latent_dim: ${model.latent_dim}
+     nfeats: ${DATASET.NFEATS}
+     ablation: ${TRAIN.ABLATION}
Evaluator_272/configs/modules/scheduler.yaml ADDED
@@ -0,0 +1,25 @@
+ scheduler:
+   target: diffusers.DDIMScheduler
+   num_inference_timesteps: 50
+   eta: 0.0
+   params:
+     num_train_timesteps: 1000
+     beta_start: 0.00085
+     beta_end: 0.012
+     beta_schedule: 'scaled_linear' # Optional: ['linear', 'scaled_linear', 'squaredcos_cap_v2']
+     # variance_type: 'fixed_small'
+     clip_sample: false # clip sample to -1~1
+     # below are for ddim
+     set_alpha_to_one: false
+     steps_offset: 1
+
+
+ noise_scheduler:
+   target: diffusers.DDPMScheduler
+   params:
+     num_train_timesteps: 1000
+     beta_start: 0.00085
+     beta_end: 0.012
+     beta_schedule: 'scaled_linear' # Optional: ['linear', 'scaled_linear', 'squaredcos_cap_v2']
+     variance_type: 'fixed_small'
+     clip_sample: false # clip sample to -1~1
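
For reference, the params block above maps one-to-one onto the constructor of diffusers.DDIMScheduler, while num_inference_timesteps and eta are consumed at sampling time rather than at construction. A minimal sketch of building the same scheduler directly (outside the repo's instantiate_from_config path):

from diffusers import DDIMScheduler

scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)
scheduler.set_timesteps(50)       # num_inference_timesteps from the config
print(len(scheduler.timesteps))   # 50; eta=0.0 would be passed to scheduler.step()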
Evaluator_272/configs/modules/text_encoder.yaml ADDED
@@ -0,0 +1,8 @@
+ text_encoder:
+   # Optional: mld_clip, mld_bert
+   target: mld.models.architectures.mld_clip.MldTextEncoder
+   params:
+     finetune: false # if false, model weights are frozen
+     last_hidden_state: false # if true, the last hidden state is used as the text embedding
+     latent_dim: ${model.latent_dim}
+     modelpath: ${model.clip_path}
Evaluator_272/configs/modules_temos/motiondecoder.yaml ADDED
@@ -0,0 +1,11 @@
+ motiondecoder:
+   name: actor_decoder
+   target: mld.models.architectures.temos.motiondecoder.actor.ActorAgnosticDecoder
+   params:
+     latent_dim: ${model.latent_dim}
+     ff_size: ${model.ff_size}
+     num_layers: ${model.num_layers}
+     num_head: ${model.num_head}
+     droupout: ${model.dropout}
+     activation: ${model.activation}
+     nfeats: ${DATASET.NFEATS}
Evaluator_272/configs/modules_temos/motionencoder.yaml ADDED
@@ -0,0 +1,12 @@
+ motionencoder:
+   name: actor_encoder
+   target: mld.models.architectures.temos.motionencoder.actor.ActorAgnosticEncoder
+   params:
+     latent_dim: ${model.latent_dim}
+     vae: ${model.vae}
+     ff_size: ${model.ff_size}
+     num_layers: ${model.num_layers}
+     num_head: ${model.num_head}
+     droupout: ${model.dropout}
+     activation: ${model.activation}
+     nfeats: ${DATASET.NFEATS}
Evaluator_272/configs/modules_temos/text_encoder.yaml ADDED
@@ -0,0 +1,13 @@
+ textencoder:
+   name: distilbert_actor
+   target: mld.models.architectures.temos.textencoder.distillbert_actor.DistilbertActorAgnosticEncoder
+   params:
+     latent_dim: ${model.latent_dim}
+     vae: ${model.vae}
+     ff_size: ${model.ff_size}
+     num_layers: ${model.num_layers}
+     num_head: ${model.num_head}
+     droupout: ${model.dropout}
+     activation: ${model.activation}
+     finetune: false
+     modelpath: ${model.bert_path}
Evaluator_272/datasets/__init__.py ADDED
File without changes
Evaluator_272/mld/__init__.py ADDED
File without changes
Evaluator_272/mld/callback/__init__.py ADDED
@@ -0,0 +1 @@
+ from .progress import ProgressLogger
Evaluator_272/mld/callback/progress.py ADDED
@@ -0,0 +1,54 @@
+ import logging
+
+ from pytorch_lightning import LightningModule, Trainer
+ from pytorch_lightning.callbacks import Callback
+ import psutil
+
+ logger = logging.getLogger()
+
+
+ class ProgressLogger(Callback):
+
+     def __init__(self, metric_monitor: dict, precision: int = 3):
+         # Metric to monitor
+         self.metric_monitor = metric_monitor
+         self.precision = precision
+
+     def on_train_start(self, trainer: Trainer, pl_module: LightningModule,
+                        **kwargs) -> None:
+         logger.info("Training started")
+
+     def on_train_end(self, trainer: Trainer, pl_module: LightningModule,
+                      **kwargs) -> None:
+         logger.info("Training done")
+
+     def on_validation_epoch_end(self, trainer: Trainer,
+                                 pl_module: LightningModule, **kwargs) -> None:
+         if trainer.sanity_checking:
+             logger.info("Sanity checking ok.")
+
+     def on_train_epoch_end(self,
+                            trainer: Trainer,
+                            pl_module: LightningModule,
+                            padding=False,
+                            **kwargs) -> None:
+         metric_format = f"{{:.{self.precision}e}}"
+         line = f"Epoch {trainer.current_epoch}"
+         if padding:
+             line = f"{line:>{len('Epoch xxxx')}}"  # Right padding
+         metrics_str = []
+
+         losses_dict = trainer.callback_metrics
+         for metric_name, dico_name in self.metric_monitor.items():
+             if dico_name in losses_dict:
+                 metric = losses_dict[dico_name].item()
+                 metric = metric_format.format(metric)
+                 metric = f"{metric_name} {metric}"
+                 metrics_str.append(metric)
+
+         if len(metrics_str) == 0:
+             return
+
+         memory = f"Memory {psutil.virtual_memory().percent}%"
+         line = line + ": " + " ".join(metrics_str) + " " + memory
+         logger.info(line)
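
ProgressLogger is a standard PyTorch Lightning callback, so wiring it up only requires passing it to the Trainer. A minimal sketch (the metric_monitor keys are placeholders; they must match whatever keys the LightningModule logs into trainer.callback_metrics):

import pytorch_lightning as pl
from mld.callback import ProgressLogger

# Placeholder mapping: display name -> key in trainer.callback_metrics.
metric_monitor = {"recons": "losses/recons", "kl": "losses/kl"}

trainer = pl.Trainer(
    max_epochs=100,
    callbacks=[ProgressLogger(metric_monitor=metric_monitor, precision=3)],
)
# trainer.fit(model, datamodule=datamodule)   # model and datamodule defined elsewhere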
Evaluator_272/mld/config.py ADDED
@@ -0,0 +1,104 @@
+ import importlib
+ from argparse import ArgumentParser
+ from omegaconf import OmegaConf
+ import os
+
+
+ def get_module_config(cfg_model, path="modules"):
+     module_conf = OmegaConf.create()
+     files = os.listdir(f'./configs/{path}/')
+     for file in files:
+         if file.endswith('.yaml'):
+             with open(f'./configs/{path}/' + file, 'r') as f:
+                 module_conf.merge_with(OmegaConf.load(f))
+     module_conf.merge_with(cfg_model)
+     return module_conf
+
+
+ def get_obj_from_str(string, reload=False):
+     module, cls = string.rsplit(".", 1)
+     if reload:
+         module_imp = importlib.import_module(module)
+         importlib.reload(module_imp)
+     return getattr(importlib.import_module(module, package=None), cls)
+
+
+ def instantiate_from_config(config):
+     if not "target" in config:
+         if config == '__is_first_stage__':
+             return None
+         elif config == "__is_unconditional__":
+             return None
+         raise KeyError("Expected key `target` to instantiate.")
+     return get_obj_from_str(config["target"])(**config.get("params", dict()))
+
+
+ def parse_args(phase="train"):
+     parser = ArgumentParser()
+
+     group = parser.add_argument_group("Training options")
+     if phase in ["train", "test"]:
+         group.add_argument(
+             "--cfg",
+             type=str,
+             required=False,
+             default="./configs/config.yaml",
+             help="config file",
+         )
+         group.add_argument(
+             "--cfg_assets",
+             type=str,
+             required=False,
+             default="./configs/assets.yaml",
+             help="config file for asset paths",
+         )
+         group.add_argument("--batch_size",
+                            type=int,
+                            required=False,
+                            help="training batch size")
+         group.add_argument("--device",
+                            type=int,
+                            nargs="+",
+                            required=False,
+                            help="training device")
+         group.add_argument("--nodebug",
+                            action="store_true",
+                            required=False,
+                            help="debug or not")
+         group.add_argument("--dir",
+                            type=str,
+                            required=False,
+                            help="evaluate existing npys")
+
+     # remove None params, and create a dictionary
+     params = parser.parse_args()
+     # params = {key: val for key, val in vars(opt).items() if val is not None}
+
+     # update config from files
+     cfg_base = OmegaConf.load('./configs/base.yaml')
+     cfg_exp = OmegaConf.merge(cfg_base, OmegaConf.load(params.cfg))
+     cfg_model = get_module_config(cfg_exp.model, cfg_exp.model.target)
+     cfg_exp.model = cfg_model
+     cfg_assets = OmegaConf.load(params.cfg_assets)
+     cfg = OmegaConf.merge(cfg_exp, cfg_model, cfg_assets)
+
+     if phase in ["train", "test"]:
+         cfg.TRAIN.BATCH_SIZE = (params.batch_size
+                                 if params.batch_size else cfg.TRAIN.BATCH_SIZE)
+         cfg.DEVICE = params.device if params.device else cfg.DEVICE
+         cfg.DEBUG = not params.nodebug if params.nodebug is not None else cfg.DEBUG
+
+         cfg.DEBUG = False if phase == "test" else cfg.DEBUG
+         if phase == "test":
+             cfg.DEBUG = False
+             cfg.DEVICE = [0]
+             print("Force no debugging and one gpu when testing")
+         cfg.TEST.TEST_DIR = params.dir if params.dir else cfg.TEST.TEST_DIR
+
+     # debug mode
+     if cfg.DEBUG:
+         cfg.NAME = "debug--" + cfg.NAME
+         cfg.LOGGER.WANDB.OFFLINE = True
+         cfg.LOGGER.VAL_EVERY_STEPS = 1
+
+     return cfg
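
For context, a typical invocation of the evaluator would go through parse_args; the entry-script name below is a placeholder, since no train/test script is included in this view of the commit:

# Hypothetical command line:
#   python test_evaluator.py --cfg ./configs/configs_evaluator_272/H3D-TMR.yaml \
#       --cfg_assets ./configs/assets.yaml --batch_size 32 --device 0
from mld.config import parse_args

cfg = parse_args(phase="test")         # merges base.yaml, the --cfg file, the module configs and assets.yaml
print(cfg.NAME, cfg.EVAL.BATCH_SIZE)   # e.g. EXP1 32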
Evaluator_272/mld/data/HumanML3D_272.py ADDED
@@ -0,0 +1,131 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from mld.data.humanml.scripts.motion_process import (process_file,
5
+ recover_from_ric, recover_from_root_rot6d)
6
+
7
+ from .base import BASEDataModule
8
+ from .humanml.data.dataset import Text2MotionDatasetV2
9
+ from .humanml.common.skeleton import Skeleton
10
+ import torch.nn.functional as F
11
+
12
+
13
+ class HumanML3D_272_DataModule(BASEDataModule):
14
+
15
+ def __init__(self,
16
+ cfg,
17
+ batch_size,
18
+ num_workers,
19
+ collate_fn=None,
20
+ phase="train",
21
+ **kwargs):
22
+ super().__init__(batch_size=batch_size,
23
+ num_workers=num_workers,
24
+ collate_fn=collate_fn)
25
+
26
+ self.save_hyperparameters(logger=False)
27
+ self.name = "humanml3d_272"
28
+ self.njoints = 22
29
+ self.hparams['njoints']=22
30
+ if phase == "text_only":
31
+ self.Dataset = TextOnlyDataset
32
+ else:
33
+ if cfg.TRAIN.STAGE in ['gpt'] and (not cfg.TEST.inference_vq_code):
34
+ if cfg.model.vae_type in ['humanvq']:
35
+ self.Dataset = Text2MotionDatasetV2_VQToken
36
+ elif cfg.model.vae_type in ['hvq']:
37
+ self.Dataset = Text2MotionDatasetV2_Dual_codebook_VQToken
38
+ else:
39
+ raise NotImplementedError
40
+ elif cfg.TEST.inference_vq_code:
41
+ self.Dataset = VQMotionDataset
42
+ else:
43
+ self.Dataset = Text2MotionDatasetV2
44
+ self.cfg = cfg
45
+ sample_overrides = {
46
+ "split": "val",
47
+ "tiny": True,
48
+ "progress_bar": False
49
+ }
50
+
51
+ self._sample_set = self.get_sample_set(overrides=sample_overrides)
52
+
53
+ self.nfeats = self._sample_set.nfeats
54
+
55
+ def recover_from_local_position(self, final_x, njoint):
56
+
57
+ def accumulate_rotations(relative_rotations):
58
+ R_total = [relative_rotations[0]]
59
+ for R_rel in relative_rotations[1:]:
60
+ R_total.append(np.matmul(R_rel, R_total[-1]))
61
+
62
+ return np.array(R_total)
63
+
64
+ def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
65
+ a1, a2 = d6[..., :3], d6[..., 3:]
66
+ b1 = F.normalize(a1, dim=-1)
67
+ b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
68
+ b2 = F.normalize(b2, dim=-1)
69
+ b3 = torch.cross(b1, b2, dim=-1)
70
+ return torch.stack((b1, b2, b3), dim=-2)
71
+
72
+ nfrm, _ = final_x.shape
73
+ positions_no_heading = final_x[:,8:8+3*njoint].reshape(nfrm, -1, 3)
74
+ velocities_root_xy_no_heading = final_x[:,:2]
75
+ global_heading_diff_rot = final_x[:,2:8]
76
+
77
+ global_heading_rot = accumulate_rotations(rotation_6d_to_matrix(torch.from_numpy(global_heading_diff_rot)).numpy())
78
+ inv_global_heading_rot = np.transpose(global_heading_rot, (0, 2, 1))
79
+ positions_with_heading = np.matmul(np.repeat(inv_global_heading_rot[:, None,:, :], njoint, axis=1), positions_no_heading[...,None]).squeeze(-1)
80
+ velocities_root_xyz_no_heading = np.zeros((velocities_root_xy_no_heading.shape[0], 3))
81
+ velocities_root_xyz_no_heading[:, 0] = velocities_root_xy_no_heading[:, 0]
82
+ velocities_root_xyz_no_heading[:, 2] = velocities_root_xy_no_heading[:, 1]
83
+ velocities_root_xyz_no_heading[1:, :] = np.matmul(inv_global_heading_rot[:-1], velocities_root_xyz_no_heading[1:, :,None]).squeeze(-1)
84
+
85
+ root_translation = np.cumsum(velocities_root_xyz_no_heading, axis=0)
86
+ positions_with_heading[:, :, 0] += root_translation[:, 0:1]
87
+ positions_with_heading[:, :, 2] += root_translation[:, 2:]
88
+
89
+ return positions_with_heading
90
+
91
+ def feats2joints(self, features, skel=None, motion_type=''):
92
+ assert motion_type in ['']
93
+ assert features.shape[2] == 272
94
+ mean = torch.tensor(self.hparams.mean).to(features)
95
+ std = torch.tensor(self.hparams.std).to(features)
96
+ features = features * std + mean
97
+ return self.recover_from_local_position(features.reshape(-1, 272).detach().cpu().numpy(), self.njoints).reshape(features.shape[0], -1, 22, 3)
98
+
99
+
100
+ def joints2feats(self, features):
101
+ features = process_file(features, self.njoints)[0]
102
+ return features
103
+
104
+ def renorm4t2m(self, features):
105
+ ori_mean = torch.tensor(self.hparams.mean).to(features)
106
+ ori_std = torch.tensor(self.hparams.std).to(features)
107
+ eval_mean = torch.tensor(self.hparams.mean_eval).to(features)
108
+ eval_std = torch.tensor(self.hparams.std_eval).to(features)
109
+ features = features * ori_std + ori_mean
110
+ features = (features - eval_mean) / eval_std
111
+ return features
112
+
113
+ def renorm2ori(self, features):
114
+ mean = torch.tensor(self.hparams.mean).to(features)
115
+ std = torch.tensor(self.hparams.std).to(features)
116
+ features = features * std + mean
117
+
118
+ return features
119
+
120
+
121
+ def mm_mode(self, mm_on=True):
122
+ if mm_on:
123
+ self.is_mm = True
124
+ self.name_list = self.test_dataset.name_list
125
+ self.mm_list = np.random.choice(self.name_list,
126
+ self.cfg.TEST.MM_NUM_SAMPLES,
127
+ replace=False)
128
+ self.test_dataset.name_list = self.mm_list
129
+ else:
130
+ self.is_mm = False
131
+ self.test_dataset.name_list = self.name_list
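
As a reading aid for recover_from_local_position above: only the first 8 + 3 * njoint = 74 of the 272 feature channels are consumed when reconstructing global joint positions; the remaining channels are not needed for that step. A short sketch of the slicing it performs (njoint = 22, as set in this datamodule):

import numpy as np

njoint = 22
final_x = np.zeros((100, 272))                    # (frames, 272) de-normalized features
root_vel_xy  = final_x[:, 0:2]                    # root velocity, heading-local, mapped to the X/Z ground plane
heading_diff = final_x[:, 2:8]                    # frame-to-frame heading rotation in 6D form
local_pos    = final_x[:, 8:8 + 3 * njoint]       # 22 joint positions with the heading removed
local_pos    = local_pos.reshape(-1, njoint, 3)   # -> (frames, 22, 3)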
Evaluator_272/mld/data/__init__.py ADDED
File without changes
Evaluator_272/mld/data/base.py ADDED
@@ -0,0 +1,105 @@
1
+ from os.path import join as pjoin
2
+ import numpy as np
3
+ import pytorch_lightning as pl
4
+ from torch.utils.data import DataLoader
5
+
6
+
7
+ class BASEDataModule(pl.LightningDataModule):
8
+
9
+ def __init__(self, collate_fn, batch_size: int, num_workers: int):
10
+ super().__init__()
11
+
12
+ self.dataloader_options = {
13
+ "batch_size": batch_size,
14
+ "num_workers": num_workers,
15
+ "collate_fn": collate_fn,
16
+ }
17
+
18
+ self.persistent_workers = True
19
+ self.is_mm = False
20
+
21
+ def get_sample_set(self, overrides={}):
22
+ sample_params = self.hparams.copy()
23
+ sample_params.update(overrides)
24
+ split_file = pjoin(
25
+ eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), self.cfg.DATASET.VERSION,
26
+ self.cfg.EVAL.SPLIT + ".txt",
27
+ )
28
+ return self.Dataset(split_file=split_file, **sample_params)
29
+
30
+ def __getattr__(self, item):
31
+ # train_dataset/val_dataset etc cached like properties
32
+ if item.endswith("_dataset") and not item.startswith("_"):
33
+ subset = item[:-len("_dataset")]
34
+ item_c = "_" + item
35
+ if item_c not in self.__dict__:
36
+ # todo: config name not consistent
37
+ subset = subset.upper() if subset != "val" else "EVAL"
38
+ split = eval(f"self.cfg.{subset}.SPLIT")
39
+ split_file = pjoin(
40
+ eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"),
41
+ self.cfg.DATASET.VERSION,
42
+ eval(f"self.cfg.{subset}.SPLIT") + ".txt",
43
+ )
44
+ self.__dict__[item_c] = self.Dataset(split_file=split_file,
45
+ split=split,
46
+ **self.hparams)
47
+ return getattr(self, item_c)
48
+ classname = self.__class__.__name__
49
+ raise AttributeError(f"'{classname}' object has no attribute '{item}'")
50
+
51
+ def setup(self, stage=None):
52
+ self.stage = stage
53
+ # Use the getter the first time to load the data
54
+ if stage in (None, "fit"):
55
+ _ = self.train_dataset
56
+ _ = self.val_dataset
57
+ if stage in (None, "test"):
58
+ _ = self.test_dataset
59
+
60
+ def train_dataloader(self):
61
+ return DataLoader(
62
+ self.train_dataset,
63
+ shuffle=True,
64
+ persistent_workers=True,
65
+ **self.dataloader_options,
66
+ )
67
+
68
+ def predict_dataloader(self):
69
+ dataloader_options = self.dataloader_options.copy()
70
+ dataloader_options[
71
+ "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE
72
+ dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS
73
+ dataloader_options["shuffle"] = False
74
+ return DataLoader(
75
+ self.test_dataset,
76
+ persistent_workers=True,
77
+ **dataloader_options,
78
+ )
79
+
80
+ def val_dataloader(self):
81
+ # overrides batch_size and num_workers
82
+ dataloader_options = self.dataloader_options.copy()
83
+ dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE
84
+ dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS
85
+ dataloader_options["shuffle"] = False
86
+
87
+ return DataLoader(
88
+ self.val_dataset,
89
+ persistent_workers=True,
90
+ **dataloader_options,
91
+ )
92
+
93
+ def test_dataloader(self):
94
+ # overrides batch_size and num_workers
95
+ dataloader_options = self.dataloader_options.copy()
96
+ dataloader_options[
97
+ "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE
98
+ dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS
99
+ # dataloader_options["drop_last"] = True
100
+ dataloader_options["shuffle"] = False
101
+ return DataLoader(
102
+ self.test_dataset,
103
+ persistent_workers=True,
104
+ **dataloader_options,
105
+ )
Evaluator_272/mld/data/get_data.py ADDED
@@ -0,0 +1,183 @@
1
+ from os.path import join as pjoin
2
+ import numpy as np
3
+ # from .humanml.utils.word_vectorizer import WordVectorizer, WordVectorizer_only_text_token
4
+ from .utils import *
5
+ from .HumanML3D_272 import HumanML3D_272_DataModule
6
+
7
+
8
+ def get_mean_std(phase, cfg, dataset_name):
9
+ assert dataset_name == 'humanml3d_272'
10
+
11
+ data_root = eval(f"cfg.DATASET.{dataset_name.upper()}.ROOT")
12
+ mean = np.load(pjoin(data_root, 'mean_std', cfg.DATASET.VERSION, cfg.DATASET.MOTION_TYPE, "Mean.npy"))
13
+ std = np.load(pjoin(data_root, 'mean_std', cfg.DATASET.VERSION, cfg.DATASET.MOTION_TYPE, "Std.npy"))
14
+ return mean, std
15
+
16
+
17
+
18
+ def get_njoints(dataset_name):
19
+ njoints = 22
20
+ return njoints
21
+
22
+
23
+ def reget_mean_std(cfg, dataset_name, mean, std):
24
+ if 'MINOR_MOTION_TYPE' in cfg.DATASET:
25
+ select_motion_type = cfg.DATASET.MINOR_MOTION_TYPE
26
+ else:
27
+ select_motion_type = cfg.DATASET.MOTION_TYPE
28
+
29
+ njoints = get_njoints(dataset_name)
30
+ if select_motion_type == 'root_position':
31
+ mean = mean[..., :4+(njoints - 1) * 3]
32
+ elif select_motion_type == 'root_position_vel':
33
+ mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
34
+ elif select_motion_type == 'root_position_rot6d':
35
+ mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
36
+ elif select_motion_type == 'root_rot6d':
37
+ mean = np.concatenate((mean[..., :4], mean[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
38
+ elif select_motion_type in ['all', 'smplx_212', 'vector_263', 'vector_263_ori_humanml', 'smplx_159', '']:
39
+ pass
40
+ elif select_motion_type == 'root_body_pos_vel_hand_all':
41
+ mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 3 + 21 * 6 : 4+(njoints - 1) * 9], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
42
+ # pass
43
+ elif select_motion_type == 'root_body_pos_vel_hand_pos_vel':
44
+ mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
45
+ elif select_motion_type == 'root_body_pos_vel_hand_pos':
46
+ mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9 + 22 * 3: 4+(njoints - 1) * 9 + 52*3]), axis=0)
47
+ elif select_motion_type == 'root_body_pos_vel_hand_rot':
48
+ mean = np.concatenate((mean[..., :4+(22 - 1) * 3], mean[..., 4+(52 - 1) * 3 + (22-1)*6 : 4+(52-1)*9], mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
49
+ elif select_motion_type == 'root_position_vel_only_body':
50
+ mean = np.concatenate((mean[..., :4+(22 - 1) * 3], mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
51
+ elif select_motion_type == 'root_body_pos_vel_hand_pos_vel_hand_wrist':
52
+ body_pos_mean = mean[..., :4+(22 - 1) * 3] # 67
53
+ left_hand_pos_mean = (mean[..., 4+(22 - 1) * 3:4+(37 - 1) * 3].reshape(15, 3) - body_pos_mean[..., -6:-3]).reshape(-1) # 45
54
+ right_hand_pos_mean = (mean[..., 4+(37 - 1) * 3:4+(52 - 1) * 3].reshape(15, 3) - body_pos_mean[..., -3:]).reshape(-1) # 45
55
+
56
+ body_vel_mean = mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3] # 66
57
+ left_hand_vel_mean = (mean[..., 4+(52 - 1) * 9 + 22*3: 4+(52 - 1) * 9 + 22*3 + 15 * 3].reshape(15, 3) - body_vel_mean[..., -6:-3]).reshape(-1)
58
+ right_hand_vel_mean = (mean[..., 4+(52 - 1) * 9 + 22*3+ 15 * 3: 4+(52 - 1) * 9 + 22*3 + 15 * 3 + 15 * 3].reshape(15, 3) - body_vel_mean[..., -3:]).reshape(-1)
59
+
60
+ mean = np.concatenate((body_pos_mean, left_hand_pos_mean, right_hand_pos_mean, body_vel_mean, left_hand_vel_mean, right_hand_vel_mean), axis=0)
61
+ else:
62
+ raise NotImplementedError
63
+
64
+ if select_motion_type == 'root_position':
65
+ std = std[..., :4+(njoints-1)*3]
66
+ elif select_motion_type == 'root_position_vel':
67
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
68
+ elif select_motion_type == 'root_position_rot6d':
69
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
70
+ elif select_motion_type == 'root_rot6d':
71
+ std = np.concatenate((std[..., :4], std[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
72
+ elif select_motion_type in ['all', 'smplx_212', 'vector_263', 'vector_263_ori_humanml', 'smplx_159', '']:
73
+ pass
74
+ elif select_motion_type == 'root_body_pos_vel_hand_all':
75
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 3 + 21 * 6 : 4+(njoints - 1) * 9], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
76
+ # pass
77
+ elif select_motion_type == 'root_body_pos_vel_hand_pos_vel':
78
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
79
+ elif select_motion_type == 'root_body_pos_vel_hand_pos':
80
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9 + 22 * 3: 4+(njoints - 1) * 9 + 52*3]), axis=0)
81
+ elif select_motion_type == 'root_body_pos_vel_hand_rot':
82
+ std = np.concatenate((std[..., :4+(22 - 1) * 3], std[..., 4+(52 - 1) * 3 + (22-1)*6 : 4+(52-1)*9], std[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
83
+ elif select_motion_type == 'root_position_vel_only_body':
84
+ std = np.concatenate((std[..., :4+(22 - 1) * 3], std[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
85
+ elif select_motion_type == 'root_body_pos_vel_hand_pos_vel_hand_wrist':
86
+ std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
87
+ else:
88
+ raise NotImplementedError
89
+
90
+ return mean, std
91
+
92
+ # def get_WordVectorizer(cfg, phase, dataset_name):
93
+ # if phase not in ["text_only"]:
94
+ # if dataset_name.lower() in ['humanml3d_272']:
95
+ # if cfg.model.eval_text_source == 'token':
96
+ # return WordVectorizer(cfg.DATASET.WORD_VERTILIZER_PATH, "our_vab", cfg.model.eval_text_encode_way)
97
+ # else:
98
+ # return WordVectorizer_only_text_token(cfg.DATASET.WORD_VERTILIZER_PATH, "our_vab", cfg.model.eval_text_encode_way)
99
+ # else:
100
+ # raise ValueError("Only support WordVectorizer for HumanML3D_272")
101
+ # else:
102
+ # return None
103
+
104
+
105
+ def get_collate_fn(name, cfg, phase="train"):
106
+ if name.lower() in ['humanml3d_272']:
107
+ if cfg.model.condition in ['text_all', 'text_face', 'text_body', 'text_hand', 'text_face_body', 'text_seperate', 'only_pose_concat', 'only_pose_fusion'] and (not cfg.TEST.inference_vq_code):
108
+ return mld_collate_text_all
109
+ elif cfg.TEST.inference_vq_code:
110
+ return vq_collate
111
+ elif cfg.TRAIN.STAGE in ['gpt'] and (not cfg.TEST.inference_vq_code):
112
+ return mld_collate_vq_token
113
+ else:
114
+ return mld_collate
115
+ else:
116
+ raise NotImplementedError
117
+
118
+
119
+ # map config name to module&path
120
+ dataset_module_map = {
121
+ 'humanml3d_272': HumanML3D_272_DataModule
122
+ }
123
+ motion_subdir = {'humanml3d_272': 'motion_data'}
124
+
125
+
126
+ def get_datasets(cfg, logger=None, phase="train"):
127
+ # get dataset names form cfg
128
+ dataset_names = eval(f"cfg.{phase.upper()}.DATASETS")
129
+ datasets = []
130
+ for dataset_name in dataset_names:
131
+ if dataset_name.lower() in ["humanml3d_272"]:
132
+
133
+ if 'MINOR_MOTION_TYPE' in cfg.DATASET:
134
+ input_format = cfg.DATASET.MINOR_MOTION_TYPE
135
+ else:
136
+ input_format = cfg.DATASET.MOTION_TYPE
137
+
138
+ data_root = eval(f"cfg.DATASET.{dataset_name.upper()}.ROOT")
139
+ # get mean and std corresponding to dataset
140
+ mean, std = get_mean_std(phase, cfg, dataset_name)
141
+ mean_eval, std_eval = get_mean_std("val", cfg, dataset_name)
142
+
143
+ mean, std = reget_mean_std(cfg, dataset_name, mean, std)
144
+ mean_eval, std_eval = reget_mean_std(cfg, dataset_name, mean_eval, std_eval)
145
+
146
+ # get WordVectorizer
147
+ # wordVectorizer = get_WordVectorizer(cfg, phase, dataset_name)
148
+ # get collect_fn
149
+ collate_fn = get_collate_fn(dataset_name, cfg, phase)
150
+ # get dataset module
151
+
152
+ dataset = dataset_module_map[dataset_name.lower()](
153
+ cfg=cfg,
154
+ batch_size=cfg.TRAIN.BATCH_SIZE,
155
+ num_workers=cfg.TRAIN.NUM_WORKERS,
156
+ debug=cfg.DEBUG,
157
+ collate_fn=collate_fn,
158
+ mean=mean,
159
+ std=std,
160
+ mean_eval=mean_eval,
161
+ std_eval=std_eval,
162
+ # w_vectorizer=wordVectorizer,
163
+ input_format=cfg.DATASET.MOTION_TYPE,
164
+ text_dir=pjoin(data_root, "texts"),
165
+ motion_dir=pjoin(data_root, motion_subdir[dataset_name]),
166
+ max_motion_length=cfg.DATASET.SAMPLER.MAX_LEN,
167
+ min_motion_length=cfg.DATASET.SAMPLER.MIN_LEN,
168
+ max_text_len=cfg.DATASET.SAMPLER.MAX_TEXT_LEN,
169
+ unit_length=eval(
170
+ f"cfg.DATASET.{dataset_name.upper()}.UNIT_LEN"),
171
+ )
172
+ datasets.append(dataset)
173
+
174
+ else:
175
+ raise NotImplementedError
176
+
177
+ if input_format == 'root_body_pos_vel_hand_pos_vel':
178
+ cfg.DATASET.NFEATS = 313
179
+ else:
180
+ cfg.DATASET.NFEATS = datasets[0].nfeats
181
+
182
+ cfg.DATASET.NJOINTS = datasets[0].njoints
183
+ return datasets
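
get_datasets is the factory that an entry script would call to obtain the Lightning datamodule(s); a minimal sketch of its intended use together with parse_args (the phase and variable names are illustrative):

from mld.config import parse_args
from mld.data.get_data import get_datasets

cfg = parse_args(phase="test")
datamodule = get_datasets(cfg, phase="test")[0]   # HumanML3D_272_DataModule
datamodule.setup("test")
test_loader = datamodule.test_dataloader()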
Evaluator_272/mld/data/humanml/__init__.py ADDED
File without changes
Evaluator_272/mld/data/humanml/common/quaternion.py ADDED
@@ -0,0 +1,423 @@
1
+ # Copyright (c) 2018-present, Facebook, Inc.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ #
7
+
8
+ import torch
9
+ import numpy as np
10
+
11
+ _EPS4 = np.finfo(float).eps * 4.0
12
+
13
+ _FLOAT_EPS = np.finfo(np.float64).eps
14
+
15
+ # PyTorch-backed implementations
16
+ def qinv(q):
17
+ assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
18
+ mask = torch.ones_like(q)
19
+ mask[..., 1:] = -mask[..., 1:]
20
+ return q * mask
21
+
22
+
23
+ def qinv_np(q):
24
+ assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
25
+ return qinv(torch.from_numpy(q).float()).numpy()
26
+
27
+
28
+ def qnormalize(q):
29
+ assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
30
+ return q / torch.norm(q, dim=-1, keepdim=True)
31
+
32
+
33
+ def qmul(q, r):
34
+ """
35
+ Multiply quaternion(s) q with quaternion(s) r.
36
+ Expects two equally-sized tensors of shape (*, 4), where * denotes any number of dimensions.
37
+ Returns q*r as a tensor of shape (*, 4).
38
+ """
39
+ assert q.shape[-1] == 4
40
+ assert r.shape[-1] == 4
41
+
42
+ original_shape = q.shape
43
+
44
+ # Compute outer product
45
+ terms = torch.bmm(r.view(-1, 4, 1), q.view(-1, 1, 4))
46
+
47
+ w = terms[:, 0, 0] - terms[:, 1, 1] - terms[:, 2, 2] - terms[:, 3, 3]
48
+ x = terms[:, 0, 1] + terms[:, 1, 0] - terms[:, 2, 3] + terms[:, 3, 2]
49
+ y = terms[:, 0, 2] + terms[:, 1, 3] + terms[:, 2, 0] - terms[:, 3, 1]
50
+ z = terms[:, 0, 3] - terms[:, 1, 2] + terms[:, 2, 1] + terms[:, 3, 0]
51
+ return torch.stack((w, x, y, z), dim=1).view(original_shape)
52
+
53
+
54
+ def qrot(q, v):
55
+ """
56
+ Rotate vector(s) v about the rotation described by quaternion(s) q.
57
+ Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
58
+ where * denotes any number of dimensions.
59
+ Returns a tensor of shape (*, 3).
60
+ """
61
+ assert q.shape[-1] == 4
62
+ assert v.shape[-1] == 3
63
+ assert q.shape[:-1] == v.shape[:-1]
64
+
65
+ original_shape = list(v.shape)
66
+ # print(q.shape)
67
+ q = q.contiguous().view(-1, 4)
68
+ v = v.contiguous().view(-1, 3)
69
+
70
+ qvec = q[:, 1:]
71
+ uv = torch.cross(qvec, v, dim=1)
72
+ uuv = torch.cross(qvec, uv, dim=1)
73
+ return (v + 2 * (q[:, :1] * uv + uuv)).view(original_shape)
74
+
75
+
76
+ def qeuler(q, order, epsilon=0, deg=True):
77
+ """
78
+ Convert quaternion(s) q to Euler angles.
79
+ Expects a tensor of shape (*, 4), where * denotes any number of dimensions.
80
+ Returns a tensor of shape (*, 3).
81
+ """
82
+ assert q.shape[-1] == 4
83
+
84
+ original_shape = list(q.shape)
85
+ original_shape[-1] = 3
86
+ q = q.view(-1, 4)
87
+
88
+ q0 = q[:, 0]
89
+ q1 = q[:, 1]
90
+ q2 = q[:, 2]
91
+ q3 = q[:, 3]
92
+
93
+ if order == 'xyz':
94
+ x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
95
+ y = torch.asin(torch.clamp(2 * (q1 * q3 + q0 * q2), -1 + epsilon, 1 - epsilon))
96
+ z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
97
+ elif order == 'yzx':
98
+ x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
99
+ y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
100
+ z = torch.asin(torch.clamp(2 * (q1 * q2 + q0 * q3), -1 + epsilon, 1 - epsilon))
101
+ elif order == 'zxy':
102
+ x = torch.asin(torch.clamp(2 * (q0 * q1 + q2 * q3), -1 + epsilon, 1 - epsilon))
103
+ y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
104
+ z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q1 * q1 + q3 * q3))
105
+ elif order == 'xzy':
106
+ x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
107
+ y = torch.atan2(2 * (q0 * q2 + q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
108
+ z = torch.asin(torch.clamp(2 * (q0 * q3 - q1 * q2), -1 + epsilon, 1 - epsilon))
109
+ elif order == 'yxz':
110
+ x = torch.asin(torch.clamp(2 * (q0 * q1 - q2 * q3), -1 + epsilon, 1 - epsilon))
111
+ y = torch.atan2(2 * (q1 * q3 + q0 * q2), 1 - 2 * (q1 * q1 + q2 * q2))
112
+ z = torch.atan2(2 * (q1 * q2 + q0 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
113
+ elif order == 'zyx':
114
+ x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
115
+ y = torch.asin(torch.clamp(2 * (q0 * q2 - q1 * q3), -1 + epsilon, 1 - epsilon))
116
+ z = torch.atan2(2 * (q0 * q3 + q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
117
+ else:
118
+ raise
119
+
120
+ if deg:
121
+ return torch.stack((x, y, z), dim=1).view(original_shape) * 180 / np.pi
122
+ else:
123
+ return torch.stack((x, y, z), dim=1).view(original_shape)
124
+
125
+
126
+ # Numpy-backed implementations
127
+
128
+ def qmul_np(q, r):
129
+ q = torch.from_numpy(q).contiguous().float()
130
+ r = torch.from_numpy(r).contiguous().float()
131
+ return qmul(q, r).numpy()
132
+
133
+
134
+ def qrot_np(q, v):
135
+ q = torch.from_numpy(q).contiguous().float()
136
+ v = torch.from_numpy(v).contiguous().float()
137
+ return qrot(q, v).numpy()
138
+
139
+
140
+ def qeuler_np(q, order, epsilon=0, use_gpu=False):
141
+ if use_gpu:
142
+ q = torch.from_numpy(q).cuda().float()
143
+ return qeuler(q, order, epsilon).cpu().numpy()
144
+ else:
145
+ q = torch.from_numpy(q).contiguous().float()
146
+ return qeuler(q, order, epsilon).numpy()
147
+
148
+
149
+ def qfix(q):
150
+ """
151
+ Enforce quaternion continuity across the time dimension by selecting
152
+ the representation (q or -q) with minimal distance (or, equivalently, maximal dot product)
153
+ between two consecutive frames.
154
+
155
+ Expects a tensor of shape (L, J, 4), where L is the sequence length and J is the number of joints.
156
+ Returns a tensor of the same shape.
157
+ """
158
+ assert len(q.shape) == 3
159
+ assert q.shape[-1] == 4
160
+
161
+ result = q.copy()
162
+ dot_products = np.sum(q[1:] * q[:-1], axis=2)
163
+ mask = dot_products < 0
164
+ mask = (np.cumsum(mask, axis=0) % 2).astype(bool)
165
+ result[1:][mask] *= -1
166
+ return result
167
+
168
+
169
+ def euler2quat(e, order, deg=True):
170
+ """
171
+ Convert Euler angles to quaternions.
172
+ """
173
+ assert e.shape[-1] == 3
174
+
175
+ original_shape = list(e.shape)
176
+ original_shape[-1] = 4
177
+
178
+ e = e.view(-1, 3)
179
+
180
+ ## if euler angles in degrees
181
+ if deg:
182
+ e = e * np.pi / 180.
183
+
184
+ x = e[:, 0]
185
+ y = e[:, 1]
186
+ z = e[:, 2]
187
+
188
+ rx = torch.stack((torch.cos(x / 2), torch.sin(x / 2), torch.zeros_like(x), torch.zeros_like(x)), dim=1)
189
+ ry = torch.stack((torch.cos(y / 2), torch.zeros_like(y), torch.sin(y / 2), torch.zeros_like(y)), dim=1)
190
+ rz = torch.stack((torch.cos(z / 2), torch.zeros_like(z), torch.zeros_like(z), torch.sin(z / 2)), dim=1)
191
+
192
+ result = None
193
+ for coord in order:
194
+ if coord == 'x':
195
+ r = rx
196
+ elif coord == 'y':
197
+ r = ry
198
+ elif coord == 'z':
199
+ r = rz
200
+ else:
201
+ raise
202
+ if result is None:
203
+ result = r
204
+ else:
205
+ result = qmul(result, r)
206
+
207
+ # Reverse antipodal representation to have a non-negative "w"
208
+ if order in ['xyz', 'yzx', 'zxy']:
209
+ result *= -1
210
+
211
+ return result.view(original_shape)
212
+
213
+
214
+ def expmap_to_quaternion(e):
215
+ """
216
+ Convert axis-angle rotations (aka exponential maps) to quaternions.
217
+ Stable formula from "Practical Parameterization of Rotations Using the Exponential Map".
218
+ Expects a tensor of shape (*, 3), where * denotes any number of dimensions.
219
+ Returns a tensor of shape (*, 4).
220
+ """
221
+ assert e.shape[-1] == 3
222
+
223
+ original_shape = list(e.shape)
224
+ original_shape[-1] = 4
225
+ e = e.reshape(-1, 3)
226
+
227
+ theta = np.linalg.norm(e, axis=1).reshape(-1, 1)
228
+ w = np.cos(0.5 * theta).reshape(-1, 1)
229
+ xyz = 0.5 * np.sinc(0.5 * theta / np.pi) * e
230
+ return np.concatenate((w, xyz), axis=1).reshape(original_shape)
231
+
232
+
233
+ def euler_to_quaternion(e, order):
234
+ """
235
+ Convert Euler angles to quaternions.
236
+ """
237
+ assert e.shape[-1] == 3
238
+
239
+ original_shape = list(e.shape)
240
+ original_shape[-1] = 4
241
+
242
+ e = e.reshape(-1, 3)
243
+
244
+ x = e[:, 0]
245
+ y = e[:, 1]
246
+ z = e[:, 2]
247
+
248
+ rx = np.stack((np.cos(x / 2), np.sin(x / 2), np.zeros_like(x), np.zeros_like(x)), axis=1)
249
+ ry = np.stack((np.cos(y / 2), np.zeros_like(y), np.sin(y / 2), np.zeros_like(y)), axis=1)
250
+ rz = np.stack((np.cos(z / 2), np.zeros_like(z), np.zeros_like(z), np.sin(z / 2)), axis=1)
251
+
252
+ result = None
253
+ for coord in order:
254
+ if coord == 'x':
255
+ r = rx
256
+ elif coord == 'y':
257
+ r = ry
258
+ elif coord == 'z':
259
+ r = rz
260
+ else:
261
+ raise
262
+ if result is None:
263
+ result = r
264
+ else:
265
+ result = qmul_np(result, r)
266
+
267
+ # Reverse antipodal representation to have a non-negative "w"
268
+ if order in ['xyz', 'yzx', 'zxy']:
269
+ result *= -1
270
+
271
+ return result.reshape(original_shape)
272
+
273
+
274
+ def quaternion_to_matrix(quaternions):
275
+ """
276
+ Convert rotations given as quaternions to rotation matrices.
277
+ Args:
278
+ quaternions: quaternions with real part first,
279
+ as tensor of shape (..., 4).
280
+ Returns:
281
+ Rotation matrices as tensor of shape (..., 3, 3).
282
+ """
283
+ r, i, j, k = torch.unbind(quaternions, -1)
284
+ two_s = 2.0 / (quaternions * quaternions).sum(-1)
285
+
286
+ o = torch.stack(
287
+ (
288
+ 1 - two_s * (j * j + k * k),
289
+ two_s * (i * j - k * r),
290
+ two_s * (i * k + j * r),
291
+ two_s * (i * j + k * r),
292
+ 1 - two_s * (i * i + k * k),
293
+ two_s * (j * k - i * r),
294
+ two_s * (i * k - j * r),
295
+ two_s * (j * k + i * r),
296
+ 1 - two_s * (i * i + j * j),
297
+ ),
298
+ -1,
299
+ )
300
+ return o.reshape(quaternions.shape[:-1] + (3, 3))
301
+
302
+
303
+ def quaternion_to_matrix_np(quaternions):
304
+ q = torch.from_numpy(quaternions).contiguous().float()
305
+ return quaternion_to_matrix(q).numpy()
306
+
307
+
308
+ def quaternion_to_cont6d_np(quaternions):
309
+ rotation_mat = quaternion_to_matrix_np(quaternions)
310
+ cont_6d = np.concatenate([rotation_mat[..., 0], rotation_mat[..., 1]], axis=-1)
311
+ return cont_6d
312
+
313
+
314
+ def quaternion_to_cont6d(quaternions):
315
+ rotation_mat = quaternion_to_matrix(quaternions)
316
+ cont_6d = torch.cat([rotation_mat[..., 0], rotation_mat[..., 1]], dim=-1)
317
+ return cont_6d
318
+
319
+
320
+ def cont6d_to_matrix(cont6d):
321
+ assert cont6d.shape[-1] == 6, "The last dimension must be 6"
322
+ x_raw = cont6d[..., 0:3]
323
+ y_raw = cont6d[..., 3:6]
324
+
325
+ x = x_raw / torch.norm(x_raw, dim=-1, keepdim=True)
326
+ z = torch.cross(x, y_raw, dim=-1)
327
+ z = z / torch.norm(z, dim=-1, keepdim=True)
328
+
329
+ y = torch.cross(z, x, dim=-1)
330
+
331
+ x = x[..., None]
332
+ y = y[..., None]
333
+ z = z[..., None]
334
+
335
+ mat = torch.cat([x, y, z], dim=-1)
336
+ return mat
337
+
338
+
339
+ def cont6d_to_matrix_np(cont6d):
340
+ q = torch.from_numpy(cont6d).contiguous().float()
341
+ return cont6d_to_matrix(q).numpy()
342
+
343
+
344
+ def qpow(q0, t, dtype=torch.float):
345
+ ''' q0 : tensor of quaternions
346
+ t: tensor of powers
347
+ '''
348
+ q0 = qnormalize(q0)
349
+ theta0 = torch.acos(q0[..., 0])
350
+
351
+ ## if theta0 is close to zero, add epsilon to avoid NaNs
352
+ mask = (theta0 <= 10e-10) * (theta0 >= -10e-10)
353
+ theta0 = (1 - mask) * theta0 + mask * 10e-10
354
+ v0 = q0[..., 1:] / torch.sin(theta0).view(-1, 1)
355
+
356
+ if isinstance(t, torch.Tensor):
357
+ q = torch.zeros(t.shape + q0.shape)
358
+ theta = t.view(-1, 1) * theta0.view(1, -1)
359
+ else: ## if t is a number
360
+ q = torch.zeros(q0.shape)
361
+ theta = t * theta0
362
+
363
+ q[..., 0] = torch.cos(theta)
364
+ q[..., 1:] = v0 * torch.sin(theta).unsqueeze(-1)
365
+
366
+ return q.to(dtype)
367
+
368
+
369
+ def qslerp(q0, q1, t):
370
+ '''
371
+ q0: starting quaternion
372
+ q1: ending quaternion
373
+ t: array of points along the way
374
+
375
+ Returns:
376
+ Tensor of Slerps: t.shape + q0.shape
377
+ '''
378
+
379
+ q0 = qnormalize(q0)
380
+ q1 = qnormalize(q1)
381
+ q_ = qpow(qmul(q1, qinv(q0)), t)
382
+
383
+ return qmul(q_,
384
+ q0.contiguous().view(torch.Size([1] * len(t.shape)) + q0.shape).expand(t.shape + q0.shape).contiguous())
385
+
386
+
387
+ def qbetween(v0, v1):
388
+ '''
389
+ find the quaternion used to rotate v0 to v1
390
+ '''
391
+ assert v0.shape[-1] == 3, 'v0 must be of the shape (*, 3)'
392
+ assert v1.shape[-1] == 3, 'v1 must be of the shape (*, 3)'
393
+
394
+ v = torch.cross(v0, v1)
395
+ w = torch.sqrt((v0 ** 2).sum(dim=-1, keepdim=True) * (v1 ** 2).sum(dim=-1, keepdim=True)) + (v0 * v1).sum(dim=-1,
396
+ keepdim=True)
397
+ return qnormalize(torch.cat([w, v], dim=-1))
398
+
399
+
400
+ def qbetween_np(v0, v1):
401
+ '''
402
+ find the quaternion used to rotate v0 to v1
403
+ '''
404
+ assert v0.shape[-1] == 3, 'v0 must be of the shape (*, 3)'
405
+ assert v1.shape[-1] == 3, 'v1 must be of the shape (*, 3)'
406
+
407
+ v0 = torch.from_numpy(v0).float()
408
+ v1 = torch.from_numpy(v1).float()
409
+ return qbetween(v0, v1).numpy()
410
+
411
+
412
+ def lerp(p0, p1, t):
413
+ if not isinstance(t, torch.Tensor):
414
+ t = torch.Tensor([t])
415
+
416
+ new_shape = t.shape + p0.shape
417
+ new_view_t = t.shape + torch.Size([1] * len(p0.shape))
418
+ new_view_p = torch.Size([1] * len(t.shape)) + p0.shape
419
+ p0 = p0.view(new_view_p).expand(new_shape)
420
+ p1 = p1.view(new_view_p).expand(new_shape)
421
+ t = t.view(new_view_t).expand(new_shape)
422
+
423
+ return p0 + t * (p1 - p0)
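
A quick sanity check for the conventions used in quaternion.py (w-first quaternions, as the assertions throughout expect): a 90-degree rotation about the +Z axis should map the X axis onto the Y axis, and multiplying a quaternion by its inverse should give the identity.

import torch
from mld.data.humanml.common.quaternion import qrot, qmul, qinv

s = 2 ** 0.5 / 2
q = torch.tensor([[s, 0.0, 0.0, s]])   # (w, x, y, z): 90 degrees about +Z
v = torch.tensor([[1.0, 0.0, 0.0]])
print(qrot(q, v))         # ~[[0., 1., 0.]]
print(qmul(q, qinv(q)))   # ~[[1., 0., 0., 0.]]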
Evaluator_272/mld/data/humanml/common/skeleton.py ADDED
@@ -0,0 +1,199 @@
1
+ from .quaternion import *
2
+ import scipy.ndimage.filters as filters
3
+
4
+ class Skeleton(object):
5
+ def __init__(self, offset, kinematic_tree, device):
6
+ self.device = device
7
+ self._raw_offset_np = offset.numpy()
8
+ self._raw_offset = offset.clone().detach().to(device).float()
9
+ self._kinematic_tree = kinematic_tree
10
+ self._offset = None
11
+ self._parents = [0] * len(self._raw_offset)
12
+ self._parents[0] = -1
13
+ for chain in self._kinematic_tree:
14
+ for j in range(1, len(chain)):
15
+ self._parents[chain[j]] = chain[j-1]
16
+
17
+ def njoints(self):
18
+ return len(self._raw_offset)
19
+
20
+ def offset(self):
21
+ return self._offset
22
+
23
+ def set_offset(self, offsets):
24
+ self._offset = offsets.clone().detach().to(self.device).float()
25
+
26
+ def kinematic_tree(self):
27
+ return self._kinematic_tree
28
+
29
+ def parents(self):
30
+ return self._parents
31
+
32
+ # joints (batch_size, joints_num, 3)
33
+ def get_offsets_joints_batch(self, joints):
34
+ assert len(joints.shape) == 3
35
+ _offsets = self._raw_offset.expand(joints.shape[0], -1, -1).clone()
36
+ for i in range(1, self._raw_offset.shape[0]):
37
+ _offsets[:, i] = torch.norm(joints[:, i] - joints[:, self._parents[i]], p=2, dim=1)[:, None] * _offsets[:, i]
38
+
39
+ self._offset = _offsets.detach()
40
+ return _offsets
41
+
42
+ # joints (joints_num, 3)
43
+ def get_offsets_joints(self, joints):
44
+ assert len(joints.shape) == 2
45
+ _offsets = self._raw_offset.clone()
46
+ for i in range(1, self._raw_offset.shape[0]):
47
+ # print(joints.shape)
48
+ _offsets[i] = torch.norm(joints[i] - joints[self._parents[i]], p=2, dim=0) * _offsets[i]
49
+
50
+ self._offset = _offsets.detach()
51
+ return _offsets
52
+
53
+ # face_joint_idx should follow the order of right hip, left hip, right shoulder, left shoulder
54
+ # joints (batch_size, joints_num, 3)
55
+ def inverse_kinematics_np(self, joints, face_joint_idx, smooth_forward=False):
56
+ assert len(face_joint_idx) == 4
57
+ '''Get Forward Direction'''
58
+ l_hip, r_hip, sdr_r, sdr_l = face_joint_idx
59
+ across1 = joints[:, r_hip] - joints[:, l_hip]
60
+ across2 = joints[:, sdr_r] - joints[:, sdr_l]
61
+ across = across1 + across2
62
+ across = across / np.sqrt((across**2).sum(axis=-1))[:, np.newaxis]
63
+ # print(across1.shape, across2.shape)
64
+
65
+ # forward (batch_size, 3)
66
+ forward = np.cross(np.array([[0, 1, 0]]), across, axis=-1)
67
+ if smooth_forward:
68
+ forward = filters.gaussian_filter1d(forward, 20, axis=0, mode='nearest')
69
+ # forward (batch_size, 3)
70
+ forward = forward / np.sqrt((forward**2).sum(axis=-1))[..., np.newaxis]
71
+
72
+ '''Get Root Rotation'''
73
+ target = np.array([[0,0,1]]).repeat(len(forward), axis=0)
74
+ root_quat = qbetween_np(forward, target)
75
+
76
+ '''Inverse Kinematics'''
77
+ # quat_params (batch_size, joints_num, 4)
78
+ # print(joints.shape[:-1])
79
+ quat_params = np.zeros(joints.shape[:-1] + (4,))
80
+ # print(quat_params.shape)
81
+ root_quat[0] = np.array([[1.0, 0.0, 0.0, 0.0]])
82
+ quat_params[:, 0] = root_quat
83
+ # quat_params[0, 0] = np.array([[1.0, 0.0, 0.0, 0.0]])
84
+ for chain in self._kinematic_tree:
85
+ R = root_quat
86
+ for j in range(len(chain) - 1):
87
+ # (batch, 3)
88
+ u = self._raw_offset_np[chain[j+1]][np.newaxis,...].repeat(len(joints), axis=0)
89
+ # print(u.shape)
90
+ # (batch, 3)
91
+ v = joints[:, chain[j+1]] - joints[:, chain[j]]
92
+ v = v / np.sqrt((v**2).sum(axis=-1))[:, np.newaxis]
93
+ # print(u.shape, v.shape)
94
+ rot_u_v = qbetween_np(u, v)
95
+
96
+ R_loc = qmul_np(qinv_np(R), rot_u_v)
97
+
98
+ quat_params[:,chain[j + 1], :] = R_loc
99
+ R = qmul_np(R, R_loc)
100
+
101
+ return quat_params
102
+
103
+ # Be sure root joint is at the beginning of kinematic chains
104
+ def forward_kinematics(self, quat_params, root_pos, skel_joints=None, do_root_R=True):
105
+ # quat_params (batch_size, joints_num, 4)
106
+ # joints (batch_size, joints_num, 3)
107
+ # root_pos (batch_size, 3)
108
+ if skel_joints is not None:
109
+ offsets = self.get_offsets_joints_batch(skel_joints)
110
+ if len(self._offset.shape) == 2:
111
+ offsets = self._offset.expand(quat_params.shape[0], -1, -1)
112
+ joints = torch.zeros(quat_params.shape[:-1] + (3,)).to(self.device)
113
+ joints[:, 0] = root_pos
114
+ for chain in self._kinematic_tree:
115
+ if do_root_R:
116
+ R = quat_params[:, 0]
117
+ else:
118
+ R = torch.tensor([[1.0, 0.0, 0.0, 0.0]]).expand(len(quat_params), -1).detach().to(self.device)
119
+ for i in range(1, len(chain)):
120
+ R = qmul(R, quat_params[:, chain[i]])
121
+ offset_vec = offsets[:, chain[i]]
122
+ joints[:, chain[i]] = qrot(R, offset_vec) + joints[:, chain[i-1]]
123
+ return joints
124
+
125
+ # Be sure root joint is at the beginning of kinematic chains
126
+ def forward_kinematics_np(self, quat_params, root_pos, skel_joints=None, do_root_R=True):
127
+ # quat_params (batch_size, joints_num, 4)
128
+ # joints (batch_size, joints_num, 3)
129
+ # root_pos (batch_size, 3)
130
+ if skel_joints is not None:
131
+ skel_joints = torch.from_numpy(skel_joints)
132
+ offsets = self.get_offsets_joints_batch(skel_joints)
133
+ if len(self._offset.shape) == 2:
134
+ offsets = self._offset.expand(quat_params.shape[0], -1, -1)
135
+ offsets = offsets.numpy()
136
+ joints = np.zeros(quat_params.shape[:-1] + (3,))
137
+ joints[:, 0] = root_pos
138
+ for chain in self._kinematic_tree:
139
+ if do_root_R:
140
+ R = quat_params[:, 0]
141
+ else:
142
+ R = np.array([[1.0, 0.0, 0.0, 0.0]]).repeat(len(quat_params), axis=0)
143
+ for i in range(1, len(chain)):
144
+ R = qmul_np(R, quat_params[:, chain[i]])
145
+ offset_vec = offsets[:, chain[i]]
146
+ joints[:, chain[i]] = qrot_np(R, offset_vec) + joints[:, chain[i - 1]]
147
+ return joints
148
+
149
+ def forward_kinematics_cont6d_np(self, cont6d_params, root_pos, skel_joints=None, do_root_R=True):
150
+ # cont6d_params (batch_size, joints_num, 6)
151
+ # joints (batch_size, joints_num, 3)
152
+ # root_pos (batch_size, 3)
153
+ if skel_joints is not None:
154
+ skel_joints = torch.from_numpy(skel_joints)
155
+ offsets = self.get_offsets_joints_batch(skel_joints)
156
+ if len(self._offset.shape) == 2:
157
+ offsets = self._offset.expand(cont6d_params.shape[0], -1, -1)
158
+ offsets = offsets.numpy()
159
+ joints = np.zeros(cont6d_params.shape[:-1] + (3,))
160
+ joints[:, 0] = root_pos
161
+ for chain in self._kinematic_tree:
162
+ if do_root_R:
163
+ matR = cont6d_to_matrix_np(cont6d_params[:, 0])
164
+ else:
165
+ matR = np.eye(3)[np.newaxis, :].repeat(len(cont6d_params), axis=0)
166
+ for i in range(1, len(chain)):
167
+ matR = np.matmul(matR, cont6d_to_matrix_np(cont6d_params[:, chain[i]]))
168
+ offset_vec = offsets[:, chain[i]][..., np.newaxis]
169
+ # print(matR.shape, offset_vec.shape)
170
+ joints[:, chain[i]] = np.matmul(matR, offset_vec).squeeze(-1) + joints[:, chain[i-1]]
171
+ return joints
172
+
173
+ def forward_kinematics_cont6d(self, cont6d_params, root_pos, skel_joints=None, do_root_R=True):
174
+ # cont6d_params (batch_size, joints_num, 6)
175
+ # joints (batch_size, joints_num, 3)
176
+ # root_pos (batch_size, 3)
177
+ if skel_joints is not None:
178
+ # skel_joints = torch.from_numpy(skel_joints)
179
+ offsets = self.get_offsets_joints_batch(skel_joints)
180
+ if len(self._offset.shape) == 2:
181
+ offsets = self._offset.expand(cont6d_params.shape[0], -1, -1)
182
+ joints = torch.zeros(cont6d_params.shape[:-1] + (3,)).to(cont6d_params.device)
183
+ joints[..., 0, :] = root_pos
184
+ for chain in self._kinematic_tree:
185
+ if do_root_R:
186
+ matR = cont6d_to_matrix(cont6d_params[:, 0])
187
+ else:
188
+ matR = torch.eye(3).expand((len(cont6d_params), -1, -1)).detach().to(cont6d_params.device)
189
+ for i in range(1, len(chain)):
190
+ matR = torch.matmul(matR, cont6d_to_matrix(cont6d_params[:, chain[i]]))
191
+ offset_vec = offsets[:, chain[i]].unsqueeze(-1)
192
+ # print(matR.shape, offset_vec.shape)
193
+ joints[:, chain[i]] = torch.matmul(matR, offset_vec).squeeze(-1) + joints[:, chain[i-1]]
194
+ return joints
195
+
196
+
197
+
198
+
199
+
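A hedged round-trip sketch for the Skeleton class above: inverse kinematics on HumanML3D-style joint positions, then forward kinematics to rebuild them. The .npy path is a placeholder; the offsets and chains come from paramUtil.py, added later in this commit.

import numpy as np
import torch
from mld.data.humanml.common.skeleton import Skeleton
from mld.data.humanml.utils.paramUtil import t2m_raw_offsets, t2m_kinematic_chain

joints = np.load('new_joints/000021.npy')[:, :22]      # (seq_len, 22, 3), placeholder path

skel = Skeleton(torch.from_numpy(t2m_raw_offsets), t2m_kinematic_chain, 'cpu')
skel.get_offsets_joints(torch.from_numpy(joints[0]))   # bone lengths taken from the first frame

# [2, 1, 17, 16] is the 22-joint face-joint index list used elsewhere in this repo.
quat_params = skel.inverse_kinematics_np(joints, face_joint_idx=[2, 1, 17, 16])
rebuilt = skel.forward_kinematics_np(quat_params, joints[:, 0])
print(np.abs(rebuilt - joints).mean())   # should be small for processed HumanML3D data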
Evaluator_272/mld/data/humanml/data/__init__.py ADDED
File without changes
Evaluator_272/mld/data/humanml/data/dataset.py ADDED
@@ -0,0 +1,227 @@
1
+ import codecs as cs
2
+ import os
3
+ import random
4
+ from os.path import join as pjoin
5
+
6
+ import numpy as np
7
+ import spacy
8
+ import torch
9
+ from rich.progress import track
10
+ from torch.utils import data
11
+ from torch.utils.data._utils.collate import default_collate
12
+ from tqdm import tqdm
13
+ import json
14
+
15
+
16
+ def collate_fn(batch):
17
+ batch.sort(key=lambda x: x[3], reverse=True)
18
+ return default_collate(batch)
19
+
20
+
21
+
22
+ def findAllFile(base):
23
+ file_path = []
24
+ for root, ds, fs in os.walk(base, followlinks=True):
25
+ for f in fs:
26
+ fullname = os.path.join(root, f)
27
+ file_path.append(fullname)
28
+ return file_path
29
+
30
+
31
+ class Text2MotionDatasetV2(data.Dataset):
32
+
33
+ def __init__(
34
+ self,
35
+ mean,
36
+ std,
37
+ split_file,
38
+ max_motion_length,
39
+ min_motion_length,
40
+ max_text_len,
41
+ unit_length,
42
+ motion_dir,
43
+ text_dir,
44
+ input_format,
45
+ njoints,
46
+ tiny=False,
47
+ debug=False,
48
+ progress_bar=True,
49
+ **kwargs,
50
+ ):
51
+
52
+ self.max_length = 20
53
+ self.pointer = 0
54
+ self.max_motion_length = max_motion_length
55
+
56
+ self.min_motion_length = min_motion_length
57
+ self.max_text_len = max_text_len
58
+ self.unit_length = unit_length
59
+ data_dict = {}
60
+ id_list = []
61
+ with cs.open(split_file, "r") as f:
62
+ for line in f.readlines():
63
+ id_list.append(line.strip())
64
+ self.id_list = id_list
65
+ if tiny or debug:
66
+ progress_bar = False
67
+ maxdata = 10 if tiny else 100
68
+ else:
69
+ maxdata = 1e10
70
+
71
+ if progress_bar:
72
+ enumerator = enumerate(
73
+ track(
74
+ id_list,
75
+ f"Loading {split_file.split('/')[-2]} {split_file.split('/')[-1].split('.')[0]}",
76
+ ))
77
+ else:
78
+ enumerator = enumerate(id_list)
79
+ count = 0
80
+ bad_count = 0
81
+ miss_count = 0
82
+ new_name_list = []
83
+ length_list = []
84
+
85
+ for i, name in enumerator:
86
+ if count > maxdata:
87
+ break
88
+ try:
89
+
90
+ motion = np.load(pjoin(motion_dir, name + ".npy"))
91
+
92
+ if input_format == 'root_position':
93
+ motion = motion[..., :4+(njoints-1)*3]
94
+ elif input_format == 'root_position_vel':
95
+ motion = np.concatenate((motion[..., :4+(njoints - 1) * 3], motion[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=-1)
96
+ elif input_format == 'root_position_rot6d':
97
+ motion = np.concatenate((motion[..., :4+(njoints - 1) * 3], motion[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=-1)
98
+ elif input_format == 'root_rot6d':
99
+ motion = np.concatenate((motion[..., :4], motion[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=-1)
100
+ elif input_format in ['vector_263', '']:
101
+ pass
102
+ else:
103
+ raise NotImplementedError
104
+
105
+
106
+ text_data = []
107
+ flag = False
108
+ with cs.open(pjoin(text_dir, name + ".txt")) as f:
109
+ for line in f.readlines():
110
+ text_dict = {}
111
+ line_split = line.strip().split("#")
112
+ caption = line_split[0]
113
+ tokens = line_split[1].split(" ")
114
+ f_tag = float(line_split[2])
115
+ to_tag = float(line_split[3])
116
+ f_tag = 0.0 if np.isnan(f_tag) else f_tag
117
+ to_tag = 0.0 if np.isnan(to_tag) else to_tag
118
+
119
+ text_dict["caption"] = caption
120
+ text_dict["tokens"] = tokens
121
+ if f_tag == 0.0 and to_tag == 0.0:
122
+ flag = True
123
+ text_data.append(text_dict)
124
+ else:
125
+ try:
126
+ n_motion = motion[int(f_tag * 30):int(to_tag * 30)]
127
+
128
+ new_name = (
129
+ random.choice("ABCDEFGHIJKLMNOPQRSTUVW") +
130
+ "_" + name)
131
+ while new_name in data_dict:
132
+ new_name = (random.choice(
133
+ "ABCDEFGHIJKLMNOPQRSTUVW") + "_" +
134
+ name)
135
+ data_dict[new_name] = {
136
+ "motion": n_motion,
137
+ "length": len(n_motion),
138
+ "text": [text_dict],
139
+ }
140
+ new_name_list.append(new_name)
141
+ length_list.append(len(n_motion))
142
+ except:
143
+ print(line_split)
144
+ print(line_split[2], line_split[3], f_tag,
145
+ to_tag, name)
146
+
147
+
148
+ if flag:
149
+ data_dict[name] = {
150
+ "motion": motion,
151
+ "length": len(motion),
152
+ "text": text_data,
153
+ }
154
+ new_name_list.append(name)
155
+ length_list.append(len(motion))
156
+ count += 1
157
+
158
+ except:
159
+ miss_count += 1
160
+ pass
161
+
162
+ print(f'{miss_count} motions from the split file are missing from the dataset!')
163
+
164
+ name_list, length_list = zip(
165
+ *sorted(zip(new_name_list, length_list), key=lambda x: x[1]))
166
+
167
+
168
+
169
+ self.mean = mean
170
+ self.std = std
171
+
172
+ self.length_arr = np.array(length_list)
173
+ self.data_dict = data_dict
174
+ self.nfeats = motion.shape[1]
175
+ self.name_list = name_list
176
+ self.reset_max_len(self.max_length)
177
+
178
+
179
+ def reset_max_len(self, length):
180
+ assert length <= self.max_motion_length
181
+ self.pointer = np.searchsorted(self.length_arr, length)
182
+ print("Pointer Pointing at %d" % self.pointer)
183
+ self.max_length = length
184
+
185
+ def inv_transform(self, data):
186
+ return data * self.std + self.mean
187
+
188
+ def __len__(self):
189
+ return len(self.name_list) - self.pointer
190
+
191
+ def __getitem__(self, item):
192
+ idx = self.pointer + item
193
+ data = self.data_dict[self.name_list[idx]]
194
+
195
+ retrieval_name = self.name_list[idx].split('_')[-1]
196
+
197
+ motion, m_length, text_list = data["motion"], data["length"], data["text"]
198
+
199
+ # Randomly select a caption
200
+ text_data = random.choice(text_list)
201
+ # caption, tokens = text_data["caption"], text_data["tokens"]
202
+ caption = text_data["caption"]
203
+
204
+ # Crop the motion length to a multiple of unit_length, with a small random variation
205
+ if self.unit_length < 10:
206
+ coin2 = np.random.choice(["single", "single", "double"])
207
+ else:
208
+ coin2 = "single"
209
+
210
+ if coin2 == "double":
211
+ m_length = (m_length // self.unit_length - 1) * self.unit_length
212
+ elif coin2 == "single":
213
+ m_length = (m_length // self.unit_length) * self.unit_length
214
+ idx = random.randint(0, len(motion) - m_length)
215
+ motion = motion[idx:idx + m_length]
216
+ # Normalization
217
+ motion = (motion - self.mean) / self.std
218
+
219
+ if np.any(np.isnan(motion)):
220
+ raise ValueError("nan in motion")
221
+
222
+ return (
223
+ caption,
224
+ motion,
225
+ m_length,
226
+ retrieval_name
227
+ )
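A hedged construction sketch for Text2MotionDatasetV2 above; every path and hyper-parameter below is an illustrative placeholder, and input_format='' keeps the full feature vector.

import numpy as np
from mld.data.humanml.data.dataset import Text2MotionDatasetV2

mean = np.load('datasets/humanml3d_272/mean.npy')     # placeholder statistics
std = np.load('datasets/humanml3d_272/std.npy')

dataset = Text2MotionDatasetV2(
    mean=mean, std=std,
    split_file='datasets/humanml3d_272/val.txt',
    max_motion_length=300, min_motion_length=40,
    max_text_len=20, unit_length=4,
    motion_dir='datasets/humanml3d_272/new_joint_vecs',
    text_dir='datasets/humanml3d_272/texts',
    input_format='',
    njoints=22,
)

caption, motion, m_length, retrieval_name = dataset[0]
print(caption, motion.shape, m_length)   # motion is already (x - mean) / std normalized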
Evaluator_272/mld/data/humanml/scripts/motion_process.py ADDED
@@ -0,0 +1,576 @@
1
+ from os.path import join as pjoin
2
+
3
+ from ..common.skeleton import Skeleton
4
+ import numpy as np
5
+ import os
6
+ from ..common.quaternion import *
7
+ from ..utils.paramUtil import *
8
+
9
+ import torch
10
+ from tqdm import tqdm
11
+
12
+ # positions (batch, joint_num, 3)
13
+ def uniform_skeleton(positions, target_offset):
14
+ src_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
15
+ src_offset = src_skel.get_offsets_joints(torch.from_numpy(positions[0]))
16
+ src_offset = src_offset.numpy()
17
+ tgt_offset = target_offset.numpy()
18
+ # print(src_offset)
19
+ # print(tgt_offset)
20
+ '''Calculate Scale Ratio as the ratio of legs'''
21
+ src_leg_len = np.abs(src_offset[l_idx1]).max() + np.abs(src_offset[l_idx2]).max()
22
+ tgt_leg_len = np.abs(tgt_offset[l_idx1]).max() + np.abs(tgt_offset[l_idx2]).max()
23
+
24
+ scale_rt = tgt_leg_len / src_leg_len
25
+ # print(scale_rt)
26
+ src_root_pos = positions[:, 0]
27
+ tgt_root_pos = src_root_pos * scale_rt
28
+
29
+ '''Inverse Kinematics'''
30
+ quat_params = src_skel.inverse_kinematics_np(positions, face_joint_indx)
31
+ # print(quat_params.shape)
32
+
33
+ '''Forward Kinematics'''
34
+ src_skel.set_offset(target_offset)
35
+ new_joints = src_skel.forward_kinematics_np(quat_params, tgt_root_pos)
36
+ return new_joints
37
+
38
+
39
+ def extract_features(positions, feet_thre, n_raw_offsets, kinematic_chain, face_joint_indx, fid_r, fid_l):
40
+ global_positions = positions.copy()
41
+ """ Get Foot Contacts """
42
+
43
+ def foot_detect(positions, thres):
44
+ velfactor, heightfactor = np.array([thres, thres]), np.array([3.0, 2.0])
45
+
46
+ feet_l_x = (positions[1:, fid_l, 0] - positions[:-1, fid_l, 0]) ** 2
47
+ feet_l_y = (positions[1:, fid_l, 1] - positions[:-1, fid_l, 1]) ** 2
48
+ feet_l_z = (positions[1:, fid_l, 2] - positions[:-1, fid_l, 2]) ** 2
49
+ # feet_l_h = positions[:-1,fid_l,1]
50
+ # feet_l = (((feet_l_x + feet_l_y + feet_l_z) < velfactor) & (feet_l_h < heightfactor)).astype(np.float64)
51
+ feet_l = ((feet_l_x + feet_l_y + feet_l_z) < velfactor).astype(np.float64)
52
+
53
+ feet_r_x = (positions[1:, fid_r, 0] - positions[:-1, fid_r, 0]) ** 2
54
+ feet_r_y = (positions[1:, fid_r, 1] - positions[:-1, fid_r, 1]) ** 2
55
+ feet_r_z = (positions[1:, fid_r, 2] - positions[:-1, fid_r, 2]) ** 2
56
+ # feet_r_h = positions[:-1,fid_r,1]
57
+ # feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor) & (feet_r_h < heightfactor)).astype(np.float64)
58
+ feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor)).astype(np.float64)
59
+ return feet_l, feet_r
60
+
61
+ #
62
+ feet_l, feet_r = foot_detect(positions, feet_thre)
63
+ # feet_l, feet_r = foot_detect(positions, 0.002)
64
+
65
+ '''Quaternion and Cartesian representation'''
66
+ r_rot = None
67
+
68
+ def get_rifke(positions):
69
+ '''Local pose'''
70
+ positions[..., 0] -= positions[:, 0:1, 0]
71
+ positions[..., 2] -= positions[:, 0:1, 2]
72
+ '''All pose face Z+'''
73
+ positions = qrot_np(np.repeat(r_rot[:, None], positions.shape[1], axis=1), positions)
74
+ return positions
75
+
76
+ def get_quaternion(positions):
77
+ skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
78
+ # (seq_len, joints_num, 4)
79
+ quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=False)
80
+
81
+ '''Fix Quaternion Discontinuity'''
82
+ quat_params = qfix(quat_params)
83
+ # (seq_len, 4)
84
+ r_rot = quat_params[:, 0].copy()
85
+ # print(r_rot[0])
86
+ '''Root Linear Velocity'''
87
+ # (seq_len - 1, 3)
88
+ velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
89
+ # print(r_rot.shape, velocity.shape)
90
+ velocity = qrot_np(r_rot[1:], velocity)
91
+ '''Root Angular Velocity'''
92
+ # (seq_len - 1, 4)
93
+ r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
94
+ quat_params[1:, 0] = r_velocity
95
+ # (seq_len, joints_num, 4)
96
+ return quat_params, r_velocity, velocity, r_rot
97
+
98
+ def get_cont6d_params(positions):
99
+ skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
100
+ # (seq_len, joints_num, 4)
101
+ quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=True)
102
+
103
+ '''Quaternion to continuous 6D'''
104
+ cont_6d_params = quaternion_to_cont6d_np(quat_params)
105
+ # (seq_len, 4)
106
+ r_rot = quat_params[:, 0].copy()
107
+ # print(r_rot[0])
108
+ '''Root Linear Velocity'''
109
+ # (seq_len - 1, 3)
110
+ velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
111
+ # print(r_rot.shape, velocity.shape)
112
+ velocity = qrot_np(r_rot[1:], velocity)
113
+ '''Root Angular Velocity'''
114
+ # (seq_len - 1, 4)
115
+ r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
116
+ # (seq_len, joints_num, 4)
117
+ return cont_6d_params, r_velocity, velocity, r_rot
118
+
119
+ cont_6d_params, r_velocity, velocity, r_rot = get_cont6d_params(positions)
120
+ positions = get_rifke(positions)
121
+
122
+ # trejec = np.cumsum(np.concatenate([np.array([[0, 0, 0]]), velocity], axis=0), axis=0)
123
+ # r_rotations, r_pos = recover_ric_glo_np(r_velocity, velocity[:, [0, 2]])
124
+
125
+ # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
126
+ # plt.plot(ground_positions[:, 0, 0], ground_positions[:, 0, 2], marker='o', color='r')
127
+ # plt.plot(trejec[:, 0], trejec[:, 2], marker='^', color='g')
128
+ # plt.plot(r_pos[:, 0], r_pos[:, 2], marker='s', color='y')
129
+ # plt.xlabel('x')
130
+ # plt.ylabel('z')
131
+ # plt.axis('equal')
132
+ # plt.show()
133
+
134
+ '''Root height'''
135
+ root_y = positions[:, 0, 1:2]
136
+
137
+ '''Root rotation and linear velocity'''
138
+ # (seq_len-1, 1) rotation velocity along y-axis
139
+ # (seq_len-1, 2) linear velocity on xz plane
140
+ r_velocity = np.arcsin(r_velocity[:, 2:3])
141
+ l_velocity = velocity[:, [0, 2]]
142
+ # print(r_velocity.shape, l_velocity.shape, root_y.shape)
143
+ root_data = np.concatenate([r_velocity, l_velocity, root_y[:-1]], axis=-1)
144
+
145
+ '''Get Joint Rotation Representation'''
146
+ # (seq_len, (joints_num-1)*6) continuous 6D rotations for the non-root skeleton joints
147
+ rot_data = cont_6d_params[:, 1:].reshape(len(cont_6d_params), -1)
148
+
149
+ '''Get Joint Rotation Invariant Position Representation'''
150
+ # (seq_len, (joints_num-1)*3) local joint position
151
+ ric_data = positions[:, 1:].reshape(len(positions), -1)
152
+
153
+ '''Get Joint Velocity Representation'''
154
+ # (seq_len-1, joints_num*3)
155
+ local_vel = qrot_np(np.repeat(r_rot[:-1, None], global_positions.shape[1], axis=1),
156
+ global_positions[1:] - global_positions[:-1])
157
+ local_vel = local_vel.reshape(len(local_vel), -1)
158
+
159
+ data = root_data
160
+ data = np.concatenate([data, ric_data[:-1]], axis=-1)
161
+ data = np.concatenate([data, rot_data[:-1]], axis=-1)
162
+ # print(dataset.shape, local_vel.shape)
163
+ data = np.concatenate([data, local_vel], axis=-1)
164
+ data = np.concatenate([data, feet_l, feet_r], axis=-1)
165
+
166
+ return data
167
+
168
+
169
+ def process_file(positions, feet_thre):
170
+ # (seq_len, joints_num, 3)
171
+ # '''Down Sample'''
172
+ # positions = positions[::ds_num]
173
+
174
+ '''Uniform Skeleton'''
175
+ positions = uniform_skeleton(positions, tgt_offsets)
176
+
177
+ '''Put on Floor'''
178
+ floor_height = positions.min(axis=0).min(axis=0)[1]
179
+ positions[:, :, 1] -= floor_height
180
+ # print(floor_height)
181
+
182
+ # plot_3d_motion("./positions_1.mp4", kinematic_chain, positions, 'title', fps=20)
183
+
184
+ '''XZ at origin'''
185
+ root_pos_init = positions[0]
186
+ root_pose_init_xz = root_pos_init[0] * np.array([1, 0, 1])
187
+ positions = positions - root_pose_init_xz
188
+
189
+ # '''Move the first pose to origin '''
190
+ # root_pos_init = positions[0]
191
+ # positions = positions - root_pos_init[0]
192
+
193
+ '''All initially face Z+'''
194
+ r_hip, l_hip, sdr_r, sdr_l = face_joint_indx
195
+ across1 = root_pos_init[r_hip] - root_pos_init[l_hip]
196
+ across2 = root_pos_init[sdr_r] - root_pos_init[sdr_l]
197
+ across = across1 + across2
198
+ across = across / np.sqrt((across ** 2).sum(axis=-1))[..., np.newaxis]
199
+
200
+ # forward (3,), rotate around y-axis
201
+ forward_init = np.cross(np.array([[0, 1, 0]]), across, axis=-1)
202
+ # forward (3,)
203
+ forward_init = forward_init / np.sqrt((forward_init ** 2).sum(axis=-1))[..., np.newaxis]
204
+
205
+ # print(forward_init)
206
+
207
+ target = np.array([[0, 0, 1]])
208
+ root_quat_init = qbetween_np(forward_init, target)
209
+ root_quat_init = np.ones(positions.shape[:-1] + (4,)) * root_quat_init
210
+
211
+ positions_b = positions.copy()
212
+
213
+ positions = qrot_np(root_quat_init, positions)
214
+
215
+ # plot_3d_motion("./positions_2.mp4", kinematic_chain, positions, 'title', fps=20)
216
+
217
+ '''New ground truth positions'''
218
+ global_positions = positions.copy()
219
+
220
+ # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
221
+ # plt.plot(positions[:, 0, 0], positions[:, 0, 2], marker='o', color='r')
222
+ # plt.xlabel('x')
223
+ # plt.ylabel('z')
224
+ # plt.axis('equal')
225
+ # plt.show()
226
+
227
+ """ Get Foot Contacts """
228
+
229
+ def foot_detect(positions, thres):
230
+ velfactor, heightfactor = np.array([thres, thres]), np.array([3.0, 2.0])
231
+
232
+ feet_l_x = (positions[1:, fid_l, 0] - positions[:-1, fid_l, 0]) ** 2
233
+ feet_l_y = (positions[1:, fid_l, 1] - positions[:-1, fid_l, 1]) ** 2
234
+ feet_l_z = (positions[1:, fid_l, 2] - positions[:-1, fid_l, 2]) ** 2
235
+ # feet_l_h = positions[:-1,fid_l,1]
236
+ # feet_l = (((feet_l_x + feet_l_y + feet_l_z) < velfactor) & (feet_l_h < heightfactor)).astype(np.float64)
237
+ feet_l = ((feet_l_x + feet_l_y + feet_l_z) < velfactor).astype(np.float64)
238
+
239
+ feet_r_x = (positions[1:, fid_r, 0] - positions[:-1, fid_r, 0]) ** 2
240
+ feet_r_y = (positions[1:, fid_r, 1] - positions[:-1, fid_r, 1]) ** 2
241
+ feet_r_z = (positions[1:, fid_r, 2] - positions[:-1, fid_r, 2]) ** 2
242
+ # feet_r_h = positions[:-1,fid_r,1]
243
+ # feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor) & (feet_r_h < heightfactor)).astype(np.float64)
244
+ feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor)).astype(np.float64)
245
+ return feet_l, feet_r
246
+ #
247
+ feet_l, feet_r = foot_detect(positions, feet_thre)
248
+ # feet_l, feet_r = foot_detect(positions, 0.002)
249
+
250
+ '''Quaternion and Cartesian representation'''
251
+ r_rot = None
252
+
253
+ def get_rifke(positions):
254
+ '''Local pose'''
255
+ positions[..., 0] -= positions[:, 0:1, 0]
256
+ positions[..., 2] -= positions[:, 0:1, 2]
257
+ '''All pose face Z+'''
258
+ positions = qrot_np(np.repeat(r_rot[:, None], positions.shape[1], axis=1), positions)
259
+ return positions
260
+
261
+ def get_quaternion(positions):
262
+ skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
263
+ # (seq_len, joints_num, 4)
264
+ quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=False)
265
+
266
+ '''Fix Quaternion Discontinuity'''
267
+ quat_params = qfix(quat_params)
268
+ # (seq_len, 4)
269
+ r_rot = quat_params[:, 0].copy()
270
+ # print(r_rot[0])
271
+ '''Root Linear Velocity'''
272
+ # (seq_len - 1, 3)
273
+ velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
274
+ # print(r_rot.shape, velocity.shape)
275
+ velocity = qrot_np(r_rot[1:], velocity)
276
+ '''Root Angular Velocity'''
277
+ # (seq_len - 1, 4)
278
+ r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
279
+ quat_params[1:, 0] = r_velocity
280
+ # (seq_len, joints_num, 4)
281
+ return quat_params, r_velocity, velocity, r_rot
282
+
283
+ def get_cont6d_params(positions):
284
+ skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
285
+ # (seq_len, joints_num, 4)
286
+ quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=True)
287
+
288
+ '''Quaternion to continuous 6D'''
289
+ cont_6d_params = quaternion_to_cont6d_np(quat_params)
290
+ # (seq_len, 4)
291
+ r_rot = quat_params[:, 0].copy()
292
+ # print(r_rot[0])
293
+ '''Root Linear Velocity'''
294
+ # (seq_len - 1, 3)
295
+ velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
296
+ # print(r_rot.shape, velocity.shape)
297
+ velocity = qrot_np(r_rot[1:], velocity)
298
+ '''Root Angular Velocity'''
299
+ # (seq_len - 1, 4)
300
+ r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
301
+ # (seq_len, joints_num, 4)
302
+ return cont_6d_params, r_velocity, velocity, r_rot
303
+
304
+ cont_6d_params, r_velocity, velocity, r_rot = get_cont6d_params(positions)
305
+ positions = get_rifke(positions)
306
+
307
+ # trejec = np.cumsum(np.concatenate([np.array([[0, 0, 0]]), velocity], axis=0), axis=0)
308
+ # r_rotations, r_pos = recover_ric_glo_np(r_velocity, velocity[:, [0, 2]])
309
+
310
+ # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
311
+ # plt.plot(ground_positions[:, 0, 0], ground_positions[:, 0, 2], marker='o', color='r')
312
+ # plt.plot(trejec[:, 0], trejec[:, 2], marker='^', color='g')
313
+ # plt.plot(r_pos[:, 0], r_pos[:, 2], marker='s', color='y')
314
+ # plt.xlabel('x')
315
+ # plt.ylabel('z')
316
+ # plt.axis('equal')
317
+ # plt.show()
318
+
319
+ '''Root height'''
320
+ root_y = positions[:, 0, 1:2]
321
+
322
+ '''Root rotation and linear velocity'''
323
+ # (seq_len-1, 1) rotation velocity along y-axis
324
+ # (seq_len-1, 2) linear velocity on xz plane
325
+ r_velocity = np.arcsin(r_velocity[:, 2:3])
326
+ l_velocity = velocity[:, [0, 2]]
327
+ # print(r_velocity.shape, l_velocity.shape, root_y.shape)
328
+ root_data = np.concatenate([r_velocity, l_velocity, root_y[:-1]], axis=-1)
329
+
330
+ '''Get Joint Rotation Representation'''
331
+ # (seq_len, (joints_num-1)*6) continuous 6D rotations for the non-root skeleton joints
332
+ rot_data = cont_6d_params[:, 1:].reshape(len(cont_6d_params), -1)
333
+
334
+ '''Get Joint Rotation Invariant Position Representation'''
335
+ # (seq_len, (joints_num-1)*3) local joint position
336
+ ric_data = positions[:, 1:].reshape(len(positions), -1)
337
+
338
+ '''Get Joint Velocity Representation'''
339
+ # (seq_len-1, joints_num*3)
340
+ local_vel = qrot_np(np.repeat(r_rot[:-1, None], global_positions.shape[1], axis=1),
341
+ global_positions[1:] - global_positions[:-1])
342
+ local_vel = local_vel.reshape(len(local_vel), -1)
343
+
344
+ data = root_data
345
+ data = np.concatenate([data, ric_data[:-1]], axis=-1)
346
+ data = np.concatenate([data, rot_data[:-1]], axis=-1)
347
+ # print(dataset.shape, local_vel.shape)
348
+ data = np.concatenate([data, local_vel], axis=-1)
349
+ data = np.concatenate([data, feet_l, feet_r], axis=-1)
350
+
351
+ return data, global_positions, positions, l_velocity
352
+
353
+
354
+ # Recover global angle and positions for rotation dataset
355
+ # root_rot_velocity (B, seq_len, 1)
356
+ # root_linear_velocity (B, seq_len, 2)
357
+ # root_y (B, seq_len, 1)
358
+ # ric_data (B, seq_len, (joint_num - 1)*3)
359
+ # rot_data (B, seq_len, (joint_num - 1)*6)
360
+ # local_velocity (B, seq_len, joint_num*3)
361
+ # foot contact (B, seq_len, 4)
362
+ def recover_root_rot_pos(data):
363
+ rot_vel = data[..., 0]
364
+ r_rot_ang = torch.zeros_like(rot_vel).to(data.device)
365
+ '''Get Y-axis rotation from rotation velocity'''
366
+ r_rot_ang[..., 1:] = rot_vel[..., :-1]
367
+ r_rot_ang = torch.cumsum(r_rot_ang, dim=-1)
368
+
369
+ r_rot_quat = torch.zeros(data.shape[:-1] + (4,)).to(data.device)
370
+ r_rot_quat[..., 0] = torch.cos(r_rot_ang)
371
+ r_rot_quat[..., 2] = torch.sin(r_rot_ang)
372
+
373
+ r_pos = torch.zeros(data.shape[:-1] + (3,)).to(data.device)
374
+ r_pos[..., 1:, [0, 2]] = data[..., :-1, 1:3]
375
+ '''Add Y-axis rotation to root position'''
376
+ r_pos = qrot(qinv(r_rot_quat), r_pos)
377
+
378
+ r_pos = torch.cumsum(r_pos, dim=-2)
379
+
380
+ r_pos[..., 1] = data[..., 3]
381
+ return r_rot_quat, r_pos
382
+
383
+
384
+ def recover_from_rot(data, joints_num, skeleton):
385
+ r_rot_quat, r_pos = recover_root_rot_pos(data)
386
+
387
+ r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)
388
+
389
+ start_indx = 1 + 2 + 1 + (joints_num - 1) * 3
390
+ end_indx = start_indx + (joints_num - 1) * 6
391
+ cont6d_params = data[..., start_indx:end_indx]
392
+ # print(r_rot_cont6d.shape, cont6d_params.shape, r_pos.shape)
393
+ cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
394
+ cont6d_params = cont6d_params.view(-1, joints_num, 6)
395
+
396
+ positions = skeleton.forward_kinematics_cont6d(cont6d_params, r_pos)
397
+
398
+ return positions
399
+
400
+ def recover_from_root_rot6d(data, joints_num, skeleton):
401
+
402
+ r_rot_quat, r_pos = recover_root_rot_pos(data)
403
+
404
+ r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)
405
+
406
+ start_indx = 1 + 2 + 1
407
+ end_indx = start_indx + (joints_num - 1) * 6
408
+ cont6d_params = data[..., start_indx:end_indx]
409
+ # print(r_rot_cont6d.shape, cont6d_params.shape, r_pos.shape)
410
+ cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
411
+ cont6d_params = cont6d_params.view(-1, joints_num, 6)
412
+ r_pos = r_pos.view(-1,3)
413
+ positions = skeleton.forward_kinematics_cont6d(cont6d_params, r_pos)
414
+ return positions
415
+
416
+ def recover_from_body_pos_vel_hand_rot(data, joints_num, skeleton):
417
+ assert len(skeleton) == 2
418
+ body_skel = skeleton[0]
419
+ all_skel = skeleton[1]
420
+ assert joints_num == 52
421
+ face_joint_indx = [2, 1, 17, 16]
422
+
423
+ r_rot_quat, r_pos = recover_root_rot_pos(data)
424
+
425
+ r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)
426
+
427
+ pos_body_data = data[..., : 4 + 21 * 3]
428
+ pos_body_data_global = recover_from_ric(pos_body_data, 22)
429
+ # pos_body_data_global shape (bs, frame, 22, 3)
430
+ quat_params = body_skel.inverse_kinematics(pos_body_data_global, face_joint_indx)
431
+ bs = quat_params.shape[0]
432
+ frame = quat_params.shape[1]
433
+ cont6d_params = quaternion_to_cont6d(quat_params).view(bs, frame, -1)
434
+
435
+ # cont6d_params
436
+ rot6d_hand_data = data[..., 4 + 21 * 3: 4 + 21 * 3 + 30 * 6]
437
+
438
+ cont6d_params = torch.cat([cont6d_params, rot6d_hand_data], dim=-1)
439
+ cont6d_params = cont6d_params.view(-1, joints_num, 6)
440
+ r_pos = r_pos.view(-1,3)
441
+ positions = all_skel.forward_kinematics_cont6d(cont6d_params, r_pos)
442
+ return positions
443
+
444
+
445
+ def recover_rot(data):
446
+ # dataset [bs, seqlen, 263/251] HumanML/KIT
447
+ joints_num = 22 if data.shape[-1] == 263 else 21
448
+ r_rot_quat, r_pos = recover_root_rot_pos(data)
449
+ r_pos_pad = torch.cat([r_pos, torch.zeros_like(r_pos)], dim=-1).unsqueeze(-2)
450
+ r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)
451
+ start_indx = 1 + 2 + 1 + (joints_num - 1) * 3
452
+ end_indx = start_indx + (joints_num - 1) * 6
453
+ cont6d_params = data[..., start_indx:end_indx]
454
+ cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
455
+ cont6d_params = cont6d_params.view(-1, joints_num, 6)
456
+ cont6d_params = torch.cat([cont6d_params, r_pos_pad], dim=-2)
457
+ return cont6d_params
458
+
459
+
460
+ def recover_from_ric(data, joints_num):
461
+ r_rot_quat, r_pos = recover_root_rot_pos(data)
462
+ positions = data[..., 4:(joints_num - 1) * 3 + 4]
463
+ positions = positions.view(positions.shape[:-1] + (-1, 3))
464
+
465
+ '''Add Y-axis rotation to local joints'''
466
+ positions = qrot(qinv(r_rot_quat[..., None, :]).expand(positions.shape[:-1] + (4,)), positions)
467
+
468
+ '''Add root XZ to joints'''
469
+ positions[..., 0] += r_pos[..., 0:1]
470
+ positions[..., 2] += r_pos[..., 2:3]
471
+
472
+ '''Concate root and joints'''
473
+ positions = torch.cat([r_pos.unsqueeze(-2), positions], dim=-2)
474
+
475
+ return positions
476
+
477
+
478
+ '''
479
+ For Text2Motion Dataset
480
+ '''
481
+ '''
482
+ if __name__ == "__main__":
483
+ example_id = "000021"
484
+ # Lower legs
485
+ l_idx1, l_idx2 = 5, 8
486
+ # Right/Left foot
487
+ fid_r, fid_l = [8, 11], [7, 10]
488
+ # Face direction, r_hip, l_hip, sdr_r, sdr_l
489
+ face_joint_indx = [2, 1, 17, 16]
490
+ # l_hip, r_hip
491
+ r_hip, l_hip = 2, 1
492
+ joints_num = 22
493
+ # ds_num = 8
494
+ data_dir = '../dataset/pose_data_raw/joints/'
495
+ save_dir1 = '../dataset/pose_data_raw/new_joints/'
496
+ save_dir2 = '../dataset/pose_data_raw/new_joint_vecs/'
497
+
498
+ n_raw_offsets = torch.from_numpy(t2m_raw_offsets)
499
+ kinematic_chain = t2m_kinematic_chain
500
+
501
+ # Get offsets of target skeleton
502
+ example_data = np.load(os.path.join(data_dir, example_id + '.npy'))
503
+ example_data = example_data.reshape(len(example_data), -1, 3)
504
+ example_data = torch.from_numpy(example_data)
505
+ tgt_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
506
+ # (joints_num, 3)
507
+ tgt_offsets = tgt_skel.get_offsets_joints(example_data[0])
508
+ # print(tgt_offsets)
509
+
510
+ source_list = os.listdir(data_dir)
511
+ frame_num = 0
512
+ for source_file in tqdm(source_list):
513
+ source_data = np.load(os.path.join(data_dir, source_file))[:, :joints_num]
514
+ try:
515
+ dataset, ground_positions, positions, l_velocity = process_file(source_data, 0.002)
516
+ rec_ric_data = recover_from_ric(torch.from_numpy(dataset).unsqueeze(0).float(), joints_num)
517
+ np.save(pjoin(save_dir1, source_file), rec_ric_data.squeeze().numpy())
518
+ np.save(pjoin(save_dir2, source_file), dataset)
519
+ frame_num += dataset.shape[0]
520
+ except Exception as e:
521
+ print(source_file)
522
+ print(e)
523
+
524
+ print('Total clips: %d, Frames: %d, Duration: %fm' %
525
+ (len(source_list), frame_num, frame_num / 20 / 60))
526
+ '''
527
+
528
+ if __name__ == "__main__":
529
+ example_id = "03950_gt"
530
+ # Lower legs
531
+ l_idx1, l_idx2 = 17, 18
532
+ # Right/Left foot
533
+ fid_r, fid_l = [14, 15], [19, 20]
534
+ # Face direction, r_hip, l_hip, sdr_r, sdr_l
535
+ face_joint_indx = [11, 16, 5, 8]
536
+ # l_hip, r_hip
537
+ r_hip, l_hip = 11, 16
538
+ joints_num = 21
539
+ # ds_num = 8
540
+ data_dir = '../dataset/kit_mocap_dataset/joints/'
541
+ save_dir1 = '../dataset/kit_mocap_dataset/new_joints/'
542
+ save_dir2 = '../dataset/kit_mocap_dataset/new_joint_vecs/'
543
+
544
+ n_raw_offsets = torch.from_numpy(kit_raw_offsets)
545
+ kinematic_chain = kit_kinematic_chain
546
+
547
+ '''Get offsets of target skeleton'''
548
+ example_data = np.load(os.path.join(data_dir, example_id + '.npy'))
549
+ example_data = example_data.reshape(len(example_data), -1, 3)
550
+ example_data = torch.from_numpy(example_data)
551
+ tgt_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
552
+ # (joints_num, 3)
553
+ tgt_offsets = tgt_skel.get_offsets_joints(example_data[0])
554
+ # print(tgt_offsets)
555
+
556
+ source_list = os.listdir(data_dir)
557
+ frame_num = 0
558
+ '''Read source dataset'''
559
+ for source_file in tqdm(source_list):
560
+ source_data = np.load(os.path.join(data_dir, source_file))[:, :joints_num]
561
+ try:
562
+ name = ''.join(source_file[:-7].split('_')) + '.npy'
563
+ data, ground_positions, positions, l_velocity = process_file(source_data, 0.05)
564
+ rec_ric_data = recover_from_ric(torch.from_numpy(data).unsqueeze(0).float(), joints_num)
565
+ if np.isnan(rec_ric_data.numpy()).any():
566
+ print(source_file)
567
+ continue
568
+ np.save(pjoin(save_dir1, name), rec_ric_data.squeeze().numpy())
569
+ np.save(pjoin(save_dir2, name), data)
570
+ frame_num += data.shape[0]
571
+ except Exception as e:
572
+ print(source_file)
573
+ print(e)
574
+
575
+ print('Total clips: %d, Frames: %d, Duration: %fm' %
576
+ (len(source_list), frame_num, frame_num / 12.5 / 60))
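A hedged sketch of recover_from_ric above, turning 263-dim features (the layout used in the commented-out HumanML3D block) back into 22 global joint positions; the .npy path is a placeholder.

import numpy as np
import torch
from mld.data.humanml.scripts.motion_process import recover_from_ric

features = np.load('new_joint_vecs/000021.npy')       # (seq_len, 263), placeholder path
joints = recover_from_ric(torch.from_numpy(features).float().unsqueeze(0), joints_num=22)
print(joints.shape)                                   # (1, seq_len, 22, 3)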
Evaluator_272/mld/data/humanml/utils/__init__.py ADDED
File without changes
Evaluator_272/mld/data/humanml/utils/metrics.py ADDED
@@ -0,0 +1,142 @@
1
+ import numpy as np
2
+ from scipy import linalg
3
+
4
+ def euclidean_distance_matrix(matrix1, matrix2):
5
+ """
6
+ Params:
7
+ -- matrix1: N1 x D
8
+ -- matrix2: N2 x D
9
+ Returns:
10
+ -- dist: N1 x N2
11
+ dist[i, j] == distance(matrix1[i], matrix2[j])
12
+ """
13
+ assert matrix1.shape[1] == matrix2.shape[1]
14
+ d1 = -2 * np.dot(matrix1, matrix2.T)
15
+ d2 = np.sum(np.square(matrix1), axis=1, keepdims=True)
16
+ d3 = np.sum(np.square(matrix2), axis=1)
17
+ dists = np.sqrt(d1 + d2 + d3)
18
+ return dists
19
+
20
+ def calculate_top_k(mat, top_k):
21
+ size = mat.shape[0]
22
+ gt_mat = np.expand_dims(np.arange(size), 1).repeat(size, 1)
23
+ bool_mat = (mat == gt_mat)
24
+ correct_vec = False
25
+ top_k_list = []
26
+ for i in range(top_k):
27
+ correct_vec = (correct_vec | bool_mat[:, i])
28
+ top_k_list.append(correct_vec[:, None])
29
+ top_k_mat = np.concatenate(top_k_list, axis=1)
30
+ return top_k_mat
31
+
32
+
33
+ def calculate_R_precision(embedding1, embedding2, top_k, sum_all=False):
34
+ dist_mat = euclidean_distance_matrix(embedding1, embedding2)
35
+ argmax = np.argsort(dist_mat, axis=1)
36
+ top_k_mat = calculate_top_k(argmax, top_k)
37
+ if sum_all:
38
+ return top_k_mat.sum(axis=0)
39
+ else:
40
+ return top_k_mat
41
+
42
+
43
+ def calculate_matching_score(embedding1, embedding2, sum_all=False):
44
+ assert len(embedding1.shape) == 2
45
+ assert embedding1.shape[0] == embedding2.shape[0]
46
+ assert embedding1.shape[1] == embedding2.shape[1]
47
+
48
+ dist = linalg.norm(embedding1 - embedding2, axis=1)
49
+ if sum_all:
50
+ return dist.sum(axis=0)
51
+ else:
52
+ return dist
53
+
54
+
55
+
56
+ def calculate_activation_statistics(activations):
57
+ """
58
+ Params:
59
+ -- activation: num_samples x dim_feat
60
+ Returns:
61
+ -- mu: dim_feat
62
+ -- sigma: dim_feat x dim_feat
63
+ """
64
+ mu = np.mean(activations, axis=0)
65
+ cov = np.cov(activations, rowvar=False)
66
+ return mu, cov
67
+
68
+
69
+ def calculate_diversity(activation, diversity_times):
70
+ assert len(activation.shape) == 2
71
+ assert activation.shape[0] > diversity_times
72
+ num_samples = activation.shape[0]
73
+
74
+ first_indices = np.random.choice(num_samples, diversity_times, replace=False)
75
+ second_indices = np.random.choice(num_samples, diversity_times, replace=False)
76
+ dist = linalg.norm(activation[first_indices] - activation[second_indices], axis=1)
77
+ return dist.mean()
78
+
79
+
80
+ def calculate_multimodality(activation, multimodality_times):
81
+ assert len(activation.shape) == 3
82
+ assert activation.shape[1] > multimodality_times
83
+ num_per_sent = activation.shape[1]
84
+
85
+ first_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
86
+ second_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
87
+ dist = linalg.norm(activation[:, first_dices] - activation[:, second_dices], axis=2)
88
+ return dist.mean()
89
+
90
+
91
+ def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
92
+ """Numpy implementation of the Frechet Distance.
93
+ The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
94
+ and X_2 ~ N(mu_2, C_2) is
95
+ d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
96
+ Stable version by Dougal J. Sutherland.
97
+ Params:
98
+ -- mu1 : Numpy array containing the activations of a layer of the
99
+ inception net (like returned by the function 'get_predictions')
100
+ for generated samples.
101
+ -- mu2 : The sample mean over activations, precalculated on a
102
+ representative data set.
103
+ -- sigma1: The covariance matrix over activations for generated samples.
104
+ -- sigma2: The covariance matrix over activations, precalculated on a
105
+ representative data set.
106
+ Returns:
107
+ -- : The Frechet Distance.
108
+ """
109
+
110
+ mu1 = np.atleast_1d(mu1)
111
+ mu2 = np.atleast_1d(mu2)
112
+
113
+ sigma1 = np.atleast_2d(sigma1)
114
+ sigma2 = np.atleast_2d(sigma2)
115
+
116
+ assert mu1.shape == mu2.shape, \
117
+ 'Training and test mean vectors have different lengths'
118
+ assert sigma1.shape == sigma2.shape, \
119
+ 'Training and test covariances have different dimensions'
120
+
121
+ diff = mu1 - mu2
122
+
123
+ # Product might be almost singular
124
+ covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
125
+ if not np.isfinite(covmean).all():
126
+ msg = ('fid calculation produces singular product; '
127
+ 'adding %s to diagonal of cov estimates') % eps
128
+ print(msg)
129
+ offset = np.eye(sigma1.shape[0]) * eps
130
+ covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
131
+
132
+ # Numerical error might give slight imaginary component
133
+ if np.iscomplexobj(covmean):
134
+ if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
135
+ m = np.max(np.abs(covmean.imag))
136
+ raise ValueError('Imaginary component {}'.format(m))
137
+ covmean = covmean.real
138
+
139
+ tr_covmean = np.trace(covmean)
140
+
141
+ return (diff.dot(diff) + np.trace(sigma1) +
142
+ np.trace(sigma2) - 2 * tr_covmean)
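A hedged sketch showing how these helpers combine into an FID-style score; random features stand in for real evaluator activations.

import numpy as np
from mld.data.humanml.utils.metrics import calculate_activation_statistics, calculate_frechet_distance

gen_feats = np.random.randn(1000, 512)   # placeholder generated-motion features
gt_feats = np.random.randn(1000, 512)    # placeholder ground-truth features

mu_gen, cov_gen = calculate_activation_statistics(gen_feats)
mu_gt, cov_gt = calculate_activation_statistics(gt_feats)
fid = calculate_frechet_distance(mu_gen, cov_gen, mu_gt, cov_gt)
print('FID:', fid)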
Evaluator_272/mld/data/humanml/utils/paramUtil.py ADDED
@@ -0,0 +1,63 @@
1
+ import numpy as np
2
+
3
+ # Define a kinematic tree for the skeletal structure
4
+ kit_kinematic_chain = [[0, 11, 12, 13, 14, 15], [0, 16, 17, 18, 19, 20], [0, 1, 2, 3, 4], [3, 5, 6, 7], [3, 8, 9, 10]]
5
+
6
+ kit_raw_offsets = np.array(
7
+ [
8
+ [0, 0, 0],
9
+ [0, 1, 0],
10
+ [0, 1, 0],
11
+ [0, 1, 0],
12
+ [0, 1, 0],
13
+ [1, 0, 0],
14
+ [0, -1, 0],
15
+ [0, -1, 0],
16
+ [-1, 0, 0],
17
+ [0, -1, 0],
18
+ [0, -1, 0],
19
+ [1, 0, 0],
20
+ [0, -1, 0],
21
+ [0, -1, 0],
22
+ [0, 0, 1],
23
+ [0, 0, 1],
24
+ [-1, 0, 0],
25
+ [0, -1, 0],
26
+ [0, -1, 0],
27
+ [0, 0, 1],
28
+ [0, 0, 1]
29
+ ]
30
+ )
31
+
32
+ t2m_raw_offsets = np.array([[0,0,0],
33
+ [1,0,0],
34
+ [-1,0,0],
35
+ [0,1,0],
36
+ [0,-1,0],
37
+ [0,-1,0],
38
+ [0,1,0],
39
+ [0,-1,0],
40
+ [0,-1,0],
41
+ [0,1,0],
42
+ [0,0,1],
43
+ [0,0,1],
44
+ [0,1,0],
45
+ [1,0,0],
46
+ [-1,0,0],
47
+ [0,0,1],
48
+ [0,-1,0],
49
+ [0,-1,0],
50
+ [0,-1,0],
51
+ [0,-1,0],
52
+ [0,-1,0],
53
+ [0,-1,0]])
54
+
55
+ t2m_kinematic_chain = [[0, 2, 5, 8, 11], [0, 1, 4, 7, 10], [0, 3, 6, 9, 12, 15], [9, 14, 17, 19, 21], [9, 13, 16, 18, 20]]
56
+ t2m_left_hand_chain = [[20, 22, 23, 24], [20, 34, 35, 36], [20, 25, 26, 27], [20, 31, 32, 33], [20, 28, 29, 30]]
57
+ t2m_right_hand_chain = [[21, 43, 44, 45], [21, 46, 47, 48], [21, 40, 41, 42], [21, 37, 38, 39], [21, 49, 50, 51]]
58
+
59
+
60
+ kit_tgt_skel_id = '03950'
61
+
62
+ t2m_tgt_skel_id = '000021'
63
+
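These constants feed the Skeleton class added earlier in this commit; a small sanity sketch checking that the five T2M chains cover all 22 body joints.

import torch
from mld.data.humanml.common.skeleton import Skeleton
from mld.data.humanml.utils.paramUtil import t2m_raw_offsets, t2m_kinematic_chain

covered = sorted({j for chain in t2m_kinematic_chain for j in chain})
assert covered == list(range(22))        # legs, spine/head and arms together cover joints 0..21

skel = Skeleton(torch.from_numpy(t2m_raw_offsets), t2m_kinematic_chain, 'cpu')
print(skel.njoints())                    # 22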
Evaluator_272/mld/data/humanml/utils/plot_script.py ADDED
@@ -0,0 +1,103 @@
1
+ import math
2
+ # import cv2
3
+ from textwrap import wrap
4
+
5
+ import matplotlib
6
+ import matplotlib.pyplot as plt
7
+ import mpl_toolkits.mplot3d.axes3d as p3
8
+ import numpy as np
9
+ from matplotlib.animation import FFMpegFileWriter, FuncAnimation
10
+ from mpl_toolkits.mplot3d import Axes3D
11
+ from mpl_toolkits.mplot3d.art3d import Poly3DCollection
12
+
13
+ import mld.data.humanml.utils.paramUtil as paramUtil
14
+
15
+ skeleton = paramUtil.t2m_kinematic_chain
16
+
17
+
18
+ def list_cut_average(ll, intervals):
19
+ if intervals == 1:
20
+ return ll
21
+
22
+ bins = math.ceil(len(ll) * 1.0 / intervals)
23
+ ll_new = []
24
+ for i in range(bins):
25
+ l_low = intervals * i
26
+ l_high = l_low + intervals
27
+ l_high = l_high if l_high < len(ll) else len(ll)
28
+ ll_new.append(np.mean(ll[l_low:l_high]))
29
+ return ll_new
30
+
31
+
32
+ def plot_3d_motion(save_path, joints, title, figsize=(3, 3), fps=120, radius=3, kinematic_tree=skeleton):
33
+ matplotlib.use('Agg')
34
+ title = '\n'.join(wrap(title, 20))
35
+
36
+ def init():
37
+ ax.set_xlim3d([-radius / 2, radius / 2])
38
+ ax.set_ylim3d([0, radius])
39
+ ax.set_zlim3d([-radius / 3., radius * 2 / 3.])
40
+ fig.suptitle(title, fontsize=10)
41
+ ax.grid(visible=False)  # the 'b=' keyword was removed in newer Matplotlib releases
42
+
43
+ def plot_xzPlane(minx, maxx, miny, minz, maxz):
44
+ verts = [
45
+ [minx, miny, minz],
46
+ [minx, miny, maxz],
47
+ [maxx, miny, maxz],
48
+ [maxx, miny, minz]
49
+ ]
50
+ xz_plane = Poly3DCollection([verts])
51
+ xz_plane.set_facecolor((0.5, 0.5, 0.5, 0.5))
52
+ ax.add_collection3d(xz_plane)
53
+
54
+
55
+ data = joints.copy().reshape(len(joints), -1, 3)
56
+ fig = plt.figure(figsize=figsize)
57
+ plt.tight_layout()
58
+ ax = p3.Axes3D(fig)
59
+ init()
60
+ MINS = data.min(axis=0).min(axis=0)
61
+ MAXS = data.max(axis=0).max(axis=0)
62
+
63
+ colors = ["#DD5A37", "#D69E00", "#B75A39", "#DD5A37", "#D69E00",
64
+ "#FF6D00", "#FF6D00", "#FF6D00", "#FF6D00", "#FF6D00",
65
+ "#DDB50E", "#DDB50E", "#DDB50E", "#DDB50E", "#DDB50E", ]
66
+
67
+ frame_number = data.shape[0]
68
+
69
+ height_offset = MINS[1]
70
+ data[:, :, 1] -= height_offset
71
+ trajec = data[:, 0, [0, 2]]
72
+
73
+ data[..., 0] -= data[:, 0:1, 0]
74
+ data[..., 2] -= data[:, 0:1, 2]
75
+
76
+
77
+ def update(index):
78
+
79
+ ax.view_init(elev=120, azim=-90)
80
+ ax.dist = 7.5
81
+ plot_xzPlane(MINS[0] - trajec[index, 0], MAXS[0] - trajec[index, 0], 0, MINS[2] - trajec[index, 1],
82
+ MAXS[2] - trajec[index, 1])
83
+
84
+
85
+ for i, (chain, color) in enumerate(zip(kinematic_tree, colors)):
86
+ # print(color)
87
+ if i < 5:
88
+ linewidth = 4.0
89
+ else:
90
+ linewidth = 2.0
91
+ ax.plot3D(data[index, chain, 0], data[index, chain, 1], data[index, chain, 2], linewidth=linewidth,
92
+ color=color)
93
+
94
+ plt.axis('off')
95
+ ax.set_xticklabels([])
96
+ ax.set_yticklabels([])
97
+ ax.set_zticklabels([])
98
+
99
+ ani = FuncAnimation(fig, update, frames=frame_number,
100
+ interval=1000 / fps, repeat=False)
101
+
102
+ ani.save(save_path, fps=fps)
103
+ plt.close()
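A hedged rendering sketch for plot_3d_motion above; the joints file and caption are placeholders, and saving .mp4 requires ffmpeg.

import numpy as np
from mld.data.humanml.utils.plot_script import plot_3d_motion

joints = np.load('new_joints/000021.npy')             # (seq_len, 22, 3), placeholder path
plot_3d_motion('000021.mp4', joints, title='a person walks forward', fps=20)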
Evaluator_272/mld/data/humanml/utils/utils.py ADDED
@@ -0,0 +1,163 @@
1
+ import os
2
+ import numpy as np
3
+ # import cv2
4
+ from PIL import Image
5
+ import mld.data.humanml.utils.paramUtil as paramUtil  # package-qualified so the module imports cleanly
6
+ import math
7
+ import time
8
+ import matplotlib.pyplot as plt
9
+ from scipy.ndimage import gaussian_filter
10
+
11
+
12
+ def mkdir(path):
13
+ if not os.path.exists(path):
14
+ os.makedirs(path)
15
+
16
+ COLORS = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
17
+ [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
18
+ [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
19
+
20
+ MISSING_VALUE = -1
21
+
22
+ def save_image(image_numpy, image_path):
23
+ img_pil = Image.fromarray(image_numpy)
24
+ img_pil.save(image_path)
25
+
26
+
27
+ def save_logfile(log_loss, save_path):
28
+ with open(save_path, 'wt') as f:
29
+ for k, v in log_loss.items():
30
+ w_line = k
31
+ for digit in v:
32
+ w_line += ' %.3f' % digit
33
+ f.write(w_line + '\n')
34
+
35
+
36
+ def print_current_loss(start_time, niter_state, losses, epoch=None, sub_epoch=None,
37
+ inner_iter=None, tf_ratio=None, sl_steps=None):
38
+
39
+ def as_minutes(s):
40
+ m = math.floor(s / 60)
41
+ s -= m * 60
42
+ return '%dm %ds' % (m, s)
43
+
44
+ def time_since(since, percent):
45
+ now = time.time()
46
+ s = now - since
47
+ es = s / percent
48
+ rs = es - s
49
+ return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
50
+
51
+ if epoch is not None:
52
+ print('epoch: %3d niter: %6d sub_epoch: %2d inner_iter: %4d' % (epoch, niter_state, sub_epoch, inner_iter), end=" ")
53
+
54
+
55
+ now = time.time()
56
+ message = '%s'%(as_minutes(now - start_time))
57
+
58
+ for k, v in losses.items():
59
+ message += ' %s: %.4f ' % (k, v)
60
+ message += ' sl_length:%2d tf_ratio:%.2f'%(sl_steps, tf_ratio)
61
+ print(message)
62
+
63
+ def print_current_loss_decomp(start_time, niter_state, total_niters, losses, epoch=None, inner_iter=None):
64
+
65
+ def as_minutes(s):
66
+ m = math.floor(s / 60)
67
+ s -= m * 60
68
+ return '%dm %ds' % (m, s)
69
+
70
+ def time_since(since, percent):
71
+ now = time.time()
72
+ s = now - since
73
+ es = s / percent
74
+ rs = es - s
75
+ return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
76
+
77
+ print('epoch: %03d inner_iter: %5d' % (epoch, inner_iter), end=" ")
78
+ # now = time.time()
79
+ message = '%s niter: %07d completed: %3d%%)'%(time_since(start_time, niter_state / total_niters), niter_state, niter_state / total_niters * 100)
80
+ for k, v in losses.items():
81
+ message += ' %s: %.4f ' % (k, v)
82
+ print(message)
83
+
84
+
85
+ def compose_gif_img_list(img_list, fp_out, duration):
86
+ img, *imgs = [Image.fromarray(np.array(image)) for image in img_list]
87
+ img.save(fp=fp_out, format='GIF', append_images=imgs, optimize=False,
88
+ save_all=True, loop=0, duration=duration)
89
+
90
+
91
+ def save_images(visuals, image_path):
92
+ if not os.path.exists(image_path):
93
+ os.makedirs(image_path)
94
+
95
+ for i, (label, img_numpy) in enumerate(visuals.items()):
96
+ img_name = '%d_%s.jpg' % (i, label)
97
+ save_path = os.path.join(image_path, img_name)
98
+ save_image(img_numpy, save_path)
99
+
100
+
101
+ def save_images_test(visuals, image_path, from_name, to_name):
102
+ if not os.path.exists(image_path):
103
+ os.makedirs(image_path)
104
+
105
+ for i, (label, img_numpy) in enumerate(visuals.items()):
106
+ img_name = "%s_%s_%s" % (from_name, to_name, label)
107
+ save_path = os.path.join(image_path, img_name)
108
+ save_image(img_numpy, save_path)
109
+
110
+
111
+ def compose_and_save_img(img_list, save_dir, img_name, col=4, row=1, img_size=(256, 200)):
112
+ # print(col, row)
113
+ compose_img = compose_image(img_list, col, row, img_size)
114
+ if not os.path.exists(save_dir):
115
+ os.makedirs(save_dir)
116
+ img_path = os.path.join(save_dir, img_name)
117
+ compose_img.save(img_path)
118
+
119
+
120
+ def compose_image(img_list, col, row, img_size):
121
+ to_image = Image.new('RGB', (col * img_size[0], row * img_size[1]))
122
+ for y in range(0, row):
123
+ for x in range(0, col):
124
+ from_img = Image.fromarray(img_list[y * col + x])
125
+
126
+ paste_area = (x * img_size[0], y*img_size[1],
127
+ (x + 1) * img_size[0], (y + 1) * img_size[1])
128
+ to_image.paste(from_img, paste_area)
129
+ return to_image
130
+
131
+
132
+ def plot_loss_curve(losses, save_path, intervals=500):
133
+ plt.figure(figsize=(10, 5))
134
+ plt.title("Loss During Training")
135
+ for key in losses.keys():
136
+ plt.plot(list_cut_average(losses[key], intervals), label=key)
137
+ plt.xlabel("Iterations/" + str(intervals))
138
+ plt.ylabel("Loss")
139
+ plt.legend()
140
+ plt.savefig(save_path)
141
+ plt.show()
142
+
143
+
144
+ def list_cut_average(ll, intervals):
145
+ if intervals == 1:
146
+ return ll
147
+
148
+ bins = math.ceil(len(ll) * 1.0 / intervals)
149
+ ll_new = []
150
+ for i in range(bins):
151
+ l_low = intervals * i
152
+ l_high = l_low + intervals
153
+ l_high = l_high if l_high < len(ll) else len(ll)
154
+ ll_new.append(np.mean(ll[l_low:l_high]))
155
+ return ll_new
156
+
157
+
158
+ def motion_temporal_filter(motion, sigma=1):
159
+ motion = motion.reshape(motion.shape[0], -1)
160
+ for i in range(motion.shape[1]):
161
+ motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
162
+ return motion.reshape(motion.shape[0], -1, 3)
163
+
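A hedged smoothing sketch for motion_temporal_filter above; random joints stand in for real data.

import numpy as np
from mld.data.humanml.utils.utils import motion_temporal_filter

joints = np.random.randn(120, 22, 3).astype(np.float32)     # (seq_len, joints, 3) placeholder
smoothed = motion_temporal_filter(joints.copy(), sigma=2.5)  # Gaussian-smooths each coordinate over time
print(smoothed.shape)                                        # (120, 22, 3)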
Evaluator_272/mld/data/humanml/utils/word_vectorizer.py ADDED
@@ -0,0 +1,143 @@
+ import numpy as np
+ import pickle
+ from os.path import join as pjoin
+
+ POS_enumerator = {
+     'VERB': 0,
+     'NOUN': 1,
+     'DET': 2,
+     'ADP': 3,
+     'NUM': 4,
+     'AUX': 5,
+     'PRON': 6,
+     'ADJ': 7,
+     'ADV': 8,
+     'Loc_VIP': 9,
+     'Body_VIP': 10,
+     'Obj_VIP': 11,
+     'Act_VIP': 12,
+     'Desc_VIP': 13,
+     'OTHER': 14,
+ }
+
+ Loc_list = ('left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise', 'forward', 'back', 'backward',
+             'up', 'down', 'straight', 'curve')
+
+ Body_list = ('arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh')
+
+ Obj_List = ('stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball', 'handrail', 'baseball', 'basketball')
+
+ Act_list = ('walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat', 'throw', 'hop', 'dance', 'jump', 'turn',
+             'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll',
+             'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate', 'spin', 'spread', 'climb')
+
+ Desc_list = ('slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy', 'angry', 'sad', 'happily',
+              'angrily', 'sadly')
+
+ VIP_dict = {
+     'Loc_VIP': Loc_list,
+     'Body_VIP': Body_list,
+     'Obj_VIP': Obj_List,
+     'Act_VIP': Act_list,
+     'Desc_VIP': Desc_list,
+ }
+
+
+ class WordVectorizer(object):
+     def __init__(self, meta_root, prefix, text_encode_way):
+
+         self.text_encode_way = text_encode_way
+
+         vectors = np.load(pjoin(meta_root, '%s_data.npy' % prefix))
+         words = pickle.load(open(pjoin(meta_root, '%s_words.pkl' % prefix), 'rb'))
+         word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl' % prefix), 'rb'))
+         self.word2vec = {w: vectors[word2idx[w]] for w in words}
+
+         if 'glove_6B' in self.text_encode_way:
+             from torchtext.vocab import GloVe
+             glove_6b = GloVe(name='6B', dim=300)
+             self.word2vec_glove_6b = glove_6b.get_vecs_by_tokens
+
+     def _get_pos_ohot(self, pos):
+         pos_vec = np.zeros(len(POS_enumerator))
+         if pos in POS_enumerator:
+             pos_vec[POS_enumerator[pos]] = 1
+         else:
+             pos_vec[POS_enumerator['OTHER']] = 1
+         return pos_vec
+
+     def __len__(self):
+         return len(self.word2vec)
+
+     def __getitem__(self, item):
+         word, pos = item.split('/')
+         if 'given_glove' in self.text_encode_way:
+             if word in self.word2vec:
+                 word_vec = self.word2vec[word]
+                 vip_pos = None
+                 for key, values in VIP_dict.items():
+                     if word in values:
+                         vip_pos = key
+                         break
+                 if vip_pos is not None:
+                     pos_vec = self._get_pos_ohot(vip_pos)
+                 else:
+                     pos_vec = self._get_pos_ohot(pos)
+             else:
+                 word_vec = self.word2vec['unk']
+                 pos_vec = self._get_pos_ohot('OTHER')
+
+         elif 'glove_6B' in self.text_encode_way:
+             word_vec = self.word2vec_glove_6b([word]).squeeze()
+
+             if word in self.word2vec:
+                 vip_pos = None
+                 for key, values in VIP_dict.items():
+                     if word in values:
+                         vip_pos = key
+                         break
+                 if vip_pos is not None:
+                     pos_vec = self._get_pos_ohot(vip_pos)
+                 else:
+                     pos_vec = self._get_pos_ohot(pos)
+             else:
+                 pos_vec = self._get_pos_ohot('OTHER')
+
+         return word_vec, pos_vec
+
+
+ class WordVectorizer_only_text_token(object):
+     def __init__(self, meta_root, prefix, text_encode_way):
+
+         self.text_encode_way = text_encode_way
+
+         vectors = np.load(pjoin(meta_root, '%s_data.npy' % prefix))
+         words = pickle.load(open(pjoin(meta_root, '%s_words.pkl' % prefix), 'rb'))
+         word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl' % prefix), 'rb'))
+         self.word2vec = {w: vectors[word2idx[w]] for w in words}
+
+         if 'glove_6B' in self.text_encode_way:
+             from torchtext.vocab import GloVe
+             glove_6b = GloVe(name='6B', dim=300)
+             self.word2vec_glove_6b = glove_6b.get_vecs_by_tokens
+
+     def __len__(self):
+         return len(self.word2vec)
+
+     def __getitem__(self, item):
+         word = item
+
+         if 'given_glove' in self.text_encode_way:
+             if word in self.word2vec:
+                 word_vec = self.word2vec[word]
+             else:
+                 word_vec = self.word2vec['unk']
+
+         elif 'glove_6B' in self.text_encode_way:
+             word_vec = self.word2vec_glove_6b([word]).squeeze()
+
+         return word_vec
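A minimal usage sketch of the WordVectorizer API added above (not part of this commit); the './glove' directory, the 'our_vab' prefix, and the 'given_glove' mode are assumptions chosen for illustration only:

# Hypothetical example: meta_root, prefix and text_encode_way are assumed, not taken from the repo.
from mld.data.humanml.utils.word_vectorizer import WordVectorizer

w_vectorizer = WordVectorizer('./glove', 'our_vab', 'given_glove')
word_emb, pos_onehot = w_vectorizer['walk/VERB']  # word embedding plus 15-dim POS one-hot
print(word_emb.shape, pos_onehot.shape)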
Evaluator_272/mld/data/sampling/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .base import FrameSampler
+ from .framerate import subsample, upsample
Evaluator_272/mld/data/sampling/base.py ADDED
@@ -0,0 +1,41 @@
+ from .frames import get_frameix_from_data_index
+
+
+ class FrameSampler:
+     def __init__(self, sampling="conseq", sampling_step=1, request_frames=None,
+                  threshold_reject=0.75, max_len=1000, min_len=10):
+         self.sampling = sampling
+
+         self.sampling_step = sampling_step
+         self.request_frames = request_frames
+         self.threshold_reject = threshold_reject
+         self.max_len = max_len
+         self.min_len = min_len
+
+     def __call__(self, num_frames):
+
+         return get_frameix_from_data_index(num_frames,
+                                            self.request_frames,
+                                            self.sampling,
+                                            self.sampling_step)
+
+     def accept(self, duration):
+         # Outputs have original lengths
+         # Check if it is too long
+         if self.request_frames is None:
+             if duration > self.max_len:
+                 return False
+             elif duration < self.min_len:
+                 return False
+         else:
+             # Reject the sample if its length is
+             # too small relative to the requested
+             # number of frames
+             min_number = self.threshold_reject * self.request_frames
+             if duration < min_number:
+                 return False
+         return True
+
+     def get(self, key, default=None):
+         return getattr(self, key, default)
+
+     def __getitem__(self, key):
+         return getattr(self, key)
Evaluator_272/mld/data/sampling/framerate.py ADDED
@@ -0,0 +1,32 @@
+ import numpy as np
+
+
+ def subsample(num_frames, last_framerate, new_framerate):
+     step = int(last_framerate / new_framerate)
+     assert step >= 1
+     frames = np.arange(0, num_frames, step)
+     return frames
+
+
+ def upsample(motion, last_framerate, new_framerate):
+     step = int(new_framerate / last_framerate)
+     assert step >= 1
+
+     # Alpha blending => linear interpolation between consecutive frames
+     alpha = np.linspace(0, 1, step + 1)
+     last = np.einsum("l,...->l...", 1 - alpha, motion[:-1])
+     new = np.einsum("l,...->l...", alpha, motion[1:])
+
+     chunks = (last + new)[:-1]
+     output = np.concatenate(chunks.swapaxes(1, 0))
+     # Don't forget the last one
+     output = np.concatenate((output, motion[[-1]]))
+     return output
+
+
+ if __name__ == "__main__":
+     motion = np.arange(105)
+     submotion = motion[subsample(len(motion), 100.0, 12.5)]
+     newmotion = upsample(submotion, 12.5, 100)
+
+     print(newmotion)
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ from typing import Optional
+
+ import numpy as np
+ from numpy import ndarray as Array
+ import random
+
+
+ def get_frameix_from_data_index(num_frames: int,
+                                 request_frames: Optional[int],
+                                 sampling: str = "conseq",
+                                 sampling_step: int = 1) -> Array:
+     nframes = num_frames
+
+     if request_frames is None:
+         frame_ix = np.arange(nframes)
+     else:
+
+         if request_frames > nframes:
+             fair = False  # True
+             if fair:
+                 # distills redundancy everywhere
+                 choices = np.random.choice(range(nframes),
+                                            request_frames,
+                                            replace=True)
+                 frame_ix = sorted(choices)
+             else:
+                 # adding the last frame until done
+                 ntoadd = max(0, request_frames - nframes)
+                 lastframe = nframes - 1
+                 padding = lastframe * np.ones(ntoadd, dtype=int)
+                 frame_ix = np.concatenate((np.arange(0, nframes),
+                                            padding))
+
+         elif sampling in ["conseq", "random_conseq"]:
+             step_max = (nframes - 1) // (request_frames - 1)
+             if sampling == "conseq":
+                 if sampling_step == -1 or sampling_step * (request_frames - 1) >= nframes:
+                     step = step_max
+                 else:
+                     step = sampling_step
+             elif sampling == "random_conseq":
+                 step = random.randint(1, step_max)
+
+             lastone = step * (request_frames - 1)
+             shift_max = nframes - lastone - 1
+             shift = random.randint(0, max(0, shift_max - 1))
+             frame_ix = shift + np.arange(0, lastone + 1, step)
+
+         elif sampling == "random":
+             choices = np.random.choice(range(nframes),
+                                        request_frames,
+                                        replace=False)
+             frame_ix = sorted(choices)
+
+         else:
+             raise ValueError("Sampling not recognized.")
+
+     return frame_ix
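A short sketch (an assumption, not from the commit) of how FrameSampler and get_frameix_from_data_index above fit together when drawing a fixed number of frames from a clip:

# Hypothetical values: request_frames=64 and the 100-frame clip length are chosen for illustration.
from mld.data.sampling import FrameSampler

sampler = FrameSampler(sampling="conseq", sampling_step=1, request_frames=64)
if sampler.accept(100):            # clip is long enough relative to threshold_reject * request_frames
    frame_ix = sampler(100)        # 64 evenly stepped indices into the 100-frame clip
    print(len(frame_ix), frame_ix[:5])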
Evaluator_272/mld/data/utils.py ADDED
@@ -0,0 +1,38 @@
+ import torch
+
+
+ def lengths_to_mask(lengths):
+     max_len = max(lengths)
+     mask = torch.arange(max_len, device=lengths.device).expand(
+         len(lengths), max_len) < lengths.unsqueeze(1)
+     return mask
+
+
+ def collate_tensors(batch):
+     dims = batch[0].dim()
+     max_size = [max([b.size(i) for b in batch]) for i in range(dims)]
+     size = (len(batch), ) + tuple(max_size)
+     canvas = batch[0].new_zeros(size=size)
+     for i, b in enumerate(batch):
+         sub_tensor = canvas[i]
+         for d in range(dims):
+             sub_tensor = sub_tensor.narrow(d, 0, b.size(d))
+         sub_tensor.add_(b)
+     return canvas
+
+
+ def mld_collate(batch):
+     notnone_batches = [b for b in batch if b is not None]
+     notnone_batches.sort(key=lambda x: x[2], reverse=True)
+     adapted_batch = {
+         "motion":
+         collate_tensors([torch.tensor(b[1]).float() for b in notnone_batches]),
+         "text": [b[0] for b in notnone_batches],
+         "length": [b[2] for b in notnone_batches],
+         "retrieval_name": [b[3] for b in notnone_batches]
+     }
+     return adapted_batch
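A small sketch (illustrative only) of how mld_collate above is typically passed to a PyTorch DataLoader; the (text, motion, length, retrieval_name) tuple order matches the indices the function reads, while the toy data itself is made up:

# Hypothetical wiring: the 272-dim motion features and the sample names are invented for the example.
import numpy as np
from torch.utils.data import DataLoader
from mld.data.utils import mld_collate

toy_items = [("a person walks forward", np.random.randn(60, 272), 60, "seq_000"),
             ("a person jumps", np.random.randn(40, 272), 40, "seq_001")]
loader = DataLoader(toy_items, batch_size=2, collate_fn=mld_collate)
batch = next(iter(loader))
# batch["motion"] is zero-padded to [2, 60, 272]; batch["length"] == [60, 40]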
Evaluator_272/mld/launch/__init__.py ADDED
File without changes
Evaluator_272/mld/launch/blender.py ADDED
@@ -0,0 +1,23 @@
+ # Fix blender path
+ import sys
+ import os
+ # local packages
+ sys.path.append(os.path.expanduser("~/.local/lib/python3.9/site-packages"))
+ import bpy
+ import os
+ from argparse import ArgumentParser
+
+
+ # Monkey patch argparse such that
+ # blender / python / hydra parsing works
+ def parse_args(self, args=None, namespace=None):
+     if args is not None:
+         return self.parse_args_bak(args=args, namespace=namespace)
+     try:
+         idx = sys.argv.index("--")
+         args = sys.argv[idx + 1:]  # the list after '--'
+     except ValueError:  # '--' not in the list
+         args = []
+     return self.parse_args_bak(args=args, namespace=namespace)
+
+
+ setattr(ArgumentParser, 'parse_args_bak', ArgumentParser.parse_args)
+ setattr(ArgumentParser, 'parse_args', parse_args)
Evaluator_272/mld/launch/prepare.py ADDED
@@ -0,0 +1,66 @@
+ import os
+ import warnings
+ from pathlib import Path
+
+ import hydra
+ from mld.tools.runid import generate_id
+ from omegaconf import OmegaConf
+
+
+ # Local paths
+ def code_path(path=""):
+     code_dir = hydra.utils.get_original_cwd()
+     code_dir = Path(code_dir)
+     return str(code_dir / path)
+
+
+ def working_path(path):
+     return str(Path(os.getcwd()) / path)
+
+
+ # fix the id for this run
+ ID = generate_id()
+
+
+ def generate_id():
+     return ID
+
+
+ def get_last_checkpoint(path, ckpt_name="last.ckpt"):
+     output_dir = Path(hydra.utils.to_absolute_path(path))
+     last_ckpt_path = output_dir / "checkpoints" / ckpt_name
+     return str(last_ckpt_path)
+
+
+ def get_kitname(load_amass_data: bool, load_with_rot: bool):
+     if not load_amass_data:
+         return "kit-mmm-xyz"
+     if load_amass_data and not load_with_rot:
+         return "kit-amass-xyz"
+     if load_amass_data and load_with_rot:
+         return "kit-amass-rot"
+
+
+ OmegaConf.register_new_resolver("code_path", code_path)
+ OmegaConf.register_new_resolver("working_path", working_path)
+ OmegaConf.register_new_resolver("generate_id", generate_id)
+ OmegaConf.register_new_resolver("absolute_path", hydra.utils.to_absolute_path)
+ OmegaConf.register_new_resolver("get_last_checkpoint", get_last_checkpoint)
+ OmegaConf.register_new_resolver("get_kitname", get_kitname)
+
+
+ # Remove warnings
+ warnings.filterwarnings(
+     "ignore", ".*Trying to infer the `batch_size` from an ambiguous collection.*"
+ )
+
+ warnings.filterwarnings(
+     "ignore", ".*does not have many workers which may be a bottleneck*"
+ )
+
+ warnings.filterwarnings(
+     "ignore", ".*Our suggested max number of worker in current system is*"
+ )
+
+
+ os.environ["NUMEXPR_MAX_THREADS"] = "24"
Evaluator_272/mld/launch/tools.py ADDED
@@ -0,0 +1,9 @@
+ from pathlib import Path
+ from omegaconf import DictConfig, OmegaConf
+ import hydra
+ import os
+
+
+ def resolve_cfg_path(cfg: DictConfig):
+     working_dir = os.getcwd()
+     cfg.working_dir = working_dir
Evaluator_272/mld/models/__init__.py ADDED
File without changes
Evaluator_272/mld/models/architectures/__init__.py ADDED
File without changes
Evaluator_272/mld/models/architectures/actor_vae.py ADDED
@@ -0,0 +1,258 @@
+ from typing import List, Optional, Union
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from torch import Tensor, nn
+ from torch.distributions.distribution import Distribution
+ from mld.utils.temos_utils import lengths_to_mask
+ from mld.models.operator import PositionalEncoding
+
+
+ class ActorVae(nn.Module):
+
+     def __init__(self,
+                  ablation,
+                  nfeats: int,
+                  latent_dim: list = [1, 256],
+                  ff_size: int = 1024,
+                  num_layers: int = 9,
+                  num_heads: int = 4,
+                  dropout: float = 0.1,
+                  is_vae: bool = True,
+                  activation: str = "gelu",
+                  position_embedding: str = "learned",
+                  **kwargs) -> None:
+
+         super().__init__()
+
+         self.latent_size = latent_dim[0]
+         self.latent_dim = latent_dim[-1]
+         self.is_vae = is_vae
+         input_feats = nfeats
+         output_feats = nfeats
+
+         self.encoder = ActorAgnosticEncoder(nfeats=input_feats,
+                                             vae=True,
+                                             latent_dim=self.latent_dim,
+                                             ff_size=ff_size,
+                                             num_layers=num_layers,
+                                             num_heads=num_heads,
+                                             dropout=dropout,
+                                             activation=activation,
+                                             **kwargs)
+
+         self.decoder = ActorAgnosticDecoder(nfeats=output_feats,
+                                             vae=True,
+                                             latent_dim=self.latent_dim,
+                                             ff_size=ff_size,
+                                             num_layers=num_layers,
+                                             num_heads=num_heads,
+                                             dropout=dropout,
+                                             activation=activation,
+                                             **kwargs)
+
+     def forward(self, features: Tensor, lengths: Optional[List[int]] = None):
+         # Temp
+         # Todo
+         # remove and test this function
+         print("Should Not enter here")
+
+         z, dist = self.encode(features, lengths)
+         feats_rst = self.decode(z, lengths)
+         return feats_rst, z, dist
+
+     def encode(
+             self,
+             features: Tensor,
+             lengths: Optional[List[int]] = None
+     ) -> Union[Tensor, Distribution]:
+
+         dist = self.encoder(features, lengths)
+         if self.is_vae:
+             latent = sample_from_distribution(dist)
+         else:
+             latent = dist.unsqueeze(0)
+
+         return latent, dist
+
+     def decode(self, z: Tensor, lengths: List[int]):
+
+         feats = self.decoder(z, lengths)
+         return feats
+
+
+ class ActorAgnosticEncoder(nn.Module):
+
+     def __init__(self,
+                  nfeats: int,
+                  vae: bool,
+                  latent_dim: int = 256,
+                  ff_size: int = 1024,
+                  num_layers: int = 4,
+                  num_heads: int = 4,
+                  dropout: float = 0.1,
+                  activation: str = "gelu",
+                  **kwargs) -> None:
+         super().__init__()
+
+         input_feats = nfeats
+         self.vae = vae
+         self.skel_embedding = nn.Linear(input_feats, latent_dim)
+
+         # Action agnostic: only one set of params
+         if vae:
+             self.mu_token = nn.Parameter(torch.randn(latent_dim))
+             self.logvar_token = nn.Parameter(torch.randn(latent_dim))
+         else:
+             self.emb_token = nn.Parameter(torch.randn(latent_dim))
+
+         self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout)
+
+         seq_trans_encoder_layer = nn.TransformerEncoderLayer(
+             d_model=latent_dim,
+             nhead=num_heads,
+             dim_feedforward=ff_size,
+             dropout=dropout,
+             activation=activation)
+
+         self.seqTransEncoder = nn.TransformerEncoder(seq_trans_encoder_layer,
+                                                      num_layers=num_layers)
+
+     def forward(
+             self,
+             features: Tensor,
+             lengths: Optional[List[int]] = None
+     ) -> Union[Tensor, Distribution]:
+         if lengths is None:
+             lengths = [len(feature) for feature in features]
+
+         device = features.device
+
+         bs, nframes, nfeats = features.shape
+         mask = lengths_to_mask(lengths, device)
+
+         x = features
+         # Embed each human pose into a latent vector
+         x = self.skel_embedding(x)
+
+         # Switch sequence and batch_size because the input of
+         # Pytorch Transformer is [Sequence, Batch size, ...]
+         x = x.permute(1, 0, 2)  # now it is [nframes, bs, latent_dim]
+
+         # Each batch has its own set of tokens
+         if self.vae:
+             mu_token = torch.tile(self.mu_token, (bs, )).reshape(bs, -1)
+             logvar_token = torch.tile(self.logvar_token,
+                                       (bs, )).reshape(bs, -1)
+
+             # adding the distribution tokens for all sequences
+             xseq = torch.cat((mu_token[None], logvar_token[None], x), 0)
+
+             # create a bigger mask, to allow attending to mu and logvar
+             token_mask = torch.ones((bs, 2), dtype=bool, device=x.device)
+             aug_mask = torch.cat((token_mask, mask), 1)
+         else:
+             emb_token = torch.tile(self.emb_token, (bs, )).reshape(bs, -1)
+
+             # adding the embedding token for all sequences
+             xseq = torch.cat((emb_token[None], x), 0)
+
+             # create a bigger mask, to allow attending to emb
+             token_mask = torch.ones((bs, 1), dtype=bool, device=x.device)
+             aug_mask = torch.cat((token_mask, mask), 1)
+
+         # add positional encoding
+         xseq = self.sequence_pos_encoding(xseq)
+         final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask)
+
+         if self.vae:
+             mu, logvar = final[0], final[1]
+             std = logvar.exp().pow(0.5)
+             # https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py
+             dist = torch.distributions.Normal(mu, std)
+             return dist
+         else:
+             return final[0]
+
+
+ class ActorAgnosticDecoder(nn.Module):
+
+     def __init__(self,
+                  nfeats: int,
+                  latent_dim: int = 256,
+                  ff_size: int = 1024,
+                  num_layers: int = 4,
+                  num_heads: int = 4,
+                  dropout: float = 0.1,
+                  activation: str = "gelu",
+                  **kwargs) -> None:
+         super().__init__()
+
+         output_feats = nfeats
+         self.latent_dim = latent_dim
+         self.nfeats = nfeats
+
+         self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout)
+
+         seq_trans_decoder_layer = nn.TransformerDecoderLayer(
+             d_model=latent_dim,
+             nhead=num_heads,
+             dim_feedforward=ff_size,
+             dropout=dropout,
+             activation=activation)
+
+         self.seqTransDecoder = nn.TransformerDecoder(seq_trans_decoder_layer,
+                                                      num_layers=num_layers)
+
+         self.final_layer = nn.Linear(latent_dim, output_feats)
+
+     def forward(self, z: Tensor, lengths: List[int]):
+         mask = lengths_to_mask(lengths, z.device)
+         # latent_dim = z.shape[1]
+         bs, nframes = mask.shape
+         nfeats = self.nfeats
+
+         # z = z[None]  # sequence of 1 element for the memory
+
+         # Construct time queries
+         time_queries = torch.zeros(nframes,
+                                    bs,
+                                    self.latent_dim,
+                                    device=z.device)
+         time_queries = self.sequence_pos_encoding(time_queries)
+
+         # Pass through the transformer decoder
+         # with the latent vector for memory
+         output = self.seqTransDecoder(tgt=time_queries,
+                                       memory=z,
+                                       tgt_key_padding_mask=~mask)
+
+         output = self.final_layer(output)
+         # zero for padded area
+         output[~mask.T] = 0
+         # Pytorch Transformer: [Sequence, Batch size, ...]
+         feats = output.permute(1, 0, 2)
+         return feats
+
+
+ def sample_from_distribution(
+         dist,
+         *,
+         fact=1.0,
+         sample_mean=False,
+ ) -> Tensor:
+
+     if sample_mean:
+         return dist.loc.unsqueeze(0)
+
+     # Reparameterization trick
+     if fact is None:
+         return dist.rsample().unsqueeze(0)
+
+     # Rescale the eps
+     eps = dist.rsample() - dist.loc
+     z = dist.loc + fact * eps
+
+     # add latent size
+     z = z.unsqueeze(0)
+     return z
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ class Encoder_FC(nn.Module):
+     def __init__(self, modeltype, njoints, nfeats, num_frames, num_classes, translation, pose_rep, glob, glob_rot,
+                  latent_dim=256, **kargs):
+         super().__init__()
+
+         self.modeltype = modeltype
+         self.njoints = njoints
+         self.nfeats = nfeats
+         self.num_frames = num_frames
+         self.num_classes = num_classes
+         self.translation = translation
+         self.pose_rep = pose_rep
+         self.glob = glob
+         self.glob_rot = glob_rot
+
+         self.latent_dim = latent_dim
+
+         self.activation = nn.GELU()
+
+         self.input_dim = self.njoints * self.nfeats * self.num_frames + self.num_classes
+
+         self.fully_connected = nn.Sequential(nn.Linear(self.input_dim, 512),
+                                              nn.GELU(),
+                                              nn.Linear(512, 256),
+                                              nn.GELU())
+         if self.modeltype == "cvae":
+             self.mu = nn.Linear(256, self.latent_dim)
+             self.var = nn.Linear(256, self.latent_dim)
+         else:
+             self.final = nn.Linear(256, self.latent_dim)
+
+     def forward(self, batch):
+         x, y = batch["x"], batch["y"]
+         bs, njoints, feats, nframes = x.size()
+         if (njoints * feats * nframes) != self.njoints * self.nfeats * self.num_frames:
+             raise ValueError("This model is not adapted with this input")
+
+         if len(y.shape) == 1:  # labels given as indices: convert to one-hot
+             y = F.one_hot(y, self.num_classes)
+         y = y.to(dtype=x.dtype)
+         x = x.reshape(bs, njoints * feats * nframes)
+         x = torch.cat((x, y), 1)
+
+         x = self.fully_connected(x)
+
+         if self.modeltype == "cvae":
+             return {"mu": self.mu(x), "logvar": self.var(x)}
+         else:
+             return {"z": self.final(x)}
+
+
+ class Decoder_FC(nn.Module):
+     def __init__(self, modeltype, njoints, nfeats, num_frames, num_classes, translation, pose_rep, glob, glob_rot,
+                  latent_dim=256, **kargs):
+         super().__init__()
+
+         self.modeltype = modeltype
+         self.njoints = njoints
+         self.nfeats = nfeats
+         self.num_frames = num_frames
+         self.num_classes = num_classes
+         self.translation = translation
+         self.pose_rep = pose_rep
+         self.glob = glob
+         self.glob_rot = glob_rot
+
+         self.latent_dim = latent_dim
+
+         self.input_dim = self.latent_dim + self.num_classes
+         self.output_dim = self.njoints * self.nfeats * self.num_frames
+
+         self.fully_connected = nn.Sequential(nn.Linear(self.input_dim, 256),
+                                              nn.GELU(),
+                                              nn.Linear(256, 512),
+                                              nn.GELU(),
+                                              nn.Linear(512, self.output_dim),
+                                              nn.GELU())
+
+     def forward(self, batch):
+         z, y = batch["z"], batch["y"]
+         # z: [batch_size, latent_dim]
+         # y: [batch_size]
+         if len(y.shape) == 1:  # labels given as indices: convert to one-hot
+             y = F.one_hot(y, self.num_classes)
+         y = y.to(dtype=z.dtype)  # y: [batch_size, num_classes]
+         # z: [batch_size, latent_dim+num_classes]
+         z = torch.cat((z, y), dim=1)
+
+         z = self.fully_connected(z)
+
+         bs, _ = z.size()
+
+         z = z.reshape(bs, self.njoints, self.nfeats, self.num_frames)
+         batch["output"] = z
+         return batch
Evaluator_272/mld/models/architectures/gpt/clip.py ADDED
@@ -0,0 +1,90 @@
+ import os
+ from typing import List, Union
+
+ import torch
+ from torch import Tensor, nn
+ from torch.distributions.distribution import Distribution
+ from transformers import AutoModel, AutoTokenizer, CLIPTextModel, CLIPTokenizer
+
+ from mld.models.operator import PositionalEncoding
+ from mld.utils.temos_utils import lengths_to_mask
+
+ import pytorch_lightning as pl
+
+
+ class TextEncoder(pl.LightningModule):
+
+     def __init__(
+         self,
+         modelpath: str,
+         finetune: bool = False,
+         last_hidden_state: bool = False,
+         latent_dim: list = [1, 256],
+     ) -> None:
+
+         super().__init__()
+
+         self.latent_dim = latent_dim
+
+         self.tokenizer = AutoTokenizer.from_pretrained(modelpath)
+         self.text_model = AutoModel.from_pretrained(modelpath)
+
+         # Don't train the model
+         if not finetune:
+             self.text_model.training = False
+             for p in self.text_model.parameters():
+                 p.requires_grad = False
+
+         # Then configure the model
+         self.max_length = self.tokenizer.model_max_length
+         if "clip" in modelpath:
+             self.text_encoded_dim = self.text_model.config.text_config.hidden_size
+             if last_hidden_state:
+                 self.name = "clip_hidden"
+             else:
+                 self.name = "clip"
+         elif "bert" in modelpath:
+             self.name = "bert"
+             self.text_encoded_dim = self.text_model.config.hidden_size
+         else:
+             raise ValueError(f"Model {modelpath} not supported")
+
+     def forward(self, texts: List[str]):
+         # get prompt text embeddings
+         if self.name in ["clip", "clip_hidden"]:
+             text_inputs = self.tokenizer(
+                 texts,
+                 padding="max_length",
+                 truncation=True,
+                 max_length=self.max_length,
+                 return_tensors="pt",
+             )
+             text_input_ids = text_inputs.input_ids
+             # truncate to the max length CLIP can handle
+             if text_input_ids.shape[-1] > self.tokenizer.model_max_length:
+                 text_input_ids = text_input_ids[:, :self.tokenizer.model_max_length]
+         elif self.name == "bert":
+             text_inputs = self.tokenizer(texts,
+                                          return_tensors="pt",
+                                          padding=True)
+
+         # use pooled output if latent dim is two-dimensional
+         # pooled = 0 if self.latent_dim[0] == 1 else 1  # (bs, seq_len, text_encoded_dim) -> (bs, text_encoded_dim)
+         # text encoder forward, clip must use get_text_features
+         if self.name == "clip":
+             # (batch_size, text_encoded_dim)
+             text_embeddings = self.text_model.get_text_features(
+                 text_input_ids.to(self.text_model.device))
+             # (batch_size, 1, text_encoded_dim)
+             text_embeddings = text_embeddings.unsqueeze(1)
+         elif self.name == "clip_hidden":
+             # (batch_size, seq_length, text_encoded_dim)
+             text_embeddings = self.text_model.text_model(
+                 text_input_ids.to(self.text_model.device)).last_hidden_state
+         elif self.name == "bert":
+             # (batch_size, seq_length, text_encoded_dim)
+             text_embeddings = self.text_model(
+                 **text_inputs.to(self.text_model.device)).last_hidden_state
+         else:
+             raise NotImplementedError(f"Model {self.name} not implemented")
+
+         return text_embeddings
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ """
+ Various positional encodings for the transformer.
+ """
+ import math
+ import torch
+ from torch import nn
+
+
+ def PE1d_sincos(seq_length, dim):
+     """
+     :param seq_length: length of positions
+     :param dim: dimension of the model
+     :return: seq_length * dim position matrix
+     """
+     if dim % 2 != 0:
+         raise ValueError("Cannot use sin/cos positional encoding with "
+                          "odd dim (got dim={:d})".format(dim))
+     pe = torch.zeros(seq_length, dim)
+     position = torch.arange(0, seq_length).unsqueeze(1)
+     div_term = torch.exp((torch.arange(0, dim, 2, dtype=torch.float) *
+                           -(math.log(10000.0) / dim)))
+     pe[:, 0::2] = torch.sin(position.float() * div_term)
+     pe[:, 1::2] = torch.cos(position.float() * div_term)
+
+     return pe.unsqueeze(1)
+
+
+ class PositionEmbedding(nn.Module):
+     """
+     Absolute pos embedding (standard), learned.
+     """
+     def __init__(self, seq_length, dim, dropout, grad=False):
+         super().__init__()
+         self.embed = nn.Parameter(data=PE1d_sincos(seq_length, dim), requires_grad=grad)
+         self.dropout = nn.Dropout(p=dropout)
+
+     def forward(self, x):
+         # x.shape: bs, seq_len, feat_dim
+         l = x.shape[1]
+         x = x.permute(1, 0, 2) + self.embed[:l].expand(x.permute(1, 0, 2).shape)
+         x = self.dropout(x.permute(1, 0, 2))
+         return x
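A brief sketch (illustrative only) of the PositionEmbedding module above: it adds fixed sinusoidal position encodings (non-trainable unless grad=True) to a batch-first sequence:

# Hypothetical sizes: seq_length, dim and dropout are chosen for illustration.
import torch
from mld.models.architectures.gpt.pos_encoding import PositionEmbedding

pos_emb = PositionEmbedding(seq_length=64, dim=256, dropout=0.1)
x = torch.randn(8, 50, 256)   # [batch, seq_len, feat_dim] with seq_len <= seq_length
out = pos_emb(x)              # same shape, positional encoding added before dropout
assert out.shape == x.shape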