Initial upload of MotionStreamer code, excluding large extracted data and output folders.
Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete file list.
- Causal_TAE/net_last.pth +3 -0
- Causal_TAE_t2m_babel/net_last.pth +3 -0
- Evaluator_272/.DS_Store +0 -0
- Evaluator_272/configs/assets.yaml +13 -0
- Evaluator_272/configs/base.yaml +92 -0
- Evaluator_272/configs/configs_evaluator_272/H3D-TMR.yaml +95 -0
- Evaluator_272/configs/modules/denoiser.yaml +22 -0
- Evaluator_272/configs/modules/evaluators.yaml +20 -0
- Evaluator_272/configs/modules/motion_vae.yaml +15 -0
- Evaluator_272/configs/modules/scheduler.yaml +25 -0
- Evaluator_272/configs/modules/text_encoder.yaml +8 -0
- Evaluator_272/configs/modules_temos/motiondecoder.yaml +11 -0
- Evaluator_272/configs/modules_temos/motionencoder.yaml +12 -0
- Evaluator_272/configs/modules_temos/text_encoder.yaml +13 -0
- Evaluator_272/datasets/__init__.py +0 -0
- Evaluator_272/mld/__init__.py +0 -0
- Evaluator_272/mld/callback/__init__.py +1 -0
- Evaluator_272/mld/callback/progress.py +54 -0
- Evaluator_272/mld/config.py +104 -0
- Evaluator_272/mld/data/HumanML3D_272.py +131 -0
- Evaluator_272/mld/data/__init__.py +0 -0
- Evaluator_272/mld/data/base.py +105 -0
- Evaluator_272/mld/data/get_data.py +183 -0
- Evaluator_272/mld/data/humanml/__init__.py +0 -0
- Evaluator_272/mld/data/humanml/common/quaternion.py +423 -0
- Evaluator_272/mld/data/humanml/common/skeleton.py +199 -0
- Evaluator_272/mld/data/humanml/data/__init__.py +0 -0
- Evaluator_272/mld/data/humanml/data/dataset.py +227 -0
- Evaluator_272/mld/data/humanml/scripts/motion_process.py +576 -0
- Evaluator_272/mld/data/humanml/utils/__init__.py +0 -0
- Evaluator_272/mld/data/humanml/utils/metrics.py +142 -0
- Evaluator_272/mld/data/humanml/utils/paramUtil.py +63 -0
- Evaluator_272/mld/data/humanml/utils/plot_script.py +103 -0
- Evaluator_272/mld/data/humanml/utils/utils.py +163 -0
- Evaluator_272/mld/data/humanml/utils/word_vectorizer.py +143 -0
- Evaluator_272/mld/data/sampling/__init__.py +2 -0
- Evaluator_272/mld/data/sampling/base.py +41 -0
- Evaluator_272/mld/data/sampling/framerate.py +32 -0
- Evaluator_272/mld/data/sampling/frames.py +58 -0
- Evaluator_272/mld/data/utils.py +38 -0
- Evaluator_272/mld/launch/__init__.py +0 -0
- Evaluator_272/mld/launch/blender.py +23 -0
- Evaluator_272/mld/launch/prepare.py +66 -0
- Evaluator_272/mld/launch/tools.py +9 -0
- Evaluator_272/mld/models/__init__.py +0 -0
- Evaluator_272/mld/models/architectures/__init__.py +0 -0
- Evaluator_272/mld/models/architectures/actor_vae.py +258 -0
- Evaluator_272/mld/models/architectures/fc.py +100 -0
- Evaluator_272/mld/models/architectures/gpt/clip.py +90 -0
- Evaluator_272/mld/models/architectures/gpt/pos_encoding.py +43 -0
Causal_TAE/net_last.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8becaeebbd0588d7080ea3baf19ca036fe06851035c8b5f214dac1a5cf23949c
size 304843534
Causal_TAE_t2m_babel/net_last.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8d4cf982269fed7887c45076852fe44be3611ac3c7761caaa5c849a8725ae3c6
size 304843534
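Both .pth entries above are Git LFS pointer files (version/oid/size), not the actual checkpoint weights. A minimal sketch, not part of the commit, of reading such a pointer before the real file has been fetched with git-lfs; the helper name is ours:

# Sketch: parse a Git LFS pointer file such as Causal_TAE/net_last.pth
def read_lfs_pointer(path):
    fields = {}
    with open(path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value  # keys seen above: 'version', 'oid', 'size'
    return fields

# pointer = read_lfs_pointer("Causal_TAE/net_last.pth")
# print(pointer["oid"], pointer["size"])   # sha256:8bec...  304843534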
Evaluator_272/.DS_Store
ADDED
Binary file (6.15 kB).
Evaluator_272/configs/assets.yaml
ADDED
@@ -0,0 +1,13 @@
FOLDER: './experiments' # Experiment files saving path

TEST:
  FOLDER: './results' # Testing files saving path

DATASET:
  HUMANML3D_272:
    ROOT: './datasets/humanml3d_272' # HumanML3D_272 directory
    SPLIT_ROOT: './datasets/humanml3d_272/split' # HumanML3D_272 splits directory

model:
  bert_path: './deps/distilbert-base-uncased'
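For reference, a short sketch of how this assets file is consumed: Evaluator_272/mld/config.py (further down in this diff) loads it with OmegaConf via the --cfg_assets flag and merges it into the experiment config, so the paths are resolved relative to the working directory.

from omegaconf import OmegaConf

assets = OmegaConf.load("./configs/assets.yaml")
print(assets.DATASET.HUMANML3D_272.ROOT)   # ./datasets/humanml3d_272
print(assets.model.bert_path)              # ./deps/distilbert-base-uncased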
Evaluator_272/configs/base.yaml
ADDED
@@ -0,0 +1,92 @@
SEED_VALUE: 1234
DEBUG: True
TRAIN:
  SPLIT: 'train'
  NUM_WORKERS: 2 # Number of workers
  BATCH_SIZE: 4 # Size of batches
  START_EPOCH: 0 # Start epoch
  END_EPOCH: 400 # End epoch
  RESUME: '' # Experiment path to be resumed training
  PRETRAINED_VAE: ''
  PRETRAINED: '' # Pretrained model path

  OPTIM:
    OPTIM.TYPE: 'AdamW' # Optimizer type
    OPTIM.LR: 1e-4 # Learning rate

  ABLATION:
    VAE_TYPE: 'actor' # vae ablation: actor or mcross
    VAE_ARCH: 'encoder_decoder' # mdiffusion vae architecture
    PE_TYPE: 'actor' # mdiffusion mld or actor
    DIFF_PE_TYPE: 'actor' # mdiffusion mld or actor
    SKIP_CONNECT: False # skip connection for denoiser va
    # use linear to expand mean and std rather than expand token nums
    MLP_DIST: False
    IS_DIST: False # Mcross distribution kl
    PREDICT_EPSILON: True # noise or motion

EVAL:
  SPLIT: 'gtest'
  BATCH_SIZE: 1 # Evaluating batch size
  NUM_WORKERS: 12 # Number of evaluating workers

TEST:
  TEST_DIR: ''
  CHECKPOINTS: '' # Pretrained model path
  SPLIT: 'gtest'
  BATCH_SIZE: 1 # Testing batch size
  NUM_WORKERS: 12 # Number of testing workers
  SAVE_PREDICTIONS: False # Whether to save predictions
  COUNT_TIME: False # Whether to count time during test
  REPLICATION_TIMES: 20 # Number of times to replicate the test
  MM_NUM_SAMPLES: 100 # Number of samples for multimodal test
  MM_NUM_REPEATS: 30 # Number of repeats for multimodal test
  MM_NUM_TIMES: 10 # Number of times to repeat the multimodal test
  DIVERSITY_TIMES: 300 # Number of times to repeat the diversity test
  REP_I: 0
model:
  target: 'modules'
  t2m_textencoder:
    dim_word: 300
    dim_pos_ohot: 15
    dim_text_hidden: 512
    dim_coemb_hidden: 512

  t2m_motionencoder:
    dim_move_hidden: 512
    dim_move_latent: 512
    dim_motion_hidden: 1024
    dim_motion_latent: 512
LOSS:
  LAMBDA_LATENT: 1e-5 # Lambda for latent losses
  LAMBDA_KL: 1e-5 # Lambda for kl losses
  LAMBDA_REC: 1.0 # Lambda for reconstruction losses
  LAMBDA_JOINT: 1.0 # Lambda for joint losses
  LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses
  LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses
  LAMBDA_CYCLE: 1.0 # Lambda for cycle losses
  LAMBDA_PRIOR: 0.0
  DIST_SYNC_ON_STEP: True
METRIC:
  FORCE_IN_METER: True
  DIST_SYNC_ON_STEP: True
DATASET:
  NCLASSES: 10
  SAMPLER:
    MAX_SQE: -1
    MAX_LEN: 196
    MIN_LEN: 40
    MAX_TEXT_LEN: 20
  HUMANML3D_272:
    UNIT_LEN: 4


LOGGER:
  SACE_CHECKPOINT_EPOCH: 1
  LOG_EVERY_STEPS: 1
  VAL_EVERY_STEPS: 10
  TENSORBOARD: true
  WANDB:
    OFFLINE: false
    PROJECT: null
    RESUME_ID: null
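base.yaml only supplies defaults. A simplified sketch of the merge order implemented by parse_args() in Evaluator_272/mld/config.py (the real function also folds in the per-module YAMLs under configs/<model.target>/):

from omegaconf import OmegaConf

cfg = OmegaConf.merge(
    OmegaConf.load("./configs/base.yaml"),
    OmegaConf.load("./configs/configs_evaluator_272/H3D-TMR.yaml"),
    OmegaConf.load("./configs/assets.yaml"),
)
print(cfg.TRAIN.BATCH_SIZE)  # 256 -- the experiment value overrides base.yaml's 4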
Evaluator_272/configs/configs_evaluator_272/H3D-TMR.yaml
ADDED
@@ -0,0 +1,95 @@
NAME: EXP1 # Experiment name
DEBUG: False # Debug mode
ACCELERATOR: 'gpu' # Device options: "cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto"
DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3]
# DEVICE: [0] # Index of gpus eg. [0] or [0,1,2,3]

# Training configuration
TRAIN:
  #---------------------------------
  STAGE: temos # stage "vae" or "diffusion", "vae_diffusion"
  #---------------------------------
  DATASETS: ['humanml3d_272'] # Training datasets
  NUM_WORKERS: 11 # Number of workers
  BATCH_SIZE: 256 # Size of batches
  START_EPOCH: 0 # Start epoch
  END_EPOCH: 100 # End epoch
  RESUME: '' # Resume training from this path
  OPTIM:
    TYPE: AdamW # Optimizer type
    LR: 1e-4 # Learning rate
  PRETRAINED_MLD: False

# Evaluating Configuration
EVAL:
  DATASETS: ['humanml3d_272'] # Evaluating datasets
  BATCH_SIZE: 32 # Evaluating batch size
  SPLIT: test
  eval_self_on_gt: True

# Test Configuration
TEST:
  PRETRAINED_CHECKPOINTS_VAE: ''
  SAVE_PREDICTIONS: False
  CHECKPOINTS: '' # Pretrained model path
  DATASETS: ['humanml3d_272'] # Testing datasets
  SPLIT: test
  BATCH_SIZE: 32 # Testing batch size
  MEAN: False
  NUM_SAMPLES: 1
  FACT: 1
  inference_vq_code: False
  # REPLICATION_TIM

# Datasets Configuration
DATASET:
  JOINT_TYPE: 'humanml3d_v3' # joint type
  VERSION: ''
  MOTION_TYPE: ''
METRIC:
  TYPE: ['TMR_TM2TMetrics']
# Losses Configuration
LOSS:
  TYPE: temos # Losses type
  USE_INFONCE: True
  USE_INFONCE_FILTER: True
  LAMBDA_LATENT: 1.0e-5 # Lambda for latent losses
  LAMBDA_KL: 1.0e-5 # Lambda for kl losses
  LAMBDA_REC: 1.0 # Lambda for reconstruction losses
  LAMBDA_GEN: 1.0 # Lambda for text-motion generation losses
  LAMBDA_CROSS: 1.0 # Lambda for cross-reconstruction losses
  LAMBDA_CYCLE: 0.0 # Lambda for cycle losses
  LAMBDA_PRIOR: 0.0
  LAMBDA_INFONCE: 0.1 # Lambda for infonce
  INFONCE_TEMP: 0.1
  DIST_SYNC_ON_STEP: False # Sync losses on step when distributed trained
  USE_RECLIPLOSS: False
  SYNC: False
  TRAIN_TMR: False

# Model Configuration
model:
  vae: true # whether vae model
  model_type: temos # model type
  condition: 'text'
  target: modules_temos
  #####
  latent_dim: 256 # latent dimension
  ff_size: 1024 #
  num_layers: 4 # number of layers
  num_head: 6 # number of head layers
  dropout: 0.1 # dropout rate
  activation: gelu # activation type
  eval_text_encode_way: given_glove
  eval_text_source: token

# Logger configuration
LOGGER:
  SAVE_CHECKPOINT_EPOCH: 10
  LOG_EVERY_STEPS: 1
  VAL_EVERY_STEPS: 5
  TENSORBOARD: True
  WANDB:
    PROJECT: null
    OFFLINE: False
    RESUME_ID: null
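Because this experiment sets model.target to 'modules_temos', get_module_config() in mld/config.py merges every YAML under ./configs/modules_temos/ (textencoder, motionencoder, motiondecoder, listed further below) into cfg.model before the model is built. A reduced sketch of that step:

from omegaconf import OmegaConf
import os

module_conf = OmegaConf.create()
for name in os.listdir("./configs/modules_temos/"):
    if name.endswith(".yaml"):
        module_conf.merge_with(OmegaConf.load(f"./configs/modules_temos/{name}"))
print(list(module_conf.keys()))  # ['motiondecoder', 'motionencoder', 'textencoder']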
Evaluator_272/configs/modules/denoiser.yaml
ADDED
@@ -0,0 +1,22 @@
denoiser:
  target: mld.models.architectures.mld_denoiser.MldDenoiser
  params:
    text_encoded_dim: 768
    ff_size: 1024
    num_layers: 9
    num_heads: 4
    dropout: 0.1
    normalize_before: False
    activation: 'gelu'
    flip_sin_to_cos: True
    return_intermediate_dec: False
    position_embedding: 'learned'
    arch: trans_enc
    freq_shift: 0
    condition: ${model.condition}
    latent_dim: ${model.latent_dim}
    guidance_scale: ${model.guidance_scale}
    guidance_uncondp: ${model.guidance_uncondp}
    nfeats: ${DATASET.NFEATS}
    nclasses: ${DATASET.NCLASSES}
    ablation: ${TRAIN.ABLATION}
Evaluator_272/configs/modules/evaluators.yaml
ADDED
@@ -0,0 +1,20 @@
t2m_textencoder:
  target: mld.models.architectures.t2m_textenc.TextEncoderBiGRUCo
  params:
    word_size: 300
    pos_size: 15
    hidden_size: 512
    output_size: 512

t2m_moveencoder:
  target: mld.models.architectures.t2m_textenc.MovementConvEncoder
  params:
    hidden_size: 512
    output_size: 512

t2m_motionencoder:
  target: mld.models.architectures.t2m_motionenc.MotionEncoder
  params:
    input_size: ${model.t2m_moveencoder.output_size}
    hidden_size: 1024
    output_size: 512
Evaluator_272/configs/modules/motion_vae.yaml
ADDED
@@ -0,0 +1,15 @@
motion_vae:
  # Optional: mld_vae, vposert_vae
  target: mld.models.architectures.mld_vae.MldVae
  params:
    arch: 'encoder_decoder'
    ff_size: 1024
    num_layers: 9
    num_heads: 4
    dropout: 0.1
    normalize_before: false
    activation: 'gelu'
    position_embedding: 'learned'
    latent_dim: ${model.latent_dim}
    nfeats: ${DATASET.NFEATS}
    ablation: ${TRAIN.ABLATION}
Evaluator_272/configs/modules/scheduler.yaml
ADDED
@@ -0,0 +1,25 @@
scheduler:
  target: diffusers.DDIMScheduler
  num_inference_timesteps: 50
  eta: 0.0
  params:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: 'scaled_linear' # Optional: ['linear', 'scaled_linear', 'squaredcos_cap_v2']
    # variance_type: 'fixed_small'
    clip_sample: false # clip sample to -1~1
    # below are for ddim
    set_alpha_to_one: false
    steps_offset: 1


noise_scheduler:
  target: diffusers.DDPMScheduler
  params:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: 'scaled_linear' # Optional: ['linear', 'scaled_linear', 'squaredcos_cap_v2']
    variance_type: 'fixed_small'
    clip_sample: false # clip sample to -1~1
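These scheduler entries are built with instantiate_from_config() from mld/config.py, which imports the 'target' class and passes 'params' as keyword arguments. For the DDIM entry that is equivalent to the sketch below (num_inference_timesteps and eta sit outside params and are used at sampling time):

from diffusers import DDIMScheduler

scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)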
Evaluator_272/configs/modules/text_encoder.yaml
ADDED
@@ -0,0 +1,8 @@
text_encoder:
  # Optional: mld_clip, mld_bert
  target: mld.models.architectures.mld_clip.MldTextEncoder
  params:
    finetune: false # if false, model weights are frozen
    last_hidden_state: false # if true, the last hidden state is used as the text embedding
    latent_dim: ${model.latent_dim}
    modelpath: ${model.clip_path}
Evaluator_272/configs/modules_temos/motiondecoder.yaml
ADDED
@@ -0,0 +1,11 @@
motiondecoder:
  name: actor_decoder
  target: mld.models.architectures.temos.motiondecoder.actor.ActorAgnosticDecoder
  params:
    latent_dim: ${model.latent_dim}
    ff_size: ${model.ff_size}
    num_layers: ${model.num_layers}
    num_head: ${model.num_head}
    droupout: ${model.dropout}
    activation: ${model.activation}
    nfeats: ${DATASET.NFEATS}
Evaluator_272/configs/modules_temos/motionencoder.yaml
ADDED
@@ -0,0 +1,12 @@
motionencoder:
  name: actor_encoder
  target: mld.models.architectures.temos.motionencoder.actor.ActorAgnosticEncoder
  params:
    latent_dim: ${model.latent_dim}
    vae: ${model.vae}
    ff_size: ${model.ff_size}
    num_layers: ${model.num_layers}
    num_head: ${model.num_head}
    droupout: ${model.dropout}
    activation: ${model.activation}
    nfeats: ${DATASET.NFEATS}
Evaluator_272/configs/modules_temos/text_encoder.yaml
ADDED
@@ -0,0 +1,13 @@
textencoder:
  name: distilbert_actor
  target: mld.models.architectures.temos.textencoder.distillbert_actor.DistilbertActorAgnosticEncoder
  params:
    latent_dim: ${model.latent_dim}
    vae: ${model.vae}
    ff_size: ${model.ff_size}
    num_layers: ${model.num_layers}
    num_head: ${model.num_head}
    droupout: ${model.dropout}
    activation: ${model.activation}
    finetune: false
    modelpath: ${model.bert_path}
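The ${model.*} and ${DATASET.*} entries in the module YAMLs above are OmegaConf interpolations; they resolve against the merged config at access time rather than at load time. A self-contained sketch of the mechanism:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "model": {"latent_dim": 256, "ff_size": 1024},
    "textencoder": {"params": {"latent_dim": "${model.latent_dim}",
                               "ff_size": "${model.ff_size}"}},
})
print(cfg.textencoder.params.latent_dim)  # 256, taken from model.latent_dim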
Evaluator_272/datasets/__init__.py
ADDED
File without changes
Evaluator_272/mld/__init__.py
ADDED
File without changes
Evaluator_272/mld/callback/__init__.py
ADDED
@@ -0,0 +1 @@
from .progress import ProgressLogger
Evaluator_272/mld/callback/progress.py
ADDED
@@ -0,0 +1,54 @@
import logging

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import Callback
import psutil

logger = logging.getLogger()


class ProgressLogger(Callback):

    def __init__(self, metric_monitor: dict, precision: int = 3):
        # Metric to monitor
        self.metric_monitor = metric_monitor
        self.precision = precision

    def on_train_start(self, trainer: Trainer, pl_module: LightningModule,
                       **kwargs) -> None:
        logger.info("Training started")

    def on_train_end(self, trainer: Trainer, pl_module: LightningModule,
                     **kwargs) -> None:
        logger.info("Training done")

    def on_validation_epoch_end(self, trainer: Trainer,
                                pl_module: LightningModule, **kwargs) -> None:
        if trainer.sanity_checking:
            logger.info("Sanity checking ok.")

    def on_train_epoch_end(self,
                           trainer: Trainer,
                           pl_module: LightningModule,
                           padding=False,
                           **kwargs) -> None:
        metric_format = f"{{:.{self.precision}e}}"
        line = f"Epoch {trainer.current_epoch}"
        if padding:
            line = f"{line:>{len('Epoch xxxx')}}"  # Right padding
        metrics_str = []

        losses_dict = trainer.callback_metrics
        for metric_name, dico_name in self.metric_monitor.items():
            if dico_name in losses_dict:
                metric = losses_dict[dico_name].item()
                metric = metric_format.format(metric)
                metric = f"{metric_name} {metric}"
                metrics_str.append(metric)

        if len(metrics_str) == 0:
            return

        memory = f"Memory {psutil.virtual_memory().percent}%"
        line = line + ": " + " ".join(metrics_str) + " " + memory
        logger.info(line)
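ProgressLogger is a standard Lightning callback; metric_monitor maps a display name to a key logged into trainer.callback_metrics, and matching entries are printed once per training epoch. A usage sketch with illustrative keys (the actual logged names are not fixed by this file):

from pytorch_lightning import Trainer
from mld.callback import ProgressLogger

metric_monitor = {"recons": "recons/total", "kl": "kl/total"}  # assumed metric keys
trainer = Trainer(max_epochs=100,
                  callbacks=[ProgressLogger(metric_monitor=metric_monitor)])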
Evaluator_272/mld/config.py
ADDED
@@ -0,0 +1,104 @@
import importlib
from argparse import ArgumentParser
from omegaconf import OmegaConf
import os


def get_module_config(cfg_model, path="modules"):
    module_conf = OmegaConf.create()
    files = os.listdir(f'./configs/{path}/')
    for file in files:
        if file.endswith('.yaml'):
            with open(f'./configs/{path}/' + file, 'r') as f:
                module_conf.merge_with(OmegaConf.load(f))
    module_conf.merge_with(cfg_model)
    return module_conf


def get_obj_from_str(string, reload=False):
    module, cls = string.rsplit(".", 1)
    if reload:
        module_imp = importlib.import_module(module)
        importlib.reload(module_imp)
    return getattr(importlib.import_module(module, package=None), cls)


def instantiate_from_config(config):
    if not "target" in config:
        if config == '__is_first_stage__':
            return None
        elif config == "__is_unconditional__":
            return None
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))


def parse_args(phase="train"):
    parser = ArgumentParser()

    group = parser.add_argument_group("Training options")
    if phase in ["train", "test"]:
        group.add_argument(
            "--cfg",
            type=str,
            required=False,
            default="./configs/config.yaml",
            help="config file",
        )
        group.add_argument(
            "--cfg_assets",
            type=str,
            required=False,
            default="./configs/assets.yaml",
            help="config file for asset paths",
        )
        group.add_argument("--batch_size",
                           type=int,
                           required=False,
                           help="training batch size")
        group.add_argument("--device",
                           type=int,
                           nargs="+",
                           required=False,
                           help="training device")
        group.add_argument("--nodebug",
                           action="store_true",
                           required=False,
                           help="debug or not")
        group.add_argument("--dir",
                           type=str,
                           required=False,
                           help="evaluate existing npys")

    # remove None params, and create a dictionary
    params = parser.parse_args()
    # params = {key: val for key, val in vars(opt).items() if val is not None}

    # update config from files
    cfg_base = OmegaConf.load('./configs/base.yaml')
    cfg_exp = OmegaConf.merge(cfg_base, OmegaConf.load(params.cfg))
    cfg_model = get_module_config(cfg_exp.model, cfg_exp.model.target)
    cfg_exp.model = cfg_model
    cfg_assets = OmegaConf.load(params.cfg_assets)
    cfg = OmegaConf.merge(cfg_exp, cfg_model, cfg_assets)

    if phase in ["train", "test"]:
        cfg.TRAIN.BATCH_SIZE = (params.batch_size
                                if params.batch_size else cfg.TRAIN.BATCH_SIZE)
        cfg.DEVICE = params.device if params.device else cfg.DEVICE
        cfg.DEBUG = not params.nodebug if params.nodebug is not None else cfg.DEBUG

    cfg.DEBUG = False if phase == "test" else cfg.DEBUG
    if phase == "test":
        cfg.DEBUG = False
        cfg.DEVICE = [0]
        print("Force no debugging and one gpu when testing")
    cfg.TEST.TEST_DIR = params.dir if params.dir else cfg.TEST.TEST_DIR

    # debug mode
    if cfg.DEBUG:
        cfg.NAME = "debug--" + cfg.NAME
        cfg.LOGGER.WANDB.OFFLINE = True
        cfg.LOGGER.VAL_EVERY_STEPS = 1

    return cfg
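parse_args() is driven entirely by the CLI flags defined above, so a test-time invocation presumably looks like the hedged sketch below; the entry-point script itself is not part of this commit, so its name is a placeholder.

# python <entry_script>.py --cfg ./configs/configs_evaluator_272/H3D-TMR.yaml \
#     --cfg_assets ./configs/assets.yaml --batch_size 32 --device 0
from mld.config import parse_args

cfg = parse_args(phase="test")   # forces DEBUG=False and a single GPU
print(cfg.NAME, cfg.TEST.BATCH_SIZE, cfg.DATASET.HUMANML3D_272.ROOT)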
Evaluator_272/mld/data/HumanML3D_272.py
ADDED
@@ -0,0 +1,131 @@
import numpy as np
import torch

from mld.data.humanml.scripts.motion_process import (process_file,
                                                     recover_from_ric, recover_from_root_rot6d)

from .base import BASEDataModule
from .humanml.data.dataset import Text2MotionDatasetV2
from .humanml.common.skeleton import Skeleton
import torch.nn.functional as F


class HumanML3D_272_DataModule(BASEDataModule):

    def __init__(self,
                 cfg,
                 batch_size,
                 num_workers,
                 collate_fn=None,
                 phase="train",
                 **kwargs):
        super().__init__(batch_size=batch_size,
                         num_workers=num_workers,
                         collate_fn=collate_fn)

        self.save_hyperparameters(logger=False)
        self.name = "humanml3d_272"
        self.njoints = 22
        self.hparams['njoints'] = 22
        if phase == "text_only":
            self.Dataset = TextOnlyDataset
        else:
            if cfg.TRAIN.STAGE in ['gpt'] and (not cfg.TEST.inference_vq_code):
                if cfg.model.vae_type in ['humanvq']:
                    self.Dataset = Text2MotionDatasetV2_VQToken
                elif cfg.model.vae_type in ['hvq']:
                    self.Dataset = Text2MotionDatasetV2_Dual_codebook_VQToken
                else:
                    raise NotImplementedError
            elif cfg.TEST.inference_vq_code:
                self.Dataset = VQMotionDataset
            else:
                self.Dataset = Text2MotionDatasetV2
        self.cfg = cfg
        sample_overrides = {
            "split": "val",
            "tiny": True,
            "progress_bar": False
        }

        self._sample_set = self.get_sample_set(overrides=sample_overrides)

        self.nfeats = self._sample_set.nfeats

    def recover_from_local_position(self, final_x, njoint):

        def accumulate_rotations(relative_rotations):
            R_total = [relative_rotations[0]]
            for R_rel in relative_rotations[1:]:
                R_total.append(np.matmul(R_rel, R_total[-1]))

            return np.array(R_total)

        def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
            a1, a2 = d6[..., :3], d6[..., 3:]
            b1 = F.normalize(a1, dim=-1)
            b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
            b2 = F.normalize(b2, dim=-1)
            b3 = torch.cross(b1, b2, dim=-1)
            return torch.stack((b1, b2, b3), dim=-2)

        nfrm, _ = final_x.shape
        positions_no_heading = final_x[:, 8:8+3*njoint].reshape(nfrm, -1, 3)
        velocities_root_xy_no_heading = final_x[:, :2]
        global_heading_diff_rot = final_x[:, 2:8]

        global_heading_rot = accumulate_rotations(rotation_6d_to_matrix(torch.from_numpy(global_heading_diff_rot)).numpy())
        inv_global_heading_rot = np.transpose(global_heading_rot, (0, 2, 1))
        positions_with_heading = np.matmul(np.repeat(inv_global_heading_rot[:, None, :, :], njoint, axis=1), positions_no_heading[..., None]).squeeze(-1)
        velocities_root_xyz_no_heading = np.zeros((velocities_root_xy_no_heading.shape[0], 3))
        velocities_root_xyz_no_heading[:, 0] = velocities_root_xy_no_heading[:, 0]
        velocities_root_xyz_no_heading[:, 2] = velocities_root_xy_no_heading[:, 1]
        velocities_root_xyz_no_heading[1:, :] = np.matmul(inv_global_heading_rot[:-1], velocities_root_xyz_no_heading[1:, :, None]).squeeze(-1)

        root_translation = np.cumsum(velocities_root_xyz_no_heading, axis=0)
        positions_with_heading[:, :, 0] += root_translation[:, 0:1]
        positions_with_heading[:, :, 2] += root_translation[:, 2:]

        return positions_with_heading

    def feats2joints(self, features, skel=None, motion_type=''):
        assert motion_type in ['']
        assert features.shape[2] == 272
        mean = torch.tensor(self.hparams.mean).to(features)
        std = torch.tensor(self.hparams.std).to(features)
        features = features * std + mean
        return self.recover_from_local_position(features.reshape(-1, 272).detach().cpu().numpy(), self.njoints).reshape(features.shape[0], -1, 22, 3)


    def joints2feats(self, features):
        features = process_file(features, self.njoints)[0]
        return features

    def renorm4t2m(self, features):
        ori_mean = torch.tensor(self.hparams.mean).to(features)
        ori_std = torch.tensor(self.hparams.std).to(features)
        eval_mean = torch.tensor(self.hparams.mean_eval).to(features)
        eval_std = torch.tensor(self.hparams.std_eval).to(features)
        features = features * ori_std + ori_mean
        features = (features - eval_mean) / eval_std
        return features

    def renorm2ori(self, features):
        mean = torch.tensor(self.hparams.mean).to(features)
        std = torch.tensor(self.hparams.std).to(features)
        features = features * std + mean

        return features


    def mm_mode(self, mm_on=True):
        if mm_on:
            self.is_mm = True
            self.name_list = self.test_dataset.name_list
            self.mm_list = np.random.choice(self.name_list,
                                            self.cfg.TEST.MM_NUM_SAMPLES,
                                            replace=False)
            self.test_dataset.name_list = self.mm_list
        else:
            self.is_mm = False
            self.test_dataset.name_list = self.name_list
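A sketch of the shape contract implied by feats2joints() above: a normalized 272-dimensional feature sequence is de-normalized with the stored mean/std and converted back to global positions for the 22 body joints. The call itself is left commented because it needs a constructed datamodule with dataset statistics.

import torch

features = torch.randn(4, 196, 272)           # (batch, frames, 272-dim features)
print(features.shape)
# joints = datamodule.feats2joints(features)   # -> array of shape (4, 196, 22, 3)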
Evaluator_272/mld/data/__init__.py
ADDED
|
File without changes
|
Evaluator_272/mld/data/base.py
ADDED
@@ -0,0 +1,105 @@
from os.path import join as pjoin
import numpy as np
import pytorch_lightning as pl
from torch.utils.data import DataLoader


class BASEDataModule(pl.LightningDataModule):

    def __init__(self, collate_fn, batch_size: int, num_workers: int):
        super().__init__()

        self.dataloader_options = {
            "batch_size": batch_size,
            "num_workers": num_workers,
            "collate_fn": collate_fn,
        }

        self.persistent_workers = True
        self.is_mm = False

    def get_sample_set(self, overrides={}):
        sample_params = self.hparams.copy()
        sample_params.update(overrides)
        split_file = pjoin(
            eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"), self.cfg.DATASET.VERSION,
            self.cfg.EVAL.SPLIT + ".txt",
        )
        return self.Dataset(split_file=split_file, **sample_params)

    def __getattr__(self, item):
        # train_dataset/val_dataset etc cached like properties
        if item.endswith("_dataset") and not item.startswith("_"):
            subset = item[:-len("_dataset")]
            item_c = "_" + item
            if item_c not in self.__dict__:
                # todo: config name not consistent
                subset = subset.upper() if subset != "val" else "EVAL"
                split = eval(f"self.cfg.{subset}.SPLIT")
                split_file = pjoin(
                    eval(f"self.cfg.DATASET.{self.name.upper()}.SPLIT_ROOT"),
                    self.cfg.DATASET.VERSION,
                    eval(f"self.cfg.{subset}.SPLIT") + ".txt",
                )
                self.__dict__[item_c] = self.Dataset(split_file=split_file,
                                                     split=split,
                                                     **self.hparams)
            return getattr(self, item_c)
        classname = self.__class__.__name__
        raise AttributeError(f"'{classname}' object has no attribute '{item}'")

    def setup(self, stage=None):
        self.stage = stage
        # Use the getter the first time to load the data
        if stage in (None, "fit"):
            _ = self.train_dataset
            _ = self.val_dataset
        if stage in (None, "test"):
            _ = self.test_dataset

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            shuffle=True,
            persistent_workers=True,
            **self.dataloader_options,
        )

    def predict_dataloader(self):
        dataloader_options = self.dataloader_options.copy()
        dataloader_options[
            "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE
        dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS
        dataloader_options["shuffle"] = False
        return DataLoader(
            self.test_dataset,
            persistent_workers=True,
            **dataloader_options,
        )

    def val_dataloader(self):
        # overrides batch_size and num_workers
        dataloader_options = self.dataloader_options.copy()
        dataloader_options["batch_size"] = self.cfg.EVAL.BATCH_SIZE
        dataloader_options["num_workers"] = self.cfg.EVAL.NUM_WORKERS
        dataloader_options["shuffle"] = False

        return DataLoader(
            self.val_dataset,
            persistent_workers=True,
            **dataloader_options,
        )

    def test_dataloader(self):
        # overrides batch_size and num_workers
        dataloader_options = self.dataloader_options.copy()
        dataloader_options[
            "batch_size"] = 1 if self.is_mm else self.cfg.TEST.BATCH_SIZE
        dataloader_options["num_workers"] = self.cfg.TEST.NUM_WORKERS
        # dataloader_options["drop_last"] = True
        dataloader_options["shuffle"] = False
        return DataLoader(
            self.test_dataset,
            persistent_workers=True,
            **dataloader_options,
        )
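The __getattr__ hook above turns train_dataset/val_dataset/test_dataset into lazy, cached attributes: the split file is read and the Dataset is built only on first access, then reused. A self-contained sketch of that pattern, reduced to its core:

class Lazy:
    def __getattr__(self, item):
        if item.endswith("_dataset"):
            cached = "_" + item
            if cached not in self.__dict__:
                self.__dict__[cached] = f"built {item}"   # stands in for self.Dataset(...)
            return self.__dict__[cached]
        raise AttributeError(item)

print(Lazy().test_dataset)   # "built test_dataset" -- constructed once, then cached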
Evaluator_272/mld/data/get_data.py
ADDED
@@ -0,0 +1,183 @@
from os.path import join as pjoin
import numpy as np
# from .humanml.utils.word_vectorizer import WordVectorizer, WordVectorizer_only_text_token
from .utils import *
from .HumanML3D_272 import HumanML3D_272_DataModule


def get_mean_std(phase, cfg, dataset_name):
    assert dataset_name == 'humanml3d_272'

    data_root = eval(f"cfg.DATASET.{dataset_name.upper()}.ROOT")
    mean = np.load(pjoin(data_root, 'mean_std', cfg.DATASET.VERSION, cfg.DATASET.MOTION_TYPE, "Mean.npy"))
    std = np.load(pjoin(data_root, 'mean_std', cfg.DATASET.VERSION, cfg.DATASET.MOTION_TYPE, "Std.npy"))
    return mean, std


def get_njoints(dataset_name):
    njoints = 22
    return njoints


def reget_mean_std(cfg, dataset_name, mean, std):
    if 'MINOR_MOTION_TYPE' in cfg.DATASET:
        select_motion_type = cfg.DATASET.MINOR_MOTION_TYPE
    else:
        select_motion_type = cfg.DATASET.MOTION_TYPE

    njoints = get_njoints(dataset_name)
    if select_motion_type == 'root_position':
        mean = mean[..., :4+(njoints - 1) * 3]
    elif select_motion_type == 'root_position_vel':
        mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
    elif select_motion_type == 'root_position_rot6d':
        mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
    elif select_motion_type == 'root_rot6d':
        mean = np.concatenate((mean[..., :4], mean[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
    elif select_motion_type in ['all', 'smplx_212', 'vector_263', 'vector_263_ori_humanml', 'smplx_159', '']:
        pass
    elif select_motion_type == 'root_body_pos_vel_hand_all':
        mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 3 + 21 * 6 : 4+(njoints - 1) * 9], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
        # pass
    elif select_motion_type == 'root_body_pos_vel_hand_pos_vel':
        mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_pos':
        mean = np.concatenate((mean[..., :4+(njoints - 1) * 3], mean[..., 4+(njoints - 1) * 9 + 22 * 3: 4+(njoints - 1) * 9 + 52*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_rot':
        mean = np.concatenate((mean[..., :4+(22 - 1) * 3], mean[..., 4+(52 - 1) * 3 + (22-1)*6 : 4+(52-1)*9], mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
    elif select_motion_type == 'root_position_vel_only_body':
        mean = np.concatenate((mean[..., :4+(22 - 1) * 3], mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_pos_vel_hand_wrist':
        body_pos_mean = mean[..., :4+(22 - 1) * 3] # 67
        left_hand_pos_mean = (mean[..., 4+(22 - 1) * 3:4+(37 - 1) * 3].reshape(15, 3) - body_pos_mean[..., -6:-3]).reshape(-1) # 45
        right_hand_pos_mean = (mean[..., 4+(37 - 1) * 3:4+(52 - 1) * 3].reshape(15, 3) - body_pos_mean[..., -3:]).reshape(-1) # 45

        body_vel_mean = mean[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3] # 66
        left_hand_vel_mean = (mean[..., 4+(52 - 1) * 9 + 22*3: 4+(52 - 1) * 9 + 22*3 + 15 * 3].reshape(15, 3) - body_vel_mean[..., -6:-3]).reshape(-1)
        right_hand_vel_mean = (mean[..., 4+(52 - 1) * 9 + 22*3+ 15 * 3: 4+(52 - 1) * 9 + 22*3 + 15 * 3 + 15 * 3].reshape(15, 3) - body_vel_mean[..., -3:]).reshape(-1)

        mean = np.concatenate((body_pos_mean, left_hand_pos_mean, right_hand_pos_mean, body_vel_mean, left_hand_vel_mean, right_hand_vel_mean), axis=0)
    else:
        raise NotImplementedError

    if select_motion_type == 'root_position':
        std = std[..., :4+(njoints-1)*3]
    elif select_motion_type == 'root_position_vel':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
    elif select_motion_type == 'root_position_rot6d':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
    elif select_motion_type == 'root_rot6d':
        std = np.concatenate((std[..., :4], std[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=0)
    elif select_motion_type in ['all', 'smplx_212', 'vector_263', 'vector_263_ori_humanml', 'smplx_159', '']:
        pass
    elif select_motion_type == 'root_body_pos_vel_hand_all':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 3 + 21 * 6 : 4+(njoints - 1) * 9], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
        # pass
    elif select_motion_type == 'root_body_pos_vel_hand_pos_vel':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_pos':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9 + 22 * 3: 4+(njoints - 1) * 9 + 52*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_rot':
        std = np.concatenate((std[..., :4+(22 - 1) * 3], std[..., 4+(52 - 1) * 3 + (22-1)*6 : 4+(52-1)*9], std[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
    elif select_motion_type == 'root_position_vel_only_body':
        std = np.concatenate((std[..., :4+(22 - 1) * 3], std[..., 4+(52 - 1) * 9: 4+(52 - 1) * 9 + 22*3]), axis=0)
    elif select_motion_type == 'root_body_pos_vel_hand_pos_vel_hand_wrist':
        std = np.concatenate((std[..., :4+(njoints - 1) * 3], std[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=0)
    else:
        raise NotImplementedError

    return mean, std

# def get_WordVectorizer(cfg, phase, dataset_name):
#     if phase not in ["text_only"]:
#         if dataset_name.lower() in ['humanml3d_272']:
#             if cfg.model.eval_text_source == 'token':
#                 return WordVectorizer(cfg.DATASET.WORD_VERTILIZER_PATH, "our_vab", cfg.model.eval_text_encode_way)
#             else:
#                 return WordVectorizer_only_text_token(cfg.DATASET.WORD_VERTILIZER_PATH, "our_vab", cfg.model.eval_text_encode_way)
#         else:
#             raise ValueError("Only support WordVectorizer for HumanML3D_272")
#     else:
#         return None


def get_collate_fn(name, cfg, phase="train"):
    if name.lower() in ['humanml3d_272']:
        if cfg.model.condition in ['text_all', 'text_face', 'text_body', 'text_hand', 'text_face_body', 'text_seperate', 'only_pose_concat', 'only_pose_fusion'] and (not cfg.TEST.inference_vq_code):
            return mld_collate_text_all
        elif cfg.TEST.inference_vq_code:
            return vq_collate
        elif cfg.TRAIN.STAGE in ['gpt'] and (not cfg.TEST.inference_vq_code):
            return mld_collate_vq_token
        else:
            return mld_collate
    else:
        raise NotImplementedError


# map config name to module&path
dataset_module_map = {
    'humanml3d_272': HumanML3D_272_DataModule
}
motion_subdir = {'humanml3d_272': 'motion_data'}


def get_datasets(cfg, logger=None, phase="train"):
    # get dataset names from cfg
    dataset_names = eval(f"cfg.{phase.upper()}.DATASETS")
    datasets = []
    for dataset_name in dataset_names:
        if dataset_name.lower() in ["humanml3d_272"]:

            if 'MINOR_MOTION_TYPE' in cfg.DATASET:
                input_format = cfg.DATASET.MINOR_MOTION_TYPE
            else:
                input_format = cfg.DATASET.MOTION_TYPE

            data_root = eval(f"cfg.DATASET.{dataset_name.upper()}.ROOT")
            # get mean and std corresponding to dataset
            mean, std = get_mean_std(phase, cfg, dataset_name)
            mean_eval, std_eval = get_mean_std("val", cfg, dataset_name)

            mean, std = reget_mean_std(cfg, dataset_name, mean, std)
            mean_eval, std_eval = reget_mean_std(cfg, dataset_name, mean_eval, std_eval)

            # get WordVectorizer
            # wordVectorizer = get_WordVectorizer(cfg, phase, dataset_name)
            # get collate_fn
            collate_fn = get_collate_fn(dataset_name, cfg, phase)
            # get dataset module

            dataset = dataset_module_map[dataset_name.lower()](
                cfg=cfg,
                batch_size=cfg.TRAIN.BATCH_SIZE,
                num_workers=cfg.TRAIN.NUM_WORKERS,
                debug=cfg.DEBUG,
                collate_fn=collate_fn,
                mean=mean,
                std=std,
                mean_eval=mean_eval,
                std_eval=std_eval,
                # w_vectorizer=wordVectorizer,
                input_format=cfg.DATASET.MOTION_TYPE,
                text_dir=pjoin(data_root, "texts"),
                motion_dir=pjoin(data_root, motion_subdir[dataset_name]),
                max_motion_length=cfg.DATASET.SAMPLER.MAX_LEN,
                min_motion_length=cfg.DATASET.SAMPLER.MIN_LEN,
                max_text_len=cfg.DATASET.SAMPLER.MAX_TEXT_LEN,
                unit_length=eval(
                    f"cfg.DATASET.{dataset_name.upper()}.UNIT_LEN"),
            )
            datasets.append(dataset)

        else:
            raise NotImplementedError

    if input_format == 'root_body_pos_vel_hand_pos_vel':
        cfg.DATASET.NFEATS = 313
    else:
        cfg.DATASET.NFEATS = datasets[0].nfeats

    cfg.DATASET.NJOINTS = datasets[0].njoints
    return datasets
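get_datasets() is the piece that wires the config and the data module together. A hedged usage sketch, assuming the HumanML3D_272 data referenced in assets.yaml has been prepared locally and the process was launched with --cfg ./configs/configs_evaluator_272/H3D-TMR.yaml:

from mld.config import parse_args
from mld.data.get_data import get_datasets

cfg = parse_args(phase="test")
datasets = get_datasets(cfg, phase="test")      # [HumanML3D_272_DataModule]
print(len(datasets), cfg.DATASET.NFEATS, cfg.DATASET.NJOINTS)  # e.g. 1, 272, 22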
Evaluator_272/mld/data/humanml/__init__.py
ADDED
File without changes
Evaluator_272/mld/data/humanml/common/quaternion.py
ADDED
@@ -0,0 +1,423 @@
# Copyright (c) 2018-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

import torch
import numpy as np

_EPS4 = np.finfo(float).eps * 4.0

_FLOAT_EPS = np.finfo(np.float64).eps

# PyTorch-backed implementations
def qinv(q):
    assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
    mask = torch.ones_like(q)
    mask[..., 1:] = -mask[..., 1:]
    return q * mask


def qinv_np(q):
    assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
    return qinv(torch.from_numpy(q).float()).numpy()


def qnormalize(q):
    assert q.shape[-1] == 4, 'q must be a tensor of shape (*, 4)'
    return q / torch.norm(q, dim=-1, keepdim=True)


def qmul(q, r):
    """
    Multiply quaternion(s) q with quaternion(s) r.
    Expects two equally-sized tensors of shape (*, 4), where * denotes any number of dimensions.
    Returns q*r as a tensor of shape (*, 4).
    """
    assert q.shape[-1] == 4
    assert r.shape[-1] == 4

    original_shape = q.shape

    # Compute outer product
    terms = torch.bmm(r.view(-1, 4, 1), q.view(-1, 1, 4))

    w = terms[:, 0, 0] - terms[:, 1, 1] - terms[:, 2, 2] - terms[:, 3, 3]
    x = terms[:, 0, 1] + terms[:, 1, 0] - terms[:, 2, 3] + terms[:, 3, 2]
    y = terms[:, 0, 2] + terms[:, 1, 3] + terms[:, 2, 0] - terms[:, 3, 1]
    z = terms[:, 0, 3] - terms[:, 1, 2] + terms[:, 2, 1] + terms[:, 3, 0]
    return torch.stack((w, x, y, z), dim=1).view(original_shape)


def qrot(q, v):
    """
    Rotate vector(s) v about the rotation described by quaternion(s) q.
    Expects a tensor of shape (*, 4) for q and a tensor of shape (*, 3) for v,
    where * denotes any number of dimensions.
    Returns a tensor of shape (*, 3).
    """
    assert q.shape[-1] == 4
    assert v.shape[-1] == 3
    assert q.shape[:-1] == v.shape[:-1]

    original_shape = list(v.shape)
    # print(q.shape)
    q = q.contiguous().view(-1, 4)
    v = v.contiguous().view(-1, 3)

    qvec = q[:, 1:]
    uv = torch.cross(qvec, v, dim=1)
    uuv = torch.cross(qvec, uv, dim=1)
    return (v + 2 * (q[:, :1] * uv + uuv)).view(original_shape)


def qeuler(q, order, epsilon=0, deg=True):
    """
    Convert quaternion(s) q to Euler angles.
    Expects a tensor of shape (*, 4), where * denotes any number of dimensions.
    Returns a tensor of shape (*, 3).
    """
    assert q.shape[-1] == 4

    original_shape = list(q.shape)
    original_shape[-1] = 3
    q = q.view(-1, 4)

    q0 = q[:, 0]
    q1 = q[:, 1]
    q2 = q[:, 2]
    q3 = q[:, 3]

    if order == 'xyz':
        x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
        y = torch.asin(torch.clamp(2 * (q1 * q3 + q0 * q2), -1 + epsilon, 1 - epsilon))
        z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
    elif order == 'yzx':
        x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
        y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
        z = torch.asin(torch.clamp(2 * (q1 * q2 + q0 * q3), -1 + epsilon, 1 - epsilon))
    elif order == 'zxy':
        x = torch.asin(torch.clamp(2 * (q0 * q1 + q2 * q3), -1 + epsilon, 1 - epsilon))
        y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
        z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q1 * q1 + q3 * q3))
    elif order == 'xzy':
        x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
        y = torch.atan2(2 * (q0 * q2 + q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
        z = torch.asin(torch.clamp(2 * (q0 * q3 - q1 * q2), -1 + epsilon, 1 - epsilon))
    elif order == 'yxz':
        x = torch.asin(torch.clamp(2 * (q0 * q1 - q2 * q3), -1 + epsilon, 1 - epsilon))
        y = torch.atan2(2 * (q1 * q3 + q0 * q2), 1 - 2 * (q1 * q1 + q2 * q2))
        z = torch.atan2(2 * (q1 * q2 + q0 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
    elif order == 'zyx':
        x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
        y = torch.asin(torch.clamp(2 * (q0 * q2 - q1 * q3), -1 + epsilon, 1 - epsilon))
        z = torch.atan2(2 * (q0 * q3 + q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
    else:
        raise

    if deg:
        return torch.stack((x, y, z), dim=1).view(original_shape) * 180 / np.pi
    else:
        return torch.stack((x, y, z), dim=1).view(original_shape)


# Numpy-backed implementations

def qmul_np(q, r):
    q = torch.from_numpy(q).contiguous().float()
    r = torch.from_numpy(r).contiguous().float()
    return qmul(q, r).numpy()


def qrot_np(q, v):
    q = torch.from_numpy(q).contiguous().float()
    v = torch.from_numpy(v).contiguous().float()
    return qrot(q, v).numpy()


def qeuler_np(q, order, epsilon=0, use_gpu=False):
    if use_gpu:
        q = torch.from_numpy(q).cuda().float()
        return qeuler(q, order, epsilon).cpu().numpy()
    else:
        q = torch.from_numpy(q).contiguous().float()
        return qeuler(q, order, epsilon).numpy()


def qfix(q):
    """
    Enforce quaternion continuity across the time dimension by selecting
    the representation (q or -q) with minimal distance (or, equivalently, maximal dot product)
    between two consecutive frames.

    Expects a tensor of shape (L, J, 4), where L is the sequence length and J is the number of joints.
    Returns a tensor of the same shape.
    """
    assert len(q.shape) == 3
    assert q.shape[-1] == 4

    result = q.copy()
    dot_products = np.sum(q[1:] * q[:-1], axis=2)
    mask = dot_products < 0
    mask = (np.cumsum(mask, axis=0) % 2).astype(bool)
    result[1:][mask] *= -1
    return result


def euler2quat(e, order, deg=True):
    """
    Convert Euler angles to quaternions.
    """
    assert e.shape[-1] == 3

    original_shape = list(e.shape)
    original_shape[-1] = 4

    e = e.view(-1, 3)

    ## if euler angles in degrees
    if deg:
        e = e * np.pi / 180.

    x = e[:, 0]
    y = e[:, 1]
    z = e[:, 2]

    rx = torch.stack((torch.cos(x / 2), torch.sin(x / 2), torch.zeros_like(x), torch.zeros_like(x)), dim=1)
    ry = torch.stack((torch.cos(y / 2), torch.zeros_like(y), torch.sin(y / 2), torch.zeros_like(y)), dim=1)
    rz = torch.stack((torch.cos(z / 2), torch.zeros_like(z), torch.zeros_like(z), torch.sin(z / 2)), dim=1)

    result = None
    for coord in order:
        if coord == 'x':
            r = rx
        elif coord == 'y':
            r = ry
        elif coord == 'z':
            r = rz
        else:
            raise
        if result is None:
            result = r
        else:
            result = qmul(result, r)

    # Reverse antipodal representation to have a non-negative "w"
    if order in ['xyz', 'yzx', 'zxy']:
        result *= -1

    return result.view(original_shape)


def expmap_to_quaternion(e):
    """
    Convert axis-angle rotations (aka exponential maps) to quaternions.
    Stable formula from "Practical Parameterization of Rotations Using the Exponential Map".
    Expects a tensor of shape (*, 3), where * denotes any number of dimensions.
    Returns a tensor of shape (*, 4).
    """
    assert e.shape[-1] == 3

    original_shape = list(e.shape)
    original_shape[-1] = 4
    e = e.reshape(-1, 3)

    theta = np.linalg.norm(e, axis=1).reshape(-1, 1)
    w = np.cos(0.5 * theta).reshape(-1, 1)
    xyz = 0.5 * np.sinc(0.5 * theta / np.pi) * e
    return np.concatenate((w, xyz), axis=1).reshape(original_shape)


def euler_to_quaternion(e, order):
    """
|
| 235 |
+
Convert Euler angles to quaternions.
|
| 236 |
+
"""
|
| 237 |
+
assert e.shape[-1] == 3
|
| 238 |
+
|
| 239 |
+
original_shape = list(e.shape)
|
| 240 |
+
original_shape[-1] = 4
|
| 241 |
+
|
| 242 |
+
e = e.reshape(-1, 3)
|
| 243 |
+
|
| 244 |
+
x = e[:, 0]
|
| 245 |
+
y = e[:, 1]
|
| 246 |
+
z = e[:, 2]
|
| 247 |
+
|
| 248 |
+
rx = np.stack((np.cos(x / 2), np.sin(x / 2), np.zeros_like(x), np.zeros_like(x)), axis=1)
|
| 249 |
+
ry = np.stack((np.cos(y / 2), np.zeros_like(y), np.sin(y / 2), np.zeros_like(y)), axis=1)
|
| 250 |
+
rz = np.stack((np.cos(z / 2), np.zeros_like(z), np.zeros_like(z), np.sin(z / 2)), axis=1)
|
| 251 |
+
|
| 252 |
+
result = None
|
| 253 |
+
for coord in order:
|
| 254 |
+
if coord == 'x':
|
| 255 |
+
r = rx
|
| 256 |
+
elif coord == 'y':
|
| 257 |
+
r = ry
|
| 258 |
+
elif coord == 'z':
|
| 259 |
+
r = rz
|
| 260 |
+
else:
|
| 261 |
+
raise
|
| 262 |
+
if result is None:
|
| 263 |
+
result = r
|
| 264 |
+
else:
|
| 265 |
+
result = qmul_np(result, r)
|
| 266 |
+
|
| 267 |
+
# Reverse antipodal representation to have a non-negative "w"
|
| 268 |
+
if order in ['xyz', 'yzx', 'zxy']:
|
| 269 |
+
result *= -1
|
| 270 |
+
|
| 271 |
+
return result.reshape(original_shape)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def quaternion_to_matrix(quaternions):
|
| 275 |
+
"""
|
| 276 |
+
Convert rotations given as quaternions to rotation matrices.
|
| 277 |
+
Args:
|
| 278 |
+
quaternions: quaternions with real part first,
|
| 279 |
+
as tensor of shape (..., 4).
|
| 280 |
+
Returns:
|
| 281 |
+
Rotation matrices as tensor of shape (..., 3, 3).
|
| 282 |
+
"""
|
| 283 |
+
r, i, j, k = torch.unbind(quaternions, -1)
|
| 284 |
+
two_s = 2.0 / (quaternions * quaternions).sum(-1)
|
| 285 |
+
|
| 286 |
+
o = torch.stack(
|
| 287 |
+
(
|
| 288 |
+
1 - two_s * (j * j + k * k),
|
| 289 |
+
two_s * (i * j - k * r),
|
| 290 |
+
two_s * (i * k + j * r),
|
| 291 |
+
two_s * (i * j + k * r),
|
| 292 |
+
1 - two_s * (i * i + k * k),
|
| 293 |
+
two_s * (j * k - i * r),
|
| 294 |
+
two_s * (i * k - j * r),
|
| 295 |
+
two_s * (j * k + i * r),
|
| 296 |
+
1 - two_s * (i * i + j * j),
|
| 297 |
+
),
|
| 298 |
+
-1,
|
| 299 |
+
)
|
| 300 |
+
return o.reshape(quaternions.shape[:-1] + (3, 3))
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def quaternion_to_matrix_np(quaternions):
|
| 304 |
+
q = torch.from_numpy(quaternions).contiguous().float()
|
| 305 |
+
return quaternion_to_matrix(q).numpy()
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def quaternion_to_cont6d_np(quaternions):
|
| 309 |
+
rotation_mat = quaternion_to_matrix_np(quaternions)
|
| 310 |
+
cont_6d = np.concatenate([rotation_mat[..., 0], rotation_mat[..., 1]], axis=-1)
|
| 311 |
+
return cont_6d
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def quaternion_to_cont6d(quaternions):
|
| 315 |
+
rotation_mat = quaternion_to_matrix(quaternions)
|
| 316 |
+
cont_6d = torch.cat([rotation_mat[..., 0], rotation_mat[..., 1]], dim=-1)
|
| 317 |
+
return cont_6d
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def cont6d_to_matrix(cont6d):
|
| 321 |
+
assert cont6d.shape[-1] == 6, "The last dimension must be 6"
|
| 322 |
+
x_raw = cont6d[..., 0:3]
|
| 323 |
+
y_raw = cont6d[..., 3:6]
|
| 324 |
+
|
| 325 |
+
x = x_raw / torch.norm(x_raw, dim=-1, keepdim=True)
|
| 326 |
+
z = torch.cross(x, y_raw, dim=-1)
|
| 327 |
+
z = z / torch.norm(z, dim=-1, keepdim=True)
|
| 328 |
+
|
| 329 |
+
y = torch.cross(z, x, dim=-1)
|
| 330 |
+
|
| 331 |
+
x = x[..., None]
|
| 332 |
+
y = y[..., None]
|
| 333 |
+
z = z[..., None]
|
| 334 |
+
|
| 335 |
+
mat = torch.cat([x, y, z], dim=-1)
|
| 336 |
+
return mat
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def cont6d_to_matrix_np(cont6d):
|
| 340 |
+
q = torch.from_numpy(cont6d).contiguous().float()
|
| 341 |
+
return cont6d_to_matrix(q).numpy()
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def qpow(q0, t, dtype=torch.float):
|
| 345 |
+
''' q0 : tensor of quaternions
|
| 346 |
+
t: tensor of powers
|
| 347 |
+
'''
|
| 348 |
+
q0 = qnormalize(q0)
|
| 349 |
+
theta0 = torch.acos(q0[..., 0])
|
| 350 |
+
|
| 351 |
+
## if theta0 is close to zero, add epsilon to avoid NaNs
|
| 352 |
+
mask = (theta0 <= 10e-10) * (theta0 >= -10e-10)
|
| 353 |
+
theta0 = (1 - mask) * theta0 + mask * 10e-10
|
| 354 |
+
v0 = q0[..., 1:] / torch.sin(theta0).view(-1, 1)
|
| 355 |
+
|
| 356 |
+
if isinstance(t, torch.Tensor):
|
| 357 |
+
q = torch.zeros(t.shape + q0.shape)
|
| 358 |
+
theta = t.view(-1, 1) * theta0.view(1, -1)
|
| 359 |
+
else: ## if t is a number
|
| 360 |
+
q = torch.zeros(q0.shape)
|
| 361 |
+
theta = t * theta0
|
| 362 |
+
|
| 363 |
+
q[..., 0] = torch.cos(theta)
|
| 364 |
+
q[..., 1:] = v0 * torch.sin(theta).unsqueeze(-1)
|
| 365 |
+
|
| 366 |
+
return q.to(dtype)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def qslerp(q0, q1, t):
|
| 370 |
+
'''
|
| 371 |
+
q0: starting quaternion
|
| 372 |
+
q1: ending quaternion
|
| 373 |
+
t: array of points along the way
|
| 374 |
+
|
| 375 |
+
Returns:
|
| 376 |
+
Tensor of Slerps: t.shape + q0.shape
|
| 377 |
+
'''
|
| 378 |
+
|
| 379 |
+
q0 = qnormalize(q0)
|
| 380 |
+
q1 = qnormalize(q1)
|
| 381 |
+
q_ = qpow(qmul(q1, qinv(q0)), t)
|
| 382 |
+
|
| 383 |
+
return qmul(q_,
|
| 384 |
+
q0.contiguous().view(torch.Size([1] * len(t.shape)) + q0.shape).expand(t.shape + q0.shape).contiguous())
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def qbetween(v0, v1):
|
| 388 |
+
'''
|
| 389 |
+
find the quaternion used to rotate v0 to v1
|
| 390 |
+
'''
|
| 391 |
+
assert v0.shape[-1] == 3, 'v0 must be of the shape (*, 3)'
|
| 392 |
+
assert v1.shape[-1] == 3, 'v1 must be of the shape (*, 3)'
|
| 393 |
+
|
| 394 |
+
v = torch.cross(v0, v1)
|
| 395 |
+
w = torch.sqrt((v0 ** 2).sum(dim=-1, keepdim=True) * (v1 ** 2).sum(dim=-1, keepdim=True)) + (v0 * v1).sum(dim=-1,
|
| 396 |
+
keepdim=True)
|
| 397 |
+
return qnormalize(torch.cat([w, v], dim=-1))
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
def qbetween_np(v0, v1):
|
| 401 |
+
'''
|
| 402 |
+
find the quaternion used to rotate v0 to v1
|
| 403 |
+
'''
|
| 404 |
+
assert v0.shape[-1] == 3, 'v0 must be of the shape (*, 3)'
|
| 405 |
+
assert v1.shape[-1] == 3, 'v1 must be of the shape (*, 3)'
|
| 406 |
+
|
| 407 |
+
v0 = torch.from_numpy(v0).float()
|
| 408 |
+
v1 = torch.from_numpy(v1).float()
|
| 409 |
+
return qbetween(v0, v1).numpy()
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
def lerp(p0, p1, t):
|
| 413 |
+
if not isinstance(t, torch.Tensor):
|
| 414 |
+
t = torch.Tensor([t])
|
| 415 |
+
|
| 416 |
+
new_shape = t.shape + p0.shape
|
| 417 |
+
new_view_t = t.shape + torch.Size([1] * len(p0.shape))
|
| 418 |
+
new_view_p = torch.Size([1] * len(t.shape)) + p0.shape
|
| 419 |
+
p0 = p0.view(new_view_p).expand(new_shape)
|
| 420 |
+
p1 = p1.view(new_view_p).expand(new_shape)
|
| 421 |
+
t = t.view(new_view_t).expand(new_shape)
|
| 422 |
+
|
| 423 |
+
return p0 + t * (p1 - p0)
|
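The quaternion helpers above are plain PyTorch functions, so they can be sanity-checked in isolation. Below is a minimal sketch (illustrative only, not part of the repository files); it assumes the functions above are in scope, for example via `from .quaternion import *` as skeleton.py does further down, and relies on `qnormalize` being defined earlier in this file.

# Illustrative sketch: check that qrot and quaternion_to_matrix agree on random rotations.
import torch

q = qnormalize(torch.randn(5, 4))      # five random unit quaternions, (w, x, y, z) convention
v = torch.randn(5, 3)                  # five vectors to rotate
v_quat = qrot(q, v)                    # rotate via the quaternion formula
v_mat = torch.einsum('bij,bj->bi', quaternion_to_matrix(q), v)   # rotate via the 3x3 matrices
assert torch.allclose(v_quat, v_mat, atol=1e-5)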
Evaluator_272/mld/data/humanml/common/skeleton.py
ADDED
@@ -0,0 +1,199 @@
from .quaternion import *
import scipy.ndimage.filters as filters

class Skeleton(object):
    def __init__(self, offset, kinematic_tree, device):
        self.device = device
        self._raw_offset_np = offset.numpy()
        self._raw_offset = offset.clone().detach().to(device).float()
        self._kinematic_tree = kinematic_tree
        self._offset = None
        self._parents = [0] * len(self._raw_offset)
        self._parents[0] = -1
        for chain in self._kinematic_tree:
            for j in range(1, len(chain)):
                self._parents[chain[j]] = chain[j-1]

    def njoints(self):
        return len(self._raw_offset)

    def offset(self):
        return self._offset

    def set_offset(self, offsets):
        self._offset = offsets.clone().detach().to(self.device).float()

    def kinematic_tree(self):
        return self._kinematic_tree

    def parents(self):
        return self._parents

    # joints (batch_size, joints_num, 3)
    def get_offsets_joints_batch(self, joints):
        assert len(joints.shape) == 3
        _offsets = self._raw_offset.expand(joints.shape[0], -1, -1).clone()
        for i in range(1, self._raw_offset.shape[0]):
            _offsets[:, i] = torch.norm(joints[:, i] - joints[:, self._parents[i]], p=2, dim=1)[:, None] * _offsets[:, i]

        self._offset = _offsets.detach()
        return _offsets

    # joints (joints_num, 3)
    def get_offsets_joints(self, joints):
        assert len(joints.shape) == 2
        _offsets = self._raw_offset.clone()
        for i in range(1, self._raw_offset.shape[0]):
            # print(joints.shape)
            _offsets[i] = torch.norm(joints[i] - joints[self._parents[i]], p=2, dim=0) * _offsets[i]

        self._offset = _offsets.detach()
        return _offsets

    # face_joint_idx should follow the order of right hip, left hip, right shoulder, left shoulder
    # joints (batch_size, joints_num, 3)
    def inverse_kinematics_np(self, joints, face_joint_idx, smooth_forward=False):
        assert len(face_joint_idx) == 4
        '''Get Forward Direction'''
        l_hip, r_hip, sdr_r, sdr_l = face_joint_idx
        across1 = joints[:, r_hip] - joints[:, l_hip]
        across2 = joints[:, sdr_r] - joints[:, sdr_l]
        across = across1 + across2
        across = across / np.sqrt((across**2).sum(axis=-1))[:, np.newaxis]
        # print(across1.shape, across2.shape)

        # forward (batch_size, 3)
        forward = np.cross(np.array([[0, 1, 0]]), across, axis=-1)
        if smooth_forward:
            forward = filters.gaussian_filter1d(forward, 20, axis=0, mode='nearest')
            # forward (batch_size, 3)
        forward = forward / np.sqrt((forward**2).sum(axis=-1))[..., np.newaxis]

        '''Get Root Rotation'''
        target = np.array([[0,0,1]]).repeat(len(forward), axis=0)
        root_quat = qbetween_np(forward, target)

        '''Inverse Kinematics'''
        # quat_params (batch_size, joints_num, 4)
        # print(joints.shape[:-1])
        quat_params = np.zeros(joints.shape[:-1] + (4,))
        # print(quat_params.shape)
        root_quat[0] = np.array([[1.0, 0.0, 0.0, 0.0]])
        quat_params[:, 0] = root_quat
        # quat_params[0, 0] = np.array([[1.0, 0.0, 0.0, 0.0]])
        for chain in self._kinematic_tree:
            R = root_quat
            for j in range(len(chain) - 1):
                # (batch, 3)
                u = self._raw_offset_np[chain[j+1]][np.newaxis,...].repeat(len(joints), axis=0)
                # print(u.shape)
                # (batch, 3)
                v = joints[:, chain[j+1]] - joints[:, chain[j]]
                v = v / np.sqrt((v**2).sum(axis=-1))[:, np.newaxis]
                # print(u.shape, v.shape)
                rot_u_v = qbetween_np(u, v)

                R_loc = qmul_np(qinv_np(R), rot_u_v)

                quat_params[:,chain[j + 1], :] = R_loc
                R = qmul_np(R, R_loc)

        return quat_params

    # Be sure root joint is at the beginning of kinematic chains
    def forward_kinematics(self, quat_params, root_pos, skel_joints=None, do_root_R=True):
        # quat_params (batch_size, joints_num, 4)
        # joints (batch_size, joints_num, 3)
        # root_pos (batch_size, 3)
        if skel_joints is not None:
            offsets = self.get_offsets_joints_batch(skel_joints)
        if len(self._offset.shape) == 2:
            offsets = self._offset.expand(quat_params.shape[0], -1, -1)
        joints = torch.zeros(quat_params.shape[:-1] + (3,)).to(self.device)
        joints[:, 0] = root_pos
        for chain in self._kinematic_tree:
            if do_root_R:
                R = quat_params[:, 0]
            else:
                R = torch.tensor([[1.0, 0.0, 0.0, 0.0]]).expand(len(quat_params), -1).detach().to(self.device)
            for i in range(1, len(chain)):
                R = qmul(R, quat_params[:, chain[i]])
                offset_vec = offsets[:, chain[i]]
                joints[:, chain[i]] = qrot(R, offset_vec) + joints[:, chain[i-1]]
        return joints

    # Be sure root joint is at the beginning of kinematic chains
    def forward_kinematics_np(self, quat_params, root_pos, skel_joints=None, do_root_R=True):
        # quat_params (batch_size, joints_num, 4)
        # joints (batch_size, joints_num, 3)
        # root_pos (batch_size, 3)
        if skel_joints is not None:
            skel_joints = torch.from_numpy(skel_joints)
            offsets = self.get_offsets_joints_batch(skel_joints)
        if len(self._offset.shape) == 2:
            offsets = self._offset.expand(quat_params.shape[0], -1, -1)
        offsets = offsets.numpy()
        joints = np.zeros(quat_params.shape[:-1] + (3,))
        joints[:, 0] = root_pos
        for chain in self._kinematic_tree:
            if do_root_R:
                R = quat_params[:, 0]
            else:
                R = np.array([[1.0, 0.0, 0.0, 0.0]]).repeat(len(quat_params), axis=0)
            for i in range(1, len(chain)):
                R = qmul_np(R, quat_params[:, chain[i]])
                offset_vec = offsets[:, chain[i]]
                joints[:, chain[i]] = qrot_np(R, offset_vec) + joints[:, chain[i - 1]]
        return joints

    def forward_kinematics_cont6d_np(self, cont6d_params, root_pos, skel_joints=None, do_root_R=True):
        # cont6d_params (batch_size, joints_num, 6)
        # joints (batch_size, joints_num, 3)
        # root_pos (batch_size, 3)
        if skel_joints is not None:
            skel_joints = torch.from_numpy(skel_joints)
            offsets = self.get_offsets_joints_batch(skel_joints)
        if len(self._offset.shape) == 2:
            offsets = self._offset.expand(cont6d_params.shape[0], -1, -1)
        offsets = offsets.numpy()
        joints = np.zeros(cont6d_params.shape[:-1] + (3,))
        joints[:, 0] = root_pos
        for chain in self._kinematic_tree:
            if do_root_R:
                matR = cont6d_to_matrix_np(cont6d_params[:, 0])
            else:
                matR = np.eye(3)[np.newaxis, :].repeat(len(cont6d_params), axis=0)
            for i in range(1, len(chain)):
                matR = np.matmul(matR, cont6d_to_matrix_np(cont6d_params[:, chain[i]]))
                offset_vec = offsets[:, chain[i]][..., np.newaxis]
                # print(matR.shape, offset_vec.shape)
                joints[:, chain[i]] = np.matmul(matR, offset_vec).squeeze(-1) + joints[:, chain[i-1]]
        return joints

    def forward_kinematics_cont6d(self, cont6d_params, root_pos, skel_joints=None, do_root_R=True):
        # cont6d_params (batch_size, joints_num, 6)
        # joints (batch_size, joints_num, 3)
        # root_pos (batch_size, 3)
        if skel_joints is not None:
            # skel_joints = torch.from_numpy(skel_joints)
            offsets = self.get_offsets_joints_batch(skel_joints)
        if len(self._offset.shape) == 2:
            offsets = self._offset.expand(cont6d_params.shape[0], -1, -1)
        joints = torch.zeros(cont6d_params.shape[:-1] + (3,)).to(cont6d_params.device)
        joints[..., 0, :] = root_pos
        for chain in self._kinematic_tree:
            if do_root_R:
                matR = cont6d_to_matrix(cont6d_params[:, 0])
            else:
                matR = torch.eye(3).expand((len(cont6d_params), -1, -1)).detach().to(cont6d_params.device)
            for i in range(1, len(chain)):
                matR = torch.matmul(matR, cont6d_to_matrix(cont6d_params[:, chain[i]]))
                offset_vec = offsets[:, chain[i]].unsqueeze(-1)
                # print(matR.shape, offset_vec.shape)
                joints[:, chain[i]] = torch.matmul(matR, offset_vec).squeeze(-1) + joints[:, chain[i-1]]
        return joints
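Skeleton wires the quaternion helpers into inverse and forward kinematics. A minimal round-trip sketch follows (illustrative only, not a file in this repository): it assumes a (seq_len, 22, 3) numpy array `joints` of HumanML3D joint positions, plus `t2m_raw_offsets` and `t2m_kinematic_chain` from mld/data/humanml/utils/paramUtil.py, the same constants motion_process.py imports further down.

# Illustrative sketch: joint positions -> per-joint quaternions -> joint positions again.
import torch

skel = Skeleton(torch.from_numpy(t2m_raw_offsets), t2m_kinematic_chain, 'cpu')
skel.get_offsets_joints(torch.from_numpy(joints[0]))      # bone lengths taken from the first frame
quat_params = skel.inverse_kinematics_np(joints, face_joint_idx=[2, 1, 17, 16])
joints_rec = skel.forward_kinematics_np(quat_params, root_pos=joints[:, 0])   # approximately equal to joints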
Evaluator_272/mld/data/humanml/data/__init__.py
ADDED
File without changes
Evaluator_272/mld/data/humanml/data/dataset.py
ADDED
@@ -0,0 +1,227 @@
import codecs as cs
import os
import random
from os.path import join as pjoin

import numpy as np
import spacy
import torch
from rich.progress import track
from torch.utils import data
from torch.utils.data._utils.collate import default_collate
from tqdm import tqdm
import json


def collate_fn(batch):
    batch.sort(key=lambda x: x[3], reverse=True)
    return default_collate(batch)


def findAllFile(base):
    file_path = []
    for root, ds, fs in os.walk(base, followlinks=True):
        for f in fs:
            fullname = os.path.join(root, f)
            file_path.append(fullname)
    return file_path


class Text2MotionDatasetV2(data.Dataset):

    def __init__(
        self,
        mean,
        std,
        split_file,
        max_motion_length,
        min_motion_length,
        max_text_len,
        unit_length,
        motion_dir,
        text_dir,
        input_format,
        njoints,
        tiny=False,
        debug=False,
        progress_bar=True,
        **kwargs,
    ):

        self.max_length = 20
        self.pointer = 0
        self.max_motion_length = max_motion_length

        self.min_motion_length = min_motion_length
        self.max_text_len = max_text_len
        self.unit_length = unit_length
        data_dict = {}
        id_list = []
        with cs.open(split_file, "r") as f:
            for line in f.readlines():
                id_list.append(line.strip())
        self.id_list = id_list
        if tiny or debug:
            progress_bar = False
            maxdata = 10 if tiny else 100
        else:
            maxdata = 1e10

        if progress_bar:
            enumerator = enumerate(
                track(
                    id_list,
                    f"Loading {split_file.split('/')[-2]} {split_file.split('/')[-1].split('.')[0]}",
                ))
        else:
            enumerator = enumerate(id_list)
        count = 0
        bad_count = 0
        miss_count = 0
        new_name_list = []
        length_list = []

        for i, name in enumerator:
            if count > maxdata:
                break
            try:

                motion = np.load(pjoin(motion_dir, name + ".npy"))

                if input_format == 'root_position':
                    motion = motion[..., :4+(njoints-1)*3]
                elif input_format == 'root_position_vel':
                    motion = np.concatenate((motion[..., :4+(njoints - 1) * 3], motion[..., 4+(njoints - 1) * 9: 4+(njoints - 1) * 9 + njoints*3]), axis=-1)
                elif input_format == 'root_position_rot6d':
                    motion = np.concatenate((motion[..., :4+(njoints - 1) * 3], motion[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=-1)
                elif input_format == 'root_rot6d':
                    motion = np.concatenate((motion[..., :4], motion[..., 4+(njoints - 1) * 3: 4+(njoints - 1) * 9]), axis=-1)
                elif input_format in ['vector_263', '']:
                    pass
                else:
                    raise NotImplementedError

                text_data = []
                flag = False
                with cs.open(pjoin(text_dir, name + ".txt")) as f:
                    for line in f.readlines():
                        text_dict = {}
                        line_split = line.strip().split("#")
                        caption = line_split[0]
                        tokens = line_split[1].split(" ")
                        f_tag = float(line_split[2])
                        to_tag = float(line_split[3])
                        f_tag = 0.0 if np.isnan(f_tag) else f_tag
                        to_tag = 0.0 if np.isnan(to_tag) else to_tag

                        text_dict["caption"] = caption
                        text_dict["tokens"] = tokens
                        if f_tag == 0.0 and to_tag == 0.0:
                            flag = True
                            text_data.append(text_dict)
                        else:
                            try:
                                n_motion = motion[int(f_tag * 30):int(to_tag * 30)]

                                new_name = (
                                    random.choice("ABCDEFGHIJKLMNOPQRSTUVW") +
                                    "_" + name)
                                while new_name in data_dict:
                                    new_name = (random.choice(
                                        "ABCDEFGHIJKLMNOPQRSTUVW") + "_" +
                                                name)
                                data_dict[new_name] = {
                                    "motion": n_motion,
                                    "length": len(n_motion),
                                    "text": [text_dict],
                                }
                                new_name_list.append(new_name)
                                length_list.append(len(n_motion))
                            except:
                                print(line_split)
                                print(line_split[2], line_split[3], f_tag,
                                      to_tag, name)

                if flag:
                    data_dict[name] = {
                        "motion": motion,
                        "length": len(motion),
                        "text": text_data,
                    }
                    new_name_list.append(name)
                    length_list.append(len(motion))
                    count += 1

            except:
                miss_count += 1
                pass

        print(f'Here are {miss_count} not in dataset!')

        name_list, length_list = zip(
            *sorted(zip(new_name_list, length_list), key=lambda x: x[1]))

        self.mean = mean
        self.std = std

        self.length_arr = np.array(length_list)
        self.data_dict = data_dict
        self.nfeats = motion.shape[1]
        self.name_list = name_list
        self.reset_max_len(self.max_length)

    def reset_max_len(self, length):
        assert length <= self.max_motion_length
        self.pointer = np.searchsorted(self.length_arr, length)
        print("Pointer Pointing at %d" % self.pointer)
        self.max_length = length

    def inv_transform(self, data):
        return data * self.std + self.mean

    def __len__(self):
        return len(self.name_list) - self.pointer

    def __getitem__(self, item):
        idx = self.pointer + item
        data = self.data_dict[self.name_list[idx]]

        retrieval_name = self.name_list[idx].split('_')[-1]

        motion, m_length, text_list = data["motion"], data["length"], data["text"]

        # Randomly select a caption
        text_data = random.choice(text_list)
        # caption, tokens = text_data["caption"], text_data["tokens"]
        caption = text_data["caption"]

        # Crop the motions in to times of 4, and introduce small variations
        if self.unit_length < 10:
            coin2 = np.random.choice(["single", "single", "double"])
        else:
            coin2 = "single"

        if coin2 == "double":
            m_length = (m_length // self.unit_length - 1) * self.unit_length
        elif coin2 == "single":
            m_length = (m_length // self.unit_length) * self.unit_length
        idx = random.randint(0, len(motion) - m_length)
        motion = motion[idx:idx + m_length]
        "Normalization"
        motion = (motion - self.mean) / self.std

        if np.any(np.isnan(motion)):
            raise ValueError("nan in motion")

        return (
            caption,
            motion,
            m_length,
            retrieval_name
        )
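A minimal instantiation sketch for Text2MotionDatasetV2 follows (illustrative only; the directory layout, file names and hyper-parameter values below are assumptions, not values fixed by this file):

# Hypothetical paths and values, for illustration only.
import numpy as np

root = './datasets/humanml3d_272'                          # assumed dataset root
dataset = Text2MotionDatasetV2(
    mean=np.load(f'{root}/Mean.npy'),                      # assumed normalization statistics
    std=np.load(f'{root}/Std.npy'),
    split_file=f'{root}/train.txt',
    max_motion_length=300, min_motion_length=40,
    max_text_len=20, unit_length=4,
    motion_dir=f'{root}/motion_data', text_dir=f'{root}/texts',
    input_format='', njoints=22,                           # '' keeps the full per-frame feature vector
)
caption, motion, m_length, retrieval_name = dataset[0]
# motion is normalized as (motion - mean) / std and cropped to a multiple of unit_length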
Evaluator_272/mld/data/humanml/scripts/motion_process.py
ADDED
@@ -0,0 +1,576 @@
from os.path import join as pjoin

from ..common.skeleton import Skeleton
import numpy as np
import os
from ..common.quaternion import *
from ..utils.paramUtil import *

import torch
from tqdm import tqdm

# positions (batch, joint_num, 3)
def uniform_skeleton(positions, target_offset):
    src_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
    src_offset = src_skel.get_offsets_joints(torch.from_numpy(positions[0]))
    src_offset = src_offset.numpy()
    tgt_offset = target_offset.numpy()
    # print(src_offset)
    # print(tgt_offset)
    '''Calculate Scale Ratio as the ratio of legs'''
    src_leg_len = np.abs(src_offset[l_idx1]).max() + np.abs(src_offset[l_idx2]).max()
    tgt_leg_len = np.abs(tgt_offset[l_idx1]).max() + np.abs(tgt_offset[l_idx2]).max()

    scale_rt = tgt_leg_len / src_leg_len
    # print(scale_rt)
    src_root_pos = positions[:, 0]
    tgt_root_pos = src_root_pos * scale_rt

    '''Inverse Kinematics'''
    quat_params = src_skel.inverse_kinematics_np(positions, face_joint_indx)
    # print(quat_params.shape)

    '''Forward Kinematics'''
    src_skel.set_offset(target_offset)
    new_joints = src_skel.forward_kinematics_np(quat_params, tgt_root_pos)
    return new_joints


def extract_features(positions, feet_thre, n_raw_offsets, kinematic_chain, face_joint_indx, fid_r, fid_l):
    global_positions = positions.copy()
    """ Get Foot Contacts """

    def foot_detect(positions, thres):
        velfactor, heightfactor = np.array([thres, thres]), np.array([3.0, 2.0])

        feet_l_x = (positions[1:, fid_l, 0] - positions[:-1, fid_l, 0]) ** 2
        feet_l_y = (positions[1:, fid_l, 1] - positions[:-1, fid_l, 1]) ** 2
        feet_l_z = (positions[1:, fid_l, 2] - positions[:-1, fid_l, 2]) ** 2
        # feet_l_h = positions[:-1,fid_l,1]
        # feet_l = (((feet_l_x + feet_l_y + feet_l_z) < velfactor) & (feet_l_h < heightfactor)).astype(np.float64)
        feet_l = ((feet_l_x + feet_l_y + feet_l_z) < velfactor).astype(np.float64)

        feet_r_x = (positions[1:, fid_r, 0] - positions[:-1, fid_r, 0]) ** 2
        feet_r_y = (positions[1:, fid_r, 1] - positions[:-1, fid_r, 1]) ** 2
        feet_r_z = (positions[1:, fid_r, 2] - positions[:-1, fid_r, 2]) ** 2
        # feet_r_h = positions[:-1,fid_r,1]
        # feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor) & (feet_r_h < heightfactor)).astype(np.float64)
        feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor)).astype(np.float64)
        return feet_l, feet_r

    #
    feet_l, feet_r = foot_detect(positions, feet_thre)
    # feet_l, feet_r = foot_detect(positions, 0.002)

    '''Quaternion and Cartesian representation'''
    r_rot = None

    def get_rifke(positions):
        '''Local pose'''
        positions[..., 0] -= positions[:, 0:1, 0]
        positions[..., 2] -= positions[:, 0:1, 2]
        '''All pose face Z+'''
        positions = qrot_np(np.repeat(r_rot[:, None], positions.shape[1], axis=1), positions)
        return positions

    def get_quaternion(positions):
        skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
        # (seq_len, joints_num, 4)
        quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=False)

        '''Fix Quaternion Discontinuity'''
        quat_params = qfix(quat_params)
        # (seq_len, 4)
        r_rot = quat_params[:, 0].copy()
        # print(r_rot[0])
        '''Root Linear Velocity'''
        # (seq_len - 1, 3)
        velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
        # print(r_rot.shape, velocity.shape)
        velocity = qrot_np(r_rot[1:], velocity)
        '''Root Angular Velocity'''
        # (seq_len - 1, 4)
        r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
        quat_params[1:, 0] = r_velocity
        # (seq_len, joints_num, 4)
        return quat_params, r_velocity, velocity, r_rot

    def get_cont6d_params(positions):
        skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
        # (seq_len, joints_num, 4)
        quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=True)

        '''Quaternion to continuous 6D'''
        cont_6d_params = quaternion_to_cont6d_np(quat_params)
        # (seq_len, 4)
        r_rot = quat_params[:, 0].copy()
        # print(r_rot[0])
        '''Root Linear Velocity'''
        # (seq_len - 1, 3)
        velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
        # print(r_rot.shape, velocity.shape)
        velocity = qrot_np(r_rot[1:], velocity)
        '''Root Angular Velocity'''
        # (seq_len - 1, 4)
        r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
        # (seq_len, joints_num, 4)
        return cont_6d_params, r_velocity, velocity, r_rot

    cont_6d_params, r_velocity, velocity, r_rot = get_cont6d_params(positions)
    positions = get_rifke(positions)

    # trejec = np.cumsum(np.concatenate([np.array([[0, 0, 0]]), velocity], axis=0), axis=0)
    # r_rotations, r_pos = recover_ric_glo_np(r_velocity, velocity[:, [0, 2]])

    # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
    # plt.plot(ground_positions[:, 0, 0], ground_positions[:, 0, 2], marker='o', color='r')
    # plt.plot(trejec[:, 0], trejec[:, 2], marker='^', color='g')
    # plt.plot(r_pos[:, 0], r_pos[:, 2], marker='s', color='y')
    # plt.xlabel('x')
    # plt.ylabel('z')
    # plt.axis('equal')
    # plt.show()

    '''Root height'''
    root_y = positions[:, 0, 1:2]

    '''Root rotation and linear velocity'''
    # (seq_len-1, 1) rotation velocity along y-axis
    # (seq_len-1, 2) linear velovity on xz plane
    r_velocity = np.arcsin(r_velocity[:, 2:3])
    l_velocity = velocity[:, [0, 2]]
    # print(r_velocity.shape, l_velocity.shape, root_y.shape)
    root_data = np.concatenate([r_velocity, l_velocity, root_y[:-1]], axis=-1)

    '''Get Joint Rotation Representation'''
    # (seq_len, (joints_num-1) *6) quaternion for skeleton joints
    rot_data = cont_6d_params[:, 1:].reshape(len(cont_6d_params), -1)

    '''Get Joint Rotation Invariant Position Represention'''
    # (seq_len, (joints_num-1)*3) local joint position
    ric_data = positions[:, 1:].reshape(len(positions), -1)

    '''Get Joint Velocity Representation'''
    # (seq_len-1, joints_num*3)
    local_vel = qrot_np(np.repeat(r_rot[:-1, None], global_positions.shape[1], axis=1),
                        global_positions[1:] - global_positions[:-1])
    local_vel = local_vel.reshape(len(local_vel), -1)

    data = root_data
    data = np.concatenate([data, ric_data[:-1]], axis=-1)
    data = np.concatenate([data, rot_data[:-1]], axis=-1)
    # print(dataset.shape, local_vel.shape)
    data = np.concatenate([data, local_vel], axis=-1)
    data = np.concatenate([data, feet_l, feet_r], axis=-1)

    return data


def process_file(positions, feet_thre):
    # (seq_len, joints_num, 3)
    # '''Down Sample'''
    # positions = positions[::ds_num]

    '''Uniform Skeleton'''
    positions = uniform_skeleton(positions, tgt_offsets)

    '''Put on Floor'''
    floor_height = positions.min(axis=0).min(axis=0)[1]
    positions[:, :, 1] -= floor_height
    # print(floor_height)

    # plot_3d_motion("./positions_1.mp4", kinematic_chain, positions, 'title', fps=20)

    '''XZ at origin'''
    root_pos_init = positions[0]
    root_pose_init_xz = root_pos_init[0] * np.array([1, 0, 1])
    positions = positions - root_pose_init_xz

    # '''Move the first pose to origin '''
    # root_pos_init = positions[0]
    # positions = positions - root_pos_init[0]

    '''All initially face Z+'''
    r_hip, l_hip, sdr_r, sdr_l = face_joint_indx
    across1 = root_pos_init[r_hip] - root_pos_init[l_hip]
    across2 = root_pos_init[sdr_r] - root_pos_init[sdr_l]
    across = across1 + across2
    across = across / np.sqrt((across ** 2).sum(axis=-1))[..., np.newaxis]

    # forward (3,), rotate around y-axis
    forward_init = np.cross(np.array([[0, 1, 0]]), across, axis=-1)
    # forward (3,)
    forward_init = forward_init / np.sqrt((forward_init ** 2).sum(axis=-1))[..., np.newaxis]

    # print(forward_init)

    target = np.array([[0, 0, 1]])
    root_quat_init = qbetween_np(forward_init, target)
    root_quat_init = np.ones(positions.shape[:-1] + (4,)) * root_quat_init

    positions_b = positions.copy()

    positions = qrot_np(root_quat_init, positions)

    # plot_3d_motion("./positions_2.mp4", kinematic_chain, positions, 'title', fps=20)

    '''New ground truth positions'''
    global_positions = positions.copy()

    # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
    # plt.plot(positions[:, 0, 0], positions[:, 0, 2], marker='o', color='r')
    # plt.xlabel('x')
    # plt.ylabel('z')
    # plt.axis('equal')
    # plt.show()

    """ Get Foot Contacts """

    def foot_detect(positions, thres):
        velfactor, heightfactor = np.array([thres, thres]), np.array([3.0, 2.0])

        feet_l_x = (positions[1:, fid_l, 0] - positions[:-1, fid_l, 0]) ** 2
        feet_l_y = (positions[1:, fid_l, 1] - positions[:-1, fid_l, 1]) ** 2
        feet_l_z = (positions[1:, fid_l, 2] - positions[:-1, fid_l, 2]) ** 2
        # feet_l_h = positions[:-1,fid_l,1]
        # feet_l = (((feet_l_x + feet_l_y + feet_l_z) < velfactor) & (feet_l_h < heightfactor)).astype(np.float64)
        feet_l = ((feet_l_x + feet_l_y + feet_l_z) < velfactor).astype(np.float64)

        feet_r_x = (positions[1:, fid_r, 0] - positions[:-1, fid_r, 0]) ** 2
        feet_r_y = (positions[1:, fid_r, 1] - positions[:-1, fid_r, 1]) ** 2
        feet_r_z = (positions[1:, fid_r, 2] - positions[:-1, fid_r, 2]) ** 2
        # feet_r_h = positions[:-1,fid_r,1]
        # feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor) & (feet_r_h < heightfactor)).astype(np.float64)
        feet_r = (((feet_r_x + feet_r_y + feet_r_z) < velfactor)).astype(np.float64)
        return feet_l, feet_r
    #
    feet_l, feet_r = foot_detect(positions, feet_thre)
    # feet_l, feet_r = foot_detect(positions, 0.002)

    '''Quaternion and Cartesian representation'''
    r_rot = None

    def get_rifke(positions):
        '''Local pose'''
        positions[..., 0] -= positions[:, 0:1, 0]
        positions[..., 2] -= positions[:, 0:1, 2]
        '''All pose face Z+'''
        positions = qrot_np(np.repeat(r_rot[:, None], positions.shape[1], axis=1), positions)
        return positions

    def get_quaternion(positions):
        skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
        # (seq_len, joints_num, 4)
        quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=False)

        '''Fix Quaternion Discontinuity'''
        quat_params = qfix(quat_params)
        # (seq_len, 4)
        r_rot = quat_params[:, 0].copy()
        # print(r_rot[0])
        '''Root Linear Velocity'''
        # (seq_len - 1, 3)
        velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
        # print(r_rot.shape, velocity.shape)
        velocity = qrot_np(r_rot[1:], velocity)
        '''Root Angular Velocity'''
        # (seq_len - 1, 4)
        r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
        quat_params[1:, 0] = r_velocity
        # (seq_len, joints_num, 4)
        return quat_params, r_velocity, velocity, r_rot

    def get_cont6d_params(positions):
        skel = Skeleton(n_raw_offsets, kinematic_chain, "cpu")
        # (seq_len, joints_num, 4)
        quat_params = skel.inverse_kinematics_np(positions, face_joint_indx, smooth_forward=True)

        '''Quaternion to continuous 6D'''
        cont_6d_params = quaternion_to_cont6d_np(quat_params)
        # (seq_len, 4)
        r_rot = quat_params[:, 0].copy()
        # print(r_rot[0])
        '''Root Linear Velocity'''
        # (seq_len - 1, 3)
        velocity = (positions[1:, 0] - positions[:-1, 0]).copy()
        # print(r_rot.shape, velocity.shape)
        velocity = qrot_np(r_rot[1:], velocity)
        '''Root Angular Velocity'''
        # (seq_len - 1, 4)
        r_velocity = qmul_np(r_rot[1:], qinv_np(r_rot[:-1]))
        # (seq_len, joints_num, 4)
        return cont_6d_params, r_velocity, velocity, r_rot

    cont_6d_params, r_velocity, velocity, r_rot = get_cont6d_params(positions)
    positions = get_rifke(positions)

    # trejec = np.cumsum(np.concatenate([np.array([[0, 0, 0]]), velocity], axis=0), axis=0)
    # r_rotations, r_pos = recover_ric_glo_np(r_velocity, velocity[:, [0, 2]])

    # plt.plot(positions_b[:, 0, 0], positions_b[:, 0, 2], marker='*')
    # plt.plot(ground_positions[:, 0, 0], ground_positions[:, 0, 2], marker='o', color='r')
    # plt.plot(trejec[:, 0], trejec[:, 2], marker='^', color='g')
    # plt.plot(r_pos[:, 0], r_pos[:, 2], marker='s', color='y')
    # plt.xlabel('x')
    # plt.ylabel('z')
    # plt.axis('equal')
    # plt.show()

    '''Root height'''
    root_y = positions[:, 0, 1:2]

    '''Root rotation and linear velocity'''
    # (seq_len-1, 1) rotation velocity along y-axis
    # (seq_len-1, 2) linear velovity on xz plane
    r_velocity = np.arcsin(r_velocity[:, 2:3])
    l_velocity = velocity[:, [0, 2]]
    # print(r_velocity.shape, l_velocity.shape, root_y.shape)
    root_data = np.concatenate([r_velocity, l_velocity, root_y[:-1]], axis=-1)

    '''Get Joint Rotation Representation'''
    # (seq_len, (joints_num-1) *6) quaternion for skeleton joints
    rot_data = cont_6d_params[:, 1:].reshape(len(cont_6d_params), -1)

    '''Get Joint Rotation Invariant Position Represention'''
    # (seq_len, (joints_num-1)*3) local joint position
    ric_data = positions[:, 1:].reshape(len(positions), -1)

    '''Get Joint Velocity Representation'''
    # (seq_len-1, joints_num*3)
    local_vel = qrot_np(np.repeat(r_rot[:-1, None], global_positions.shape[1], axis=1),
                        global_positions[1:] - global_positions[:-1])
    local_vel = local_vel.reshape(len(local_vel), -1)

    data = root_data
    data = np.concatenate([data, ric_data[:-1]], axis=-1)
    data = np.concatenate([data, rot_data[:-1]], axis=-1)
    # print(dataset.shape, local_vel.shape)
    data = np.concatenate([data, local_vel], axis=-1)
    data = np.concatenate([data, feet_l, feet_r], axis=-1)

    return data, global_positions, positions, l_velocity


# Recover global angle and positions for rotation dataset
# root_rot_velocity (B, seq_len, 1)
# root_linear_velocity (B, seq_len, 2)
# root_y (B, seq_len, 1)
# ric_data (B, seq_len, (joint_num - 1)*3)
# rot_data (B, seq_len, (joint_num - 1)*6)
# local_velocity (B, seq_len, joint_num*3)
# foot contact (B, seq_len, 4)
def recover_root_rot_pos(data):
    rot_vel = data[..., 0]
    r_rot_ang = torch.zeros_like(rot_vel).to(data.device)
    '''Get Y-axis rotation from rotation velocity'''
    r_rot_ang[..., 1:] = rot_vel[..., :-1]
    r_rot_ang = torch.cumsum(r_rot_ang, dim=-1)

    r_rot_quat = torch.zeros(data.shape[:-1] + (4,)).to(data.device)
    r_rot_quat[..., 0] = torch.cos(r_rot_ang)
    r_rot_quat[..., 2] = torch.sin(r_rot_ang)

    r_pos = torch.zeros(data.shape[:-1] + (3,)).to(data.device)
    r_pos[..., 1:, [0, 2]] = data[..., :-1, 1:3]
    '''Add Y-axis rotation to root position'''
    r_pos = qrot(qinv(r_rot_quat), r_pos)

    r_pos = torch.cumsum(r_pos, dim=-2)

    r_pos[..., 1] = data[..., 3]
    return r_rot_quat, r_pos


def recover_from_rot(data, joints_num, skeleton):
    r_rot_quat, r_pos = recover_root_rot_pos(data)

    r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)

    start_indx = 1 + 2 + 1 + (joints_num - 1) * 3
    end_indx = start_indx + (joints_num - 1) * 6
    cont6d_params = data[..., start_indx:end_indx]
    # print(r_rot_cont6d.shape, cont6d_params.shape, r_pos.shape)
    cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
    cont6d_params = cont6d_params.view(-1, joints_num, 6)

    positions = skeleton.forward_kinematics_cont6d(cont6d_params, r_pos)

    return positions

def recover_from_root_rot6d(data, joints_num, skeleton):

    r_rot_quat, r_pos = recover_root_rot_pos(data)

    r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)

    start_indx = 1 + 2 + 1
    end_indx = start_indx + (joints_num - 1) * 6
    cont6d_params = data[..., start_indx:end_indx]
    # print(r_rot_cont6d.shape, cont6d_params.shape, r_pos.shape)
    cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
    cont6d_params = cont6d_params.view(-1, joints_num, 6)
    r_pos = r_pos.view(-1,3)
    positions = skeleton.forward_kinematics_cont6d(cont6d_params, r_pos)
    return positions

def recover_from_body_pos_vel_hand_rot(data, joints_num, skeleton):
    assert len(skeleton) == 2
    body_skel = skeleton[0]
    all_skel = skeleton[1]
    assert joints_num == 52
    face_joint_indx = [2, 1, 17, 16]

    r_rot_quat, r_pos = recover_root_rot_pos(data)

    r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)

    pos_body_data = data[..., : 4 + 21 * 3]
    pos_body_data_global = recover_from_ric(pos_body_data, 22)
    # pos_body_data_global shape (bs, frame, 22, 3)
    quat_params = body_skel.inverse_kinematics(pos_body_data_global, face_joint_indx)
    bs = quat_params.shape[0]
    frame = quat_params.shape[1]
    cont6d_params = quaternion_to_cont6d(quat_params).view(bs, frame, -1)

    # cont6d_params
    rot6d_hand_data = data[..., 4 + 21 * 3: 4 + 21 * 3 + 30 * 6]

    cont6d_params = torch.cat([cont6d_params, rot6d_hand_data], dim=-1)
    cont6d_params = cont6d_params.view(-1, joints_num, 6)
    r_pos = r_pos.view(-1,3)
    positions = all_skel.forward_kinematics_cont6d(cont6d_params, r_pos)
    return positions


def recover_rot(data):
    # dataset [bs, seqlen, 263/251] HumanML/KIT
    joints_num = 22 if data.shape[-1] == 263 else 21
    r_rot_quat, r_pos = recover_root_rot_pos(data)
    r_pos_pad = torch.cat([r_pos, torch.zeros_like(r_pos)], dim=-1).unsqueeze(-2)
    r_rot_cont6d = quaternion_to_cont6d(r_rot_quat)
    start_indx = 1 + 2 + 1 + (joints_num - 1) * 3
    end_indx = start_indx + (joints_num - 1) * 6
    cont6d_params = data[..., start_indx:end_indx]
    cont6d_params = torch.cat([r_rot_cont6d, cont6d_params], dim=-1)
    cont6d_params = cont6d_params.view(-1, joints_num, 6)
    cont6d_params = torch.cat([cont6d_params, r_pos_pad], dim=-2)
    return cont6d_params


def recover_from_ric(data, joints_num):
    r_rot_quat, r_pos = recover_root_rot_pos(data)
    positions = data[..., 4:(joints_num - 1) * 3 + 4]
    positions = positions.view(positions.shape[:-1] + (-1, 3))

    '''Add Y-axis rotation to local joints'''
    positions = qrot(qinv(r_rot_quat[..., None, :]).expand(positions.shape[:-1] + (4,)), positions)

    '''Add root XZ to joints'''
    positions[..., 0] += r_pos[..., 0:1]
    positions[..., 2] += r_pos[..., 2:3]

    '''Concate root and joints'''
    positions = torch.cat([r_pos.unsqueeze(-2), positions], dim=-2)

    return positions


'''
For Text2Motion Dataset
'''
'''
if __name__ == "__main__":
    example_id = "000021"
    # Lower legs
    l_idx1, l_idx2 = 5, 8
    # Right/Left foot
    fid_r, fid_l = [8, 11], [7, 10]
    # Face direction, r_hip, l_hip, sdr_r, sdr_l
    face_joint_indx = [2, 1, 17, 16]
    # l_hip, r_hip
    r_hip, l_hip = 2, 1
    joints_num = 22
    # ds_num = 8
    data_dir = '../dataset/pose_data_raw/joints/'
    save_dir1 = '../dataset/pose_data_raw/new_joints/'
    save_dir2 = '../dataset/pose_data_raw/new_joint_vecs/'

    n_raw_offsets = torch.from_numpy(t2m_raw_offsets)
    kinematic_chain = t2m_kinematic_chain

    # Get offsets of target skeleton
    example_data = np.load(os.path.join(data_dir, example_id + '.npy'))
    example_data = example_data.reshape(len(example_data), -1, 3)
    example_data = torch.from_numpy(example_data)
    tgt_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
    # (joints_num, 3)
    tgt_offsets = tgt_skel.get_offsets_joints(example_data[0])
    # print(tgt_offsets)

    source_list = os.listdir(data_dir)
    frame_num = 0
    for source_file in tqdm(source_list):
        source_data = np.load(os.path.join(data_dir, source_file))[:, :joints_num]
        try:
            dataset, ground_positions, positions, l_velocity = process_file(source_data, 0.002)
            rec_ric_data = recover_from_ric(torch.from_numpy(dataset).unsqueeze(0).float(), joints_num)
            np.save(pjoin(save_dir1, source_file), rec_ric_data.squeeze().numpy())
            np.save(pjoin(save_dir2, source_file), dataset)
            frame_num += dataset.shape[0]
        except Exception as e:
            print(source_file)
            print(e)

    print('Total clips: %d, Frames: %d, Duration: %fm' %
          (len(source_list), frame_num, frame_num / 20 / 60))
'''

if __name__ == "__main__":
    example_id = "03950_gt"
    # Lower legs
    l_idx1, l_idx2 = 17, 18
    # Right/Left foot
    fid_r, fid_l = [14, 15], [19, 20]
    # Face direction, r_hip, l_hip, sdr_r, sdr_l
    face_joint_indx = [11, 16, 5, 8]
    # l_hip, r_hip
    r_hip, l_hip = 11, 16
    joints_num = 21
    # ds_num = 8
    data_dir = '../dataset/kit_mocap_dataset/joints/'
    save_dir1 = '../dataset/kit_mocap_dataset/new_joints/'
    save_dir2 = '../dataset/kit_mocap_dataset/new_joint_vecs/'

    n_raw_offsets = torch.from_numpy(kit_raw_offsets)
    kinematic_chain = kit_kinematic_chain

    '''Get offsets of target skeleton'''
    example_data = np.load(os.path.join(data_dir, example_id + '.npy'))
    example_data = example_data.reshape(len(example_data), -1, 3)
    example_data = torch.from_numpy(example_data)
    tgt_skel = Skeleton(n_raw_offsets, kinematic_chain, 'cpu')
    # (joints_num, 3)
    tgt_offsets = tgt_skel.get_offsets_joints(example_data[0])
    # print(tgt_offsets)

    source_list = os.listdir(data_dir)
    frame_num = 0
    '''Read source dataset'''
    for source_file in tqdm(source_list):
        source_data = np.load(os.path.join(data_dir, source_file))[:, :joints_num]
        try:
            name = ''.join(source_file[:-7].split('_')) + '.npy'
            data, ground_positions, positions, l_velocity = process_file(source_data, 0.05)
            rec_ric_data = recover_from_ric(torch.from_numpy(data).unsqueeze(0).float(), joints_num)
            if np.isnan(rec_ric_data.numpy()).any():
                print(source_file)
                continue
            np.save(pjoin(save_dir1, name), rec_ric_data.squeeze().numpy())
            np.save(pjoin(save_dir2, name), data)
            frame_num += data.shape[0]
        except Exception as e:
            print(source_file)
            print(e)

    print('Total clips: %d, Frames: %d, Duration: %fm' %
          (len(source_list), frame_num, frame_num / 12.5 / 60))
Evaluator_272/mld/data/humanml/utils/__init__.py
ADDED
|
File without changes
|
Evaluator_272/mld/data/humanml/utils/metrics.py
ADDED
|
@@ -0,0 +1,142 @@
| 1 |
+
import numpy as np
|
| 2 |
+
from scipy import linalg
|
| 3 |
+
|
| 4 |
+
def euclidean_distance_matrix(matrix1, matrix2):
|
| 5 |
+
"""
|
| 6 |
+
Params:
|
| 7 |
+
-- matrix1: N1 x D
|
| 8 |
+
-- matrix2: N2 x D
|
| 9 |
+
Returns:
|
| 10 |
+
-- dist: N1 x N2
|
| 11 |
+
dist[i, j] == distance(matrix1[i], matrix2[j])
|
| 12 |
+
"""
|
| 13 |
+
assert matrix1.shape[1] == matrix2.shape[1]
|
| 14 |
+
d1 = -2 * np.dot(matrix1, matrix2.T)
|
| 15 |
+
d2 = np.sum(np.square(matrix1), axis=1, keepdims=True)
|
| 16 |
+
d3 = np.sum(np.square(matrix2), axis=1)
|
| 17 |
+
dists = np.sqrt(d1 + d2 + d3)
|
| 18 |
+
return dists
|
| 19 |
+
|
| 20 |
+
def calculate_top_k(mat, top_k):
|
| 21 |
+
size = mat.shape[0]
|
| 22 |
+
gt_mat = np.expand_dims(np.arange(size), 1).repeat(size, 1)
|
| 23 |
+
bool_mat = (mat == gt_mat)
|
| 24 |
+
correct_vec = False
|
| 25 |
+
top_k_list = []
|
| 26 |
+
for i in range(top_k):
|
| 27 |
+
correct_vec = (correct_vec | bool_mat[:, i])
|
| 28 |
+
top_k_list.append(correct_vec[:, None])
|
| 29 |
+
top_k_mat = np.concatenate(top_k_list, axis=1)
|
| 30 |
+
return top_k_mat
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def calculate_R_precision(embedding1, embedding2, top_k, sum_all=False):
|
| 34 |
+
dist_mat = euclidean_distance_matrix(embedding1, embedding2)
|
| 35 |
+
argmax = np.argsort(dist_mat, axis=1)
|
| 36 |
+
top_k_mat = calculate_top_k(argmax, top_k)
|
| 37 |
+
if sum_all:
|
| 38 |
+
return top_k_mat.sum(axis=0)
|
| 39 |
+
else:
|
| 40 |
+
return top_k_mat
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def calculate_matching_score(embedding1, embedding2, sum_all=False):
|
| 44 |
+
assert len(embedding1.shape) == 2
|
| 45 |
+
assert embedding1.shape[0] == embedding2.shape[0]
|
| 46 |
+
assert embedding1.shape[1] == embedding2.shape[1]
|
| 47 |
+
|
| 48 |
+
dist = linalg.norm(embedding1 - embedding2, axis=1)
|
| 49 |
+
if sum_all:
|
| 50 |
+
return dist.sum(axis=0)
|
| 51 |
+
else:
|
| 52 |
+
return dist
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def calculate_activation_statistics(activations):
|
| 57 |
+
"""
|
| 58 |
+
Params:
|
| 59 |
+
-- activation: num_samples x dim_feat
|
| 60 |
+
Returns:
|
| 61 |
+
-- mu: dim_feat
|
| 62 |
+
-- sigma: dim_feat x dim_feat
|
| 63 |
+
"""
|
| 64 |
+
mu = np.mean(activations, axis=0)
|
| 65 |
+
cov = np.cov(activations, rowvar=False)
|
| 66 |
+
return mu, cov
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def calculate_diversity(activation, diversity_times):
|
| 70 |
+
assert len(activation.shape) == 2
|
| 71 |
+
assert activation.shape[0] > diversity_times
|
| 72 |
+
num_samples = activation.shape[0]
|
| 73 |
+
|
| 74 |
+
first_indices = np.random.choice(num_samples, diversity_times, replace=False)
|
| 75 |
+
second_indices = np.random.choice(num_samples, diversity_times, replace=False)
|
| 76 |
+
dist = linalg.norm(activation[first_indices] - activation[second_indices], axis=1)
|
| 77 |
+
return dist.mean()
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def calculate_multimodality(activation, multimodality_times):
|
| 81 |
+
assert len(activation.shape) == 3
|
| 82 |
+
assert activation.shape[1] > multimodality_times
|
| 83 |
+
num_per_sent = activation.shape[1]
|
| 84 |
+
|
| 85 |
+
first_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
|
| 86 |
+
second_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
|
| 87 |
+
dist = linalg.norm(activation[:, first_dices] - activation[:, second_dices], axis=2)
|
| 88 |
+
return dist.mean()
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
|
| 92 |
+
"""Numpy implementation of the Frechet Distance.
|
| 93 |
+
The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
|
| 94 |
+
and X_2 ~ N(mu_2, C_2) is
|
| 95 |
+
d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
|
| 96 |
+
Stable version by Dougal J. Sutherland.
|
| 97 |
+
Params:
|
| 98 |
+
-- mu1 : Numpy array containing the activations of a layer of the
|
| 99 |
+
inception net (like returned by the function 'get_predictions')
|
| 100 |
+
for generated samples.
|
| 101 |
+
-- mu2 : The sample mean over activations, precalculated on a
|
| 102 |
+
representative dataset.
|
| 103 |
+
-- sigma1: The covariance matrix over activations for generated samples.
|
| 104 |
+
-- sigma2: The covariance matrix over activations, precalculated on a
|
| 105 |
+
representative dataset.
|
| 106 |
+
Returns:
|
| 107 |
+
-- : The Frechet Distance.
|
| 108 |
+
"""
|
| 109 |
+
|
| 110 |
+
mu1 = np.atleast_1d(mu1)
|
| 111 |
+
mu2 = np.atleast_1d(mu2)
|
| 112 |
+
|
| 113 |
+
sigma1 = np.atleast_2d(sigma1)
|
| 114 |
+
sigma2 = np.atleast_2d(sigma2)
|
| 115 |
+
|
| 116 |
+
assert mu1.shape == mu2.shape, \
|
| 117 |
+
'Training and test mean vectors have different lengths'
|
| 118 |
+
assert sigma1.shape == sigma2.shape, \
|
| 119 |
+
'Training and test covariances have different dimensions'
|
| 120 |
+
|
| 121 |
+
diff = mu1 - mu2
|
| 122 |
+
|
| 123 |
+
# Product might be almost singular
|
| 124 |
+
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
|
| 125 |
+
if not np.isfinite(covmean).all():
|
| 126 |
+
msg = ('fid calculation produces singular product; '
|
| 127 |
+
'adding %s to diagonal of cov estimates') % eps
|
| 128 |
+
print(msg)
|
| 129 |
+
offset = np.eye(sigma1.shape[0]) * eps
|
| 130 |
+
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
|
| 131 |
+
|
| 132 |
+
# Numerical error might give slight imaginary component
|
| 133 |
+
if np.iscomplexobj(covmean):
|
| 134 |
+
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
|
| 135 |
+
m = np.max(np.abs(covmean.imag))
|
| 136 |
+
raise ValueError('Imaginary component {}'.format(m))
|
| 137 |
+
covmean = covmean.real
|
| 138 |
+
|
| 139 |
+
tr_covmean = np.trace(covmean)
|
| 140 |
+
|
| 141 |
+
return (diff.dot(diff) + np.trace(sigma1) +
|
| 142 |
+
np.trace(sigma2) - 2 * tr_covmean)
|
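A minimal usage sketch for the evaluation metrics defined above (illustration only, not part of the upload; it assumes the Evaluator_272 directory is on PYTHONPATH so that mld.data.humanml.utils.metrics is importable, and uses random embeddings purely as stand-ins):

import numpy as np
from mld.data.humanml.utils.metrics import (
    calculate_activation_statistics, calculate_frechet_distance,
    calculate_R_precision, calculate_diversity)

# Two sets of (num_samples x dim_feat) embeddings, e.g. text vs. motion or GT vs. generated.
gt_emb = np.random.randn(512, 256)
gen_emb = np.random.randn(512, 256)

# FID-style distance: Gaussian statistics per set, then the Frechet distance between them.
mu_gt, cov_gt = calculate_activation_statistics(gt_emb)
mu_gen, cov_gen = calculate_activation_statistics(gen_emb)
fid = calculate_frechet_distance(mu_gt, cov_gt, mu_gen, cov_gen)

# R-precision: fraction of rows whose paired embedding is among the top-k nearest neighbours.
r_precision = calculate_R_precision(gt_emb, gen_emb, top_k=3, sum_all=True) / gt_emb.shape[0]

# Diversity: mean distance between two random subsets of the same embeddings.
diversity = calculate_diversity(gen_emb, diversity_times=300)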
Evaluator_272/mld/data/humanml/utils/paramUtil.py
ADDED
|
@@ -0,0 +1,63 @@
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
# Define a kinematic tree for the skeletal structure
|
| 4 |
+
kit_kinematic_chain = [[0, 11, 12, 13, 14, 15], [0, 16, 17, 18, 19, 20], [0, 1, 2, 3, 4], [3, 5, 6, 7], [3, 8, 9, 10]]
|
| 5 |
+
|
| 6 |
+
kit_raw_offsets = np.array(
|
| 7 |
+
[
|
| 8 |
+
[0, 0, 0],
|
| 9 |
+
[0, 1, 0],
|
| 10 |
+
[0, 1, 0],
|
| 11 |
+
[0, 1, 0],
|
| 12 |
+
[0, 1, 0],
|
| 13 |
+
[1, 0, 0],
|
| 14 |
+
[0, -1, 0],
|
| 15 |
+
[0, -1, 0],
|
| 16 |
+
[-1, 0, 0],
|
| 17 |
+
[0, -1, 0],
|
| 18 |
+
[0, -1, 0],
|
| 19 |
+
[1, 0, 0],
|
| 20 |
+
[0, -1, 0],
|
| 21 |
+
[0, -1, 0],
|
| 22 |
+
[0, 0, 1],
|
| 23 |
+
[0, 0, 1],
|
| 24 |
+
[-1, 0, 0],
|
| 25 |
+
[0, -1, 0],
|
| 26 |
+
[0, -1, 0],
|
| 27 |
+
[0, 0, 1],
|
| 28 |
+
[0, 0, 1]
|
| 29 |
+
]
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
t2m_raw_offsets = np.array([[0,0,0],
|
| 33 |
+
[1,0,0],
|
| 34 |
+
[-1,0,0],
|
| 35 |
+
[0,1,0],
|
| 36 |
+
[0,-1,0],
|
| 37 |
+
[0,-1,0],
|
| 38 |
+
[0,1,0],
|
| 39 |
+
[0,-1,0],
|
| 40 |
+
[0,-1,0],
|
| 41 |
+
[0,1,0],
|
| 42 |
+
[0,0,1],
|
| 43 |
+
[0,0,1],
|
| 44 |
+
[0,1,0],
|
| 45 |
+
[1,0,0],
|
| 46 |
+
[-1,0,0],
|
| 47 |
+
[0,0,1],
|
| 48 |
+
[0,-1,0],
|
| 49 |
+
[0,-1,0],
|
| 50 |
+
[0,-1,0],
|
| 51 |
+
[0,-1,0],
|
| 52 |
+
[0,-1,0],
|
| 53 |
+
[0,-1,0]])
|
| 54 |
+
|
| 55 |
+
t2m_kinematic_chain = [[0, 2, 5, 8, 11], [0, 1, 4, 7, 10], [0, 3, 6, 9, 12, 15], [9, 14, 17, 19, 21], [9, 13, 16, 18, 20]]
|
| 56 |
+
t2m_left_hand_chain = [[20, 22, 23, 24], [20, 34, 35, 36], [20, 25, 26, 27], [20, 31, 32, 33], [20, 28, 29, 30]]
|
| 57 |
+
t2m_right_hand_chain = [[21, 43, 44, 45], [21, 46, 47, 48], [21, 40, 41, 42], [21, 37, 38, 39], [21, 49, 50, 51]]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
kit_tgt_skel_id = '03950'
|
| 61 |
+
|
| 62 |
+
t2m_tgt_skel_id = '000021'
|
| 63 |
+
|
Evaluator_272/mld/data/humanml/utils/plot_script.py
ADDED
|
@@ -0,0 +1,103 @@
| 1 |
+
import math
|
| 2 |
+
# import cv2
|
| 3 |
+
from textwrap import wrap
|
| 4 |
+
|
| 5 |
+
import matplotlib
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import mpl_toolkits.mplot3d.axes3d as p3
|
| 8 |
+
import numpy as np
|
| 9 |
+
from matplotlib.animation import FFMpegFileWriter, FuncAnimation
|
| 10 |
+
from mpl_toolkits.mplot3d import Axes3D
|
| 11 |
+
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
|
| 12 |
+
|
| 13 |
+
import mld.data.humanml.utils.paramUtil as paramUtil
|
| 14 |
+
|
| 15 |
+
skeleton = paramUtil.t2m_kinematic_chain
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def list_cut_average(ll, intervals):
|
| 19 |
+
if intervals == 1:
|
| 20 |
+
return ll
|
| 21 |
+
|
| 22 |
+
bins = math.ceil(len(ll) * 1.0 / intervals)
|
| 23 |
+
ll_new = []
|
| 24 |
+
for i in range(bins):
|
| 25 |
+
l_low = intervals * i
|
| 26 |
+
l_high = l_low + intervals
|
| 27 |
+
l_high = l_high if l_high < len(ll) else len(ll)
|
| 28 |
+
ll_new.append(np.mean(ll[l_low:l_high]))
|
| 29 |
+
return ll_new
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def plot_3d_motion(save_path, joints, title, figsize=(3, 3), fps=120, radius=3, kinematic_tree=skeleton):
|
| 33 |
+
matplotlib.use('Agg')
|
| 34 |
+
title = '\n'.join(wrap(title, 20))
|
| 35 |
+
|
| 36 |
+
def init():
|
| 37 |
+
ax.set_xlim3d([-radius / 2, radius / 2])
|
| 38 |
+
ax.set_ylim3d([0, radius])
|
| 39 |
+
ax.set_zlim3d([-radius / 3., radius * 2 / 3.])
|
| 40 |
+
fig.suptitle(title, fontsize=10)
|
| 41 |
+
ax.grid(b=False)
|
| 42 |
+
|
| 43 |
+
def plot_xzPlane(minx, maxx, miny, minz, maxz):
|
| 44 |
+
verts = [
|
| 45 |
+
[minx, miny, minz],
|
| 46 |
+
[minx, miny, maxz],
|
| 47 |
+
[maxx, miny, maxz],
|
| 48 |
+
[maxx, miny, minz]
|
| 49 |
+
]
|
| 50 |
+
xz_plane = Poly3DCollection([verts])
|
| 51 |
+
xz_plane.set_facecolor((0.5, 0.5, 0.5, 0.5))
|
| 52 |
+
ax.add_collection3d(xz_plane)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
data = joints.copy().reshape(len(joints), -1, 3)
|
| 56 |
+
fig = plt.figure(figsize=figsize)
|
| 57 |
+
plt.tight_layout()
|
| 58 |
+
ax = p3.Axes3D(fig)
|
| 59 |
+
init()
|
| 60 |
+
MINS = data.min(axis=0).min(axis=0)
|
| 61 |
+
MAXS = data.max(axis=0).max(axis=0)
|
| 62 |
+
|
| 63 |
+
colors = ["#DD5A37", "#D69E00", "#B75A39", "#DD5A37", "#D69E00",
|
| 64 |
+
"#FF6D00", "#FF6D00", "#FF6D00", "#FF6D00", "#FF6D00",
|
| 65 |
+
"#DDB50E", "#DDB50E", "#DDB50E", "#DDB50E", "#DDB50E", ]
|
| 66 |
+
|
| 67 |
+
frame_number = data.shape[0]
|
| 68 |
+
|
| 69 |
+
height_offset = MINS[1]
|
| 70 |
+
data[:, :, 1] -= height_offset
|
| 71 |
+
trajec = data[:, 0, [0, 2]]
|
| 72 |
+
|
| 73 |
+
data[..., 0] -= data[:, 0:1, 0]
|
| 74 |
+
data[..., 2] -= data[:, 0:1, 2]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def update(index):
|
| 78 |
+
|
| 79 |
+
ax.view_init(elev=120, azim=-90)
|
| 80 |
+
ax.dist = 7.5
|
| 81 |
+
plot_xzPlane(MINS[0] - trajec[index, 0], MAXS[0] - trajec[index, 0], 0, MINS[2] - trajec[index, 1],
|
| 82 |
+
MAXS[2] - trajec[index, 1])
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
for i, (chain, color) in enumerate(zip(kinematic_tree, colors)):
|
| 86 |
+
# print(color)
|
| 87 |
+
if i < 5:
|
| 88 |
+
linewidth = 4.0
|
| 89 |
+
else:
|
| 90 |
+
linewidth = 2.0
|
| 91 |
+
ax.plot3D(data[index, chain, 0], data[index, chain, 1], data[index, chain, 2], linewidth=linewidth,
|
| 92 |
+
color=color)
|
| 93 |
+
|
| 94 |
+
plt.axis('off')
|
| 95 |
+
ax.set_xticklabels([])
|
| 96 |
+
ax.set_yticklabels([])
|
| 97 |
+
ax.set_zticklabels([])
|
| 98 |
+
|
| 99 |
+
ani = FuncAnimation(fig, update, frames=frame_number,
|
| 100 |
+
interval=1000 / fps, repeat=False)
|
| 101 |
+
|
| 102 |
+
ani.save(save_path, fps=fps)
|
| 103 |
+
plt.close()
|
Evaluator_272/mld/data/humanml/utils/utils.py
ADDED
|
@@ -0,0 +1,163 @@
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
# import cv2
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import paramUtil
|
| 6 |
+
import math
|
| 7 |
+
import time
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
from scipy.ndimage import gaussian_filter
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def mkdir(path):
|
| 13 |
+
if not os.path.exists(path):
|
| 14 |
+
os.makedirs(path)
|
| 15 |
+
|
| 16 |
+
COLORS = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
|
| 17 |
+
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
|
| 18 |
+
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
|
| 19 |
+
|
| 20 |
+
MISSING_VALUE = -1
|
| 21 |
+
|
| 22 |
+
def save_image(image_numpy, image_path):
|
| 23 |
+
img_pil = Image.fromarray(image_numpy)
|
| 24 |
+
img_pil.save(image_path)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def save_logfile(log_loss, save_path):
|
| 28 |
+
with open(save_path, 'wt') as f:
|
| 29 |
+
for k, v in log_loss.items():
|
| 30 |
+
w_line = k
|
| 31 |
+
for digit in v:
|
| 32 |
+
w_line += ' %.3f' % digit
|
| 33 |
+
f.write(w_line + '\n')
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def print_current_loss(start_time, niter_state, losses, epoch=None, sub_epoch=None,
|
| 37 |
+
inner_iter=None, tf_ratio=None, sl_steps=None):
|
| 38 |
+
|
| 39 |
+
def as_minutes(s):
|
| 40 |
+
m = math.floor(s / 60)
|
| 41 |
+
s -= m * 60
|
| 42 |
+
return '%dm %ds' % (m, s)
|
| 43 |
+
|
| 44 |
+
def time_since(since, percent):
|
| 45 |
+
now = time.time()
|
| 46 |
+
s = now - since
|
| 47 |
+
es = s / percent
|
| 48 |
+
rs = es - s
|
| 49 |
+
return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
|
| 50 |
+
|
| 51 |
+
if epoch is not None:
|
| 52 |
+
print('epoch: %3d niter: %6d sub_epoch: %2d inner_iter: %4d' % (epoch, niter_state, sub_epoch, inner_iter), end=" ")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
now = time.time()
|
| 56 |
+
message = '%s'%(as_minutes(now - start_time))
|
| 57 |
+
|
| 58 |
+
for k, v in losses.items():
|
| 59 |
+
message += ' %s: %.4f ' % (k, v)
|
| 60 |
+
message += ' sl_length:%2d tf_ratio:%.2f'%(sl_steps, tf_ratio)
|
| 61 |
+
print(message)
|
| 62 |
+
|
| 63 |
+
def print_current_loss_decomp(start_time, niter_state, total_niters, losses, epoch=None, inner_iter=None):
|
| 64 |
+
|
| 65 |
+
def as_minutes(s):
|
| 66 |
+
m = math.floor(s / 60)
|
| 67 |
+
s -= m * 60
|
| 68 |
+
return '%dm %ds' % (m, s)
|
| 69 |
+
|
| 70 |
+
def time_since(since, percent):
|
| 71 |
+
now = time.time()
|
| 72 |
+
s = now - since
|
| 73 |
+
es = s / percent
|
| 74 |
+
rs = es - s
|
| 75 |
+
return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
|
| 76 |
+
|
| 77 |
+
print('epoch: %03d inner_iter: %5d' % (epoch, inner_iter), end=" ")
|
| 78 |
+
# now = time.time()
|
| 79 |
+
message = '%s niter: %07d completed: %3d%%)'%(time_since(start_time, niter_state / total_niters), niter_state, niter_state / total_niters * 100)
|
| 80 |
+
for k, v in losses.items():
|
| 81 |
+
message += ' %s: %.4f ' % (k, v)
|
| 82 |
+
print(message)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def compose_gif_img_list(img_list, fp_out, duration):
|
| 86 |
+
img, *imgs = [Image.fromarray(np.array(image)) for image in img_list]
|
| 87 |
+
img.save(fp=fp_out, format='GIF', append_images=imgs, optimize=False,
|
| 88 |
+
save_all=True, loop=0, duration=duration)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def save_images(visuals, image_path):
|
| 92 |
+
if not os.path.exists(image_path):
|
| 93 |
+
os.makedirs(image_path)
|
| 94 |
+
|
| 95 |
+
for i, (label, img_numpy) in enumerate(visuals.items()):
|
| 96 |
+
img_name = '%d_%s.jpg' % (i, label)
|
| 97 |
+
save_path = os.path.join(image_path, img_name)
|
| 98 |
+
save_image(img_numpy, save_path)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def save_images_test(visuals, image_path, from_name, to_name):
|
| 102 |
+
if not os.path.exists(image_path):
|
| 103 |
+
os.makedirs(image_path)
|
| 104 |
+
|
| 105 |
+
for i, (label, img_numpy) in enumerate(visuals.items()):
|
| 106 |
+
img_name = "%s_%s_%s" % (from_name, to_name, label)
|
| 107 |
+
save_path = os.path.join(image_path, img_name)
|
| 108 |
+
save_image(img_numpy, save_path)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def compose_and_save_img(img_list, save_dir, img_name, col=4, row=1, img_size=(256, 200)):
|
| 112 |
+
# print(col, row)
|
| 113 |
+
compose_img = compose_image(img_list, col, row, img_size)
|
| 114 |
+
if not os.path.exists(save_dir):
|
| 115 |
+
os.makedirs(save_dir)
|
| 116 |
+
img_path = os.path.join(save_dir, img_name)
|
| 117 |
+
compose_img.save(img_path)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def compose_image(img_list, col, row, img_size):
|
| 121 |
+
to_image = Image.new('RGB', (col * img_size[0], row * img_size[1]))
|
| 122 |
+
for y in range(0, row):
|
| 123 |
+
for x in range(0, col):
|
| 124 |
+
from_img = Image.fromarray(img_list[y * col + x])
|
| 125 |
+
|
| 126 |
+
paste_area = (x * img_size[0], y*img_size[1],
|
| 127 |
+
(x + 1) * img_size[0], (y + 1) * img_size[1])
|
| 128 |
+
to_image.paste(from_img, paste_area)
|
| 129 |
+
return to_image
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def plot_loss_curve(losses, save_path, intervals=500):
|
| 133 |
+
plt.figure(figsize=(10, 5))
|
| 134 |
+
plt.title("Loss During Training")
|
| 135 |
+
for key in losses.keys():
|
| 136 |
+
plt.plot(list_cut_average(losses[key], intervals), label=key)
|
| 137 |
+
plt.xlabel("Iterations/" + str(intervals))
|
| 138 |
+
plt.ylabel("Loss")
|
| 139 |
+
plt.legend()
|
| 140 |
+
plt.savefig(save_path)
|
| 141 |
+
plt.show()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def list_cut_average(ll, intervals):
|
| 145 |
+
if intervals == 1:
|
| 146 |
+
return ll
|
| 147 |
+
|
| 148 |
+
bins = math.ceil(len(ll) * 1.0 / intervals)
|
| 149 |
+
ll_new = []
|
| 150 |
+
for i in range(bins):
|
| 151 |
+
l_low = intervals * i
|
| 152 |
+
l_high = l_low + intervals
|
| 153 |
+
l_high = l_high if l_high < len(ll) else len(ll)
|
| 154 |
+
ll_new.append(np.mean(ll[l_low:l_high]))
|
| 155 |
+
return ll_new
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def motion_temporal_filter(motion, sigma=1):
|
| 159 |
+
motion = motion.reshape(motion.shape[0], -1)
|
| 160 |
+
for i in range(motion.shape[1]):
|
| 161 |
+
motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
|
| 162 |
+
return motion.reshape(motion.shape[0], -1, 3)
|
| 163 |
+
|
Evaluator_272/mld/data/humanml/utils/word_vectorizer.py
ADDED
|
@@ -0,0 +1,143 @@
| 1 |
+
import numpy as np
|
| 2 |
+
import pickle
|
| 3 |
+
from os.path import join as pjoin
|
| 4 |
+
|
| 5 |
+
POS_enumerator = {
|
| 6 |
+
'VERB': 0,
|
| 7 |
+
'NOUN': 1,
|
| 8 |
+
'DET': 2,
|
| 9 |
+
'ADP': 3,
|
| 10 |
+
'NUM': 4,
|
| 11 |
+
'AUX': 5,
|
| 12 |
+
'PRON': 6,
|
| 13 |
+
'ADJ': 7,
|
| 14 |
+
'ADV': 8,
|
| 15 |
+
'Loc_VIP': 9,
|
| 16 |
+
'Body_VIP': 10,
|
| 17 |
+
'Obj_VIP': 11,
|
| 18 |
+
'Act_VIP': 12,
|
| 19 |
+
'Desc_VIP': 13,
|
| 20 |
+
'OTHER': 14,
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
Loc_list = ('left', 'right', 'clockwise', 'counterclockwise', 'anticlockwise', 'forward', 'back', 'backward',
|
| 24 |
+
'up', 'down', 'straight', 'curve')
|
| 25 |
+
|
| 26 |
+
Body_list = ('arm', 'chin', 'foot', 'feet', 'face', 'hand', 'mouth', 'leg', 'waist', 'eye', 'knee', 'shoulder', 'thigh')
|
| 27 |
+
|
| 28 |
+
Obj_List = ('stair', 'dumbbell', 'chair', 'window', 'floor', 'car', 'ball', 'handrail', 'baseball', 'basketball')
|
| 29 |
+
|
| 30 |
+
Act_list = ('walk', 'run', 'swing', 'pick', 'bring', 'kick', 'put', 'squat', 'throw', 'hop', 'dance', 'jump', 'turn',
|
| 31 |
+
'stumble', 'dance', 'stop', 'sit', 'lift', 'lower', 'raise', 'wash', 'stand', 'kneel', 'stroll',
|
| 32 |
+
'rub', 'bend', 'balance', 'flap', 'jog', 'shuffle', 'lean', 'rotate', 'spin', 'spread', 'climb')
|
| 33 |
+
|
| 34 |
+
Desc_list = ('slowly', 'carefully', 'fast', 'careful', 'slow', 'quickly', 'happy', 'angry', 'sad', 'happily',
|
| 35 |
+
'angrily', 'sadly')
|
| 36 |
+
|
| 37 |
+
VIP_dict = {
|
| 38 |
+
'Loc_VIP': Loc_list,
|
| 39 |
+
'Body_VIP': Body_list,
|
| 40 |
+
'Obj_VIP': Obj_List,
|
| 41 |
+
'Act_VIP': Act_list,
|
| 42 |
+
'Desc_VIP': Desc_list,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class WordVectorizer(object):
|
| 47 |
+
def __init__(self, meta_root, prefix, text_encode_way):
|
| 48 |
+
|
| 49 |
+
self.text_encode_way = text_encode_way
|
| 50 |
+
|
| 51 |
+
vectors = np.load(pjoin(meta_root, '%s_data.npy'%prefix))
|
| 52 |
+
words = pickle.load(open(pjoin(meta_root, '%s_words.pkl'%prefix), 'rb'))
|
| 53 |
+
word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl'%prefix), 'rb'))
|
| 54 |
+
self.word2vec = {w: vectors[word2idx[w]] for w in words}
|
| 55 |
+
|
| 56 |
+
if 'glove_6B' in self.text_encode_way:
|
| 57 |
+
from torchtext.vocab import GloVe
|
| 58 |
+
glove_6b = GloVe(name='6B', dim=300)
|
| 59 |
+
self.word2vec_glove_6b = glove_6b.get_vecs_by_tokens
|
| 60 |
+
|
| 61 |
+
def _get_pos_ohot(self, pos):
|
| 62 |
+
pos_vec = np.zeros(len(POS_enumerator))
|
| 63 |
+
if pos in POS_enumerator:
|
| 64 |
+
pos_vec[POS_enumerator[pos]] = 1
|
| 65 |
+
else:
|
| 66 |
+
pos_vec[POS_enumerator['OTHER']] = 1
|
| 67 |
+
return pos_vec
|
| 68 |
+
|
| 69 |
+
def __len__(self):
|
| 70 |
+
return len(self.word2vec)
|
| 71 |
+
|
| 72 |
+
def __getitem__(self, item):
|
| 73 |
+
word, pos = item.split('/')
|
| 74 |
+
if 'given_glove' in self.text_encode_way:
|
| 75 |
+
if word in self.word2vec:
|
| 76 |
+
word_vec = self.word2vec[word]
|
| 77 |
+
vip_pos = None
|
| 78 |
+
for key, values in VIP_dict.items():
|
| 79 |
+
if word in values:
|
| 80 |
+
vip_pos = key
|
| 81 |
+
break
|
| 82 |
+
if vip_pos is not None:
|
| 83 |
+
pos_vec = self._get_pos_ohot(vip_pos)
|
| 84 |
+
else:
|
| 85 |
+
pos_vec = self._get_pos_ohot(pos)
|
| 86 |
+
else:
|
| 87 |
+
word_vec = self.word2vec['unk']
|
| 88 |
+
pos_vec = self._get_pos_ohot('OTHER')
|
| 89 |
+
|
| 90 |
+
elif 'glove_6B' in self.text_encode_way:
|
| 91 |
+
word_vec = self.word2vec_glove_6b([word]).squeeze()
|
| 92 |
+
|
| 93 |
+
if word in self.word2vec:
|
| 94 |
+
vip_pos = None
|
| 95 |
+
for key, values in VIP_dict.items():
|
| 96 |
+
if word in values:
|
| 97 |
+
vip_pos = key
|
| 98 |
+
break
|
| 99 |
+
if vip_pos is not None:
|
| 100 |
+
pos_vec = self._get_pos_ohot(vip_pos)
|
| 101 |
+
else:
|
| 102 |
+
pos_vec = self._get_pos_ohot(pos)
|
| 103 |
+
else:
|
| 104 |
+
pos_vec = self._get_pos_ohot('OTHER')
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
return word_vec, pos_vec
|
| 109 |
+
|
| 110 |
+
class WordVectorizer_only_text_token(object):
|
| 111 |
+
def __init__(self, meta_root, prefix, text_encode_way):
|
| 112 |
+
|
| 113 |
+
self.text_encode_way = text_encode_way
|
| 114 |
+
|
| 115 |
+
vectors = np.load(pjoin(meta_root, '%s_data.npy'%prefix))
|
| 116 |
+
words = pickle.load(open(pjoin(meta_root, '%s_words.pkl'%prefix), 'rb'))
|
| 117 |
+
word2idx = pickle.load(open(pjoin(meta_root, '%s_idx.pkl'%prefix), 'rb'))
|
| 118 |
+
self.word2vec = {w: vectors[word2idx[w]] for w in words}
|
| 119 |
+
|
| 120 |
+
if 'glove_6B' in self.text_encode_way:
|
| 121 |
+
from torchtext.vocab import GloVe
|
| 122 |
+
glove_6b = GloVe(name='6B', dim=300)
|
| 123 |
+
self.word2vec_glove_6b = glove_6b.get_vecs_by_tokens
|
| 124 |
+
|
| 125 |
+
def __len__(self):
|
| 126 |
+
return len(self.word2vec)
|
| 127 |
+
|
| 128 |
+
def __getitem__(self, item):
|
| 129 |
+
word = item
|
| 130 |
+
|
| 131 |
+
if 'given_glove' in self.text_encode_way:
|
| 132 |
+
if word in self.word2vec:
|
| 133 |
+
word_vec = self.word2vec[word]
|
| 134 |
+
else:
|
| 135 |
+
word_vec = self.word2vec['unk']
|
| 136 |
+
|
| 137 |
+
elif 'glove_6B' in self.text_encode_way:
|
| 138 |
+
word_vec = self.word2vec_glove_6b([word]).squeeze()
|
| 139 |
+
|
| 140 |
+
return word_vec
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
Evaluator_272/mld/data/sampling/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
| 1 |
+
from .base import FrameSampler
|
| 2 |
+
from .framerate import subsample, upsample
|
Evaluator_272/mld/data/sampling/base.py
ADDED
|
@@ -0,0 +1,41 @@
| 1 |
+
from .frames import get_frameix_from_data_index
|
| 2 |
+
|
| 3 |
+
class FrameSampler:
|
| 4 |
+
def __init__(self, sampling="conseq", sampling_step=1, request_frames=None,threshold_reject=0.75,max_len=1000,min_len=10):
|
| 5 |
+
self.sampling = sampling
|
| 6 |
+
|
| 7 |
+
self.sampling_step = sampling_step
|
| 8 |
+
self.request_frames = request_frames
|
| 9 |
+
self.threshold_reject = threshold_reject
|
| 10 |
+
self.max_len = max_len
|
| 11 |
+
self.min_len = min_len
|
| 12 |
+
|
| 13 |
+
def __call__(self, num_frames):
|
| 14 |
+
|
| 15 |
+
return get_frameix_from_data_index(num_frames,
|
| 16 |
+
self.request_frames,
|
| 17 |
+
self.sampling,
|
| 18 |
+
self.sampling_step)
|
| 19 |
+
|
| 20 |
+
def accept(self, duration):
|
| 21 |
+
# Outputs have original lengths
|
| 22 |
+
# Check if it is too long
|
| 23 |
+
if self.request_frames is None:
|
| 24 |
+
if duration > self.max_len:
|
| 25 |
+
return False
|
| 26 |
+
elif duration < self.min_len:
|
| 27 |
+
return False
|
| 28 |
+
else:
|
| 29 |
+
# Reject sample if the length is
|
| 30 |
+
# too little relative to
|
| 31 |
+
# the request frames
|
| 32 |
+
min_number = self.threshold_reject * self.request_frames
|
| 33 |
+
if duration < min_number:
|
| 34 |
+
return False
|
| 35 |
+
return True
|
| 36 |
+
|
| 37 |
+
def get(self, key, default=None):
|
| 38 |
+
return getattr(self, key, default)
|
| 39 |
+
|
| 40 |
+
def __getitem__(self, key):
|
| 41 |
+
return getattr(self, key)
|
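A short, hypothetical usage sketch for FrameSampler (illustration only, not part of the upload; the argument values are made up): it first filters a clip by length, then draws frame indices for a fixed-size crop.

from mld.data.sampling import FrameSampler

sampler = FrameSampler(sampling="conseq", sampling_step=2, request_frames=64)

num_frames = 200  # length of some motion clip
if sampler.accept(num_frames):
    frame_ix = sampler(num_frames)  # 64 consecutive indices with step 2 and a random shift
    # clip = motion[frame_ix]       # crop the motion with the sampled indices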
Evaluator_272/mld/data/sampling/framerate.py
ADDED
|
@@ -0,0 +1,32 @@
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
def subsample(num_frames, last_framerate, new_framerate):
|
| 4 |
+
step = int(last_framerate / new_framerate)
|
| 5 |
+
assert step >= 1
|
| 6 |
+
frames = np.arange(0, num_frames, step)
|
| 7 |
+
return frames
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def upsample(motion, last_framerate, new_framerate):
|
| 12 |
+
step = int(new_framerate / last_framerate)
|
| 13 |
+
assert step >= 1
|
| 14 |
+
|
| 15 |
+
# Alpha blending => interpolation
|
| 16 |
+
alpha = np.linspace(0, 1, step+1)
|
| 17 |
+
last = np.einsum("l,...->l...", 1-alpha, motion[:-1])
|
| 18 |
+
new = np.einsum("l,...->l...", alpha, motion[1:])
|
| 19 |
+
|
| 20 |
+
chunks = (last + new)[:-1]
|
| 21 |
+
output = np.concatenate(chunks.swapaxes(1, 0))
|
| 22 |
+
# Don't forget the last one
|
| 23 |
+
output = np.concatenate((output, motion[[-1]]))
|
| 24 |
+
return output
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
if __name__ == "__main__":
|
| 28 |
+
motion = np.arange(105)
|
| 29 |
+
submotion = motion[subsample(len(motion), 100.0, 12.5)]
|
| 30 |
+
newmotion = upsample(submotion, 12.5, 100)
|
| 31 |
+
|
| 32 |
+
print(newmotion)
|
Evaluator_272/mld/data/sampling/frames.py
ADDED
|
@@ -0,0 +1,58 @@
| 1 |
+
from typing import Optional
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from numpy import ndarray as Array
|
| 5 |
+
import random
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_frameix_from_data_index(num_frames: int,
|
| 9 |
+
request_frames: Optional[int],
|
| 10 |
+
sampling: str = "conseq",
|
| 11 |
+
sampling_step: int = 1) -> Array:
|
| 12 |
+
nframes = num_frames
|
| 13 |
+
|
| 14 |
+
if request_frames is None:
|
| 15 |
+
frame_ix = np.arange(nframes)
|
| 16 |
+
else:
|
| 17 |
+
|
| 18 |
+
if request_frames > nframes:
|
| 19 |
+
fair = False # True
|
| 20 |
+
if fair:
|
| 21 |
+
# distills redundancy everywhere
|
| 22 |
+
choices = np.random.choice(range(nframes),
|
| 23 |
+
request_frames,
|
| 24 |
+
replace=True)
|
| 25 |
+
frame_ix = sorted(choices)
|
| 26 |
+
else:
|
| 27 |
+
# adding the last frame until done
|
| 28 |
+
ntoadd = max(0, request_frames - nframes)
|
| 29 |
+
lastframe = nframes - 1
|
| 30 |
+
padding = lastframe * np.ones(ntoadd, dtype=int)
|
| 31 |
+
frame_ix = np.concatenate((np.arange(0, nframes),
|
| 32 |
+
padding))
|
| 33 |
+
|
| 34 |
+
elif sampling in ["conseq", "random_conseq"]:
|
| 35 |
+
step_max = (nframes - 1) // (request_frames - 1)
|
| 36 |
+
if sampling == "conseq":
|
| 37 |
+
if sampling_step == -1 or sampling_step * (request_frames - 1) >= nframes:
|
| 38 |
+
step = step_max
|
| 39 |
+
else:
|
| 40 |
+
step = sampling_step
|
| 41 |
+
elif sampling == "random_conseq":
|
| 42 |
+
step = random.randint(1, step_max)
|
| 43 |
+
|
| 44 |
+
lastone = step * (request_frames - 1)
|
| 45 |
+
shift_max = nframes - lastone - 1
|
| 46 |
+
shift = random.randint(0, max(0, shift_max - 1))
|
| 47 |
+
frame_ix = shift + np.arange(0, lastone + 1, step)
|
| 48 |
+
|
| 49 |
+
elif sampling == "random":
|
| 50 |
+
choices = np.random.choice(range(nframes),
|
| 51 |
+
request_frames,
|
| 52 |
+
replace=False)
|
| 53 |
+
frame_ix = sorted(choices)
|
| 54 |
+
|
| 55 |
+
else:
|
| 56 |
+
raise ValueError("Sampling not recognized.")
|
| 57 |
+
|
| 58 |
+
return frame_ix
|
Evaluator_272/mld/data/utils.py
ADDED
|
@@ -0,0 +1,38 @@
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def lengths_to_mask(lengths):
|
| 5 |
+
max_len = max(lengths)
|
| 6 |
+
mask = torch.arange(max_len, device=lengths.device).expand(
|
| 7 |
+
len(lengths), max_len) < lengths.unsqueeze(1)
|
| 8 |
+
return mask
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def collate_tensors(batch):
|
| 12 |
+
dims = batch[0].dim()
|
| 13 |
+
max_size = [max([b.size(i) for b in batch]) for i in range(dims)]
|
| 14 |
+
size = (len(batch), ) + tuple(max_size)
|
| 15 |
+
canvas = batch[0].new_zeros(size=size)
|
| 16 |
+
for i, b in enumerate(batch):
|
| 17 |
+
sub_tensor = canvas[i]
|
| 18 |
+
for d in range(dims):
|
| 19 |
+
sub_tensor = sub_tensor.narrow(d, 0, b.size(d))
|
| 20 |
+
sub_tensor.add_(b)
|
| 21 |
+
return canvas
|
| 22 |
+
|
| 23 |
+
def mld_collate(batch):
|
| 24 |
+
notnone_batches = [b for b in batch if b is not None]
|
| 25 |
+
notnone_batches.sort(key=lambda x: x[2], reverse=True)
|
| 26 |
+
adapted_batch = {
|
| 27 |
+
"motion":
|
| 28 |
+
collate_tensors([torch.tensor(b[1]).float() for b in notnone_batches]),
|
| 29 |
+
"text": [b[0] for b in notnone_batches],
|
| 30 |
+
"length": [b[2] for b in notnone_batches],
|
| 31 |
+
"retrieval_name": [b[3] for b in notnone_batches]
|
| 32 |
+
}
|
| 33 |
+
return adapted_batch
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
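A small illustration of the two helpers above (not part of the upload; shapes are arbitrary): collate_tensors zero-pads variable-length tensors to a common shape, and lengths_to_mask turns the lengths into a boolean padding mask.

import torch
from mld.data.utils import collate_tensors, lengths_to_mask

a = torch.ones(3, 272)  # 3-frame clip with 272-dim features
b = torch.ones(5, 272)  # 5-frame clip
batch = collate_tensors([a, b])               # shape (2, 5, 272), shorter clip zero-padded
mask = lengths_to_mask(torch.tensor([3, 5]))  # shape (2, 5), True on valid frames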
Evaluator_272/mld/launch/__init__.py
ADDED
|
File without changes
|
Evaluator_272/mld/launch/blender.py
ADDED
|
@@ -0,0 +1,23 @@
| 1 |
+
# Fix blender path
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
# local packages
|
| 5 |
+
sys.path.append(os.path.expanduser("~/.local/lib/python3.9/site-packages"))
|
| 6 |
+
import bpy
|
| 7 |
+
import os
|
| 8 |
+
from argparse import ArgumentParser
|
| 9 |
+
|
| 10 |
+
# Monkey patch argparse such that
|
| 11 |
+
# blender / python / hydra parsing works
|
| 12 |
+
def parse_args(self, args=None, namespace=None):
|
| 13 |
+
if args is not None:
|
| 14 |
+
return self.parse_args_bak(args=args, namespace=namespace)
|
| 15 |
+
try:
|
| 16 |
+
idx = sys.argv.index("--")
|
| 17 |
+
args = sys.argv[idx+1:] # the list after '--'
|
| 18 |
+
except ValueError as e: # '--' not in the list:
|
| 19 |
+
args = []
|
| 20 |
+
return self.parse_args_bak(args=args, namespace=namespace)
|
| 21 |
+
|
| 22 |
+
setattr(ArgumentParser, 'parse_args_bak', ArgumentParser.parse_args)
|
| 23 |
+
setattr(ArgumentParser, 'parse_args', parse_args)
|
Evaluator_272/mld/launch/prepare.py
ADDED
|
@@ -0,0 +1,66 @@
| 1 |
+
import os
|
| 2 |
+
import warnings
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import hydra
|
| 6 |
+
from mld.tools.runid import generate_id
|
| 7 |
+
from omegaconf import OmegaConf
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Local paths
|
| 11 |
+
def code_path(path=""):
|
| 12 |
+
code_dir = hydra.utils.get_original_cwd()
|
| 13 |
+
code_dir = Path(code_dir)
|
| 14 |
+
return str(code_dir / path)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def working_path(path):
|
| 18 |
+
return str(Path(os.getcwd()) / path)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# fix the id for this run
|
| 22 |
+
ID = generate_id()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def generate_id():
|
| 26 |
+
return ID
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def get_last_checkpoint(path, ckpt_name="last.ckpt"):
|
| 30 |
+
output_dir = Path(hydra.utils.to_absolute_path(path))
|
| 31 |
+
last_ckpt_path = output_dir / "checkpoints" / ckpt_name
|
| 32 |
+
return str(last_ckpt_path)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def get_kitname(load_amass_data: bool, load_with_rot: bool):
|
| 36 |
+
if not load_amass_data:
|
| 37 |
+
return "kit-mmm-xyz"
|
| 38 |
+
if load_amass_data and not load_with_rot:
|
| 39 |
+
return "kit-amass-xyz"
|
| 40 |
+
if load_amass_data and load_with_rot:
|
| 41 |
+
return "kit-amass-rot"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
OmegaConf.register_new_resolver("code_path", code_path)
|
| 45 |
+
OmegaConf.register_new_resolver("working_path", working_path)
|
| 46 |
+
OmegaConf.register_new_resolver("generate_id", generate_id)
|
| 47 |
+
OmegaConf.register_new_resolver("absolute_path", hydra.utils.to_absolute_path)
|
| 48 |
+
OmegaConf.register_new_resolver("get_last_checkpoint", get_last_checkpoint)
|
| 49 |
+
OmegaConf.register_new_resolver("get_kitname", get_kitname)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Remove warnings
|
| 53 |
+
warnings.filterwarnings(
|
| 54 |
+
"ignore", ".*Trying to infer the `batch_size` from an ambiguous collection.*"
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
warnings.filterwarnings(
|
| 58 |
+
"ignore", ".*does not have many workers which may be a bottleneck*"
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
warnings.filterwarnings(
|
| 62 |
+
"ignore", ".*Our suggested max number of worker in current system is*"
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
os.environ["NUMEXPR_MAX_THREADS"] = "24"
|
Evaluator_272/mld/launch/tools.py
ADDED
|
@@ -0,0 +1,9 @@
| 1 |
+
from pathlib import Path
|
| 2 |
+
from omegaconf import DictConfig, OmegaConf
|
| 3 |
+
import hydra
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def resolve_cfg_path(cfg: DictConfig):
|
| 8 |
+
working_dir = os.getcwd()
|
| 9 |
+
cfg.working_dir = working_dir
|
Evaluator_272/mld/models/__init__.py
ADDED
|
File without changes
|
Evaluator_272/mld/models/architectures/__init__.py
ADDED
|
File without changes
|
Evaluator_272/mld/models/architectures/actor_vae.py
ADDED
|
@@ -0,0 +1,258 @@
| 1 |
+
from typing import List, Optional, Union
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
from torch import Tensor, nn
|
| 6 |
+
from torch.distributions.distribution import Distribution
|
| 7 |
+
from mld.utils.temos_utils import lengths_to_mask
|
| 8 |
+
from mld.models.operator import PositionalEncoding
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ActorVae(nn.Module):
|
| 12 |
+
|
| 13 |
+
def __init__(self,
|
| 14 |
+
ablation,
|
| 15 |
+
nfeats: int,
|
| 16 |
+
latent_dim: list = [1, 256],
|
| 17 |
+
ff_size: int = 1024,
|
| 18 |
+
num_layers: int = 9,
|
| 19 |
+
num_heads: int = 4,
|
| 20 |
+
dropout: float = 0.1,
|
| 21 |
+
is_vae: bool = True,
|
| 22 |
+
activation: str = "gelu",
|
| 23 |
+
position_embedding: str = "learned",
|
| 24 |
+
**kwargs) -> None:
|
| 25 |
+
|
| 26 |
+
super().__init__()
|
| 27 |
+
|
| 28 |
+
self.latent_size = latent_dim[0]
|
| 29 |
+
self.latent_dim = latent_dim[-1]
|
| 30 |
+
self.is_vae = is_vae
|
| 31 |
+
input_feats = nfeats
|
| 32 |
+
output_feats = nfeats
|
| 33 |
+
|
| 34 |
+
self.encoder = ActorAgnosticEncoder(nfeats=input_feats,
|
| 35 |
+
vae=True,
|
| 36 |
+
latent_dim=self.latent_dim,
|
| 37 |
+
ff_size=ff_size,
|
| 38 |
+
num_layers=num_layers,
|
| 39 |
+
num_heads=num_heads,
|
| 40 |
+
dropout=dropout,
|
| 41 |
+
activation=activation,
|
| 42 |
+
**kwargs)
|
| 43 |
+
|
| 44 |
+
self.decoder = ActorAgnosticDecoder(nfeats=output_feats,
|
| 45 |
+
vae=True,
|
| 46 |
+
latent_dim=self.latent_dim,
|
| 47 |
+
ff_size=ff_size,
|
| 48 |
+
num_layers=num_layers,
|
| 49 |
+
num_heads=num_heads,
|
| 50 |
+
dropout=dropout,
|
| 51 |
+
activation=activation,
|
| 52 |
+
**kwargs)
|
| 53 |
+
|
| 54 |
+
def forward(self, features: Tensor, lengths: Optional[List[int]] = None):
|
| 55 |
+
# Temp
|
| 56 |
+
# Todo
|
| 57 |
+
# remove and test this function
|
| 58 |
+
print("Should Not enter here")
|
| 59 |
+
|
| 60 |
+
z, dist = self.encode(features, lengths)
|
| 61 |
+
feats_rst = self.decode(z, lengths)
|
| 62 |
+
return feats_rst, z, dist
|
| 63 |
+
|
| 64 |
+
def encode(
|
| 65 |
+
self,
|
| 66 |
+
features: Tensor,
|
| 67 |
+
lengths: Optional[List[int]] = None
|
| 68 |
+
) -> Union[Tensor, Distribution]:
|
| 69 |
+
|
| 70 |
+
dist = self.encoder(features, lengths)
|
| 71 |
+
if self.is_vae:
|
| 72 |
+
latent = sample_from_distribution(dist)
|
| 73 |
+
else:
|
| 74 |
+
latent = dist.unsqueeze(0)
|
| 75 |
+
|
| 76 |
+
return latent, dist
|
| 77 |
+
|
| 78 |
+
def decode(self, z: Tensor, lengths: List[int]):
|
| 79 |
+
|
| 80 |
+
feats = self.decoder(z, lengths)
|
| 81 |
+
return feats
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class ActorAgnosticEncoder(nn.Module):
|
| 85 |
+
|
| 86 |
+
def __init__(self,
|
| 87 |
+
nfeats: int,
|
| 88 |
+
vae: bool,
|
| 89 |
+
latent_dim: int = 256,
|
| 90 |
+
ff_size: int = 1024,
|
| 91 |
+
num_layers: int = 4,
|
| 92 |
+
num_heads: int = 4,
|
| 93 |
+
dropout: float = 0.1,
|
| 94 |
+
activation: str = "gelu",
|
| 95 |
+
**kwargs) -> None:
|
| 96 |
+
super().__init__()
|
| 97 |
+
|
| 98 |
+
input_feats = nfeats
|
| 99 |
+
self.vae = vae
|
| 100 |
+
self.skel_embedding = nn.Linear(input_feats, latent_dim)
|
| 101 |
+
|
| 102 |
+
# Action agnostic: only one set of params
|
| 103 |
+
if vae:
|
| 104 |
+
self.mu_token = nn.Parameter(torch.randn(latent_dim))
|
| 105 |
+
self.logvar_token = nn.Parameter(torch.randn(latent_dim))
|
| 106 |
+
else:
|
| 107 |
+
self.emb_token = nn.Parameter(torch.randn(latent_dim))
|
| 108 |
+
|
| 109 |
+
self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout)
|
| 110 |
+
|
| 111 |
+
seq_trans_encoder_layer = nn.TransformerEncoderLayer(
|
| 112 |
+
d_model=latent_dim,
|
| 113 |
+
nhead=num_heads,
|
| 114 |
+
dim_feedforward=ff_size,
|
| 115 |
+
dropout=dropout,
|
| 116 |
+
activation=activation)
|
| 117 |
+
|
| 118 |
+
self.seqTransEncoder = nn.TransformerEncoder(seq_trans_encoder_layer,
|
| 119 |
+
num_layers=num_layers)
|
| 120 |
+
|
| 121 |
+
def forward(
|
| 122 |
+
self,
|
| 123 |
+
features: Tensor,
|
| 124 |
+
lengths: Optional[List[int]] = None
|
| 125 |
+
) -> Union[Tensor, Distribution]:
|
| 126 |
+
if lengths is None:
|
| 127 |
+
lengths = [len(feature) for feature in features]
|
| 128 |
+
|
| 129 |
+
device = features.device
|
| 130 |
+
|
| 131 |
+
bs, nframes, nfeats = features.shape
|
| 132 |
+
mask = lengths_to_mask(lengths, device)
|
| 133 |
+
|
| 134 |
+
x = features
|
| 135 |
+
# Embed each human poses into latent vectors
|
| 136 |
+
x = self.skel_embedding(x)
|
| 137 |
+
|
| 138 |
+
# Switch sequence and batch_size because the input of
|
| 139 |
+
# Pytorch Transformer is [Sequence, Batch size, ...]
|
| 140 |
+
x = x.permute(1, 0, 2) # now it is [nframes, bs, latent_dim]
|
| 141 |
+
|
| 142 |
+
# Each batch has its own set of tokens
|
| 143 |
+
if self.vae:
|
| 144 |
+
mu_token = torch.tile(self.mu_token, (bs, )).reshape(bs, -1)
|
| 145 |
+
logvar_token = torch.tile(self.logvar_token,
|
| 146 |
+
(bs, )).reshape(bs, -1)
|
| 147 |
+
|
| 148 |
+
# adding the distribution tokens for all sequences
|
| 149 |
+
xseq = torch.cat((mu_token[None], logvar_token[None], x), 0)
|
| 150 |
+
|
| 151 |
+
# create a bigger mask to allow attending to mu and logvar
|
| 152 |
+
token_mask = torch.ones((bs, 2), dtype=bool, device=x.device)
|
| 153 |
+
aug_mask = torch.cat((token_mask, mask), 1)
|
| 154 |
+
else:
|
| 155 |
+
emb_token = torch.tile(self.emb_token, (bs, )).reshape(bs, -1)
|
| 156 |
+
|
| 157 |
+
# adding the embedding token for all sequences
|
| 158 |
+
xseq = torch.cat((emb_token[None], x), 0)
|
| 159 |
+
|
| 160 |
+
# create a bigger mask, to allow attend to emb
|
| 161 |
+
token_mask = torch.ones((bs, 1), dtype=bool, device=x.device)
|
| 162 |
+
aug_mask = torch.cat((token_mask, mask), 1)
|
| 163 |
+
|
| 164 |
+
# add positional encoding
|
| 165 |
+
xseq = self.sequence_pos_encoding(xseq)
|
| 166 |
+
final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask)
|
| 167 |
+
|
| 168 |
+
if self.vae:
|
| 169 |
+
mu, logvar = final[0], final[1]
|
| 170 |
+
std = logvar.exp().pow(0.5)
|
| 171 |
+
# https://github.com/kampta/pytorch-distributions/blob/master/gaussian_vae.py
|
| 172 |
+
dist = torch.distributions.Normal(mu, std)
|
| 173 |
+
return dist
|
| 174 |
+
else:
|
| 175 |
+
return final[0]
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class ActorAgnosticDecoder(nn.Module):
|
| 179 |
+
|
| 180 |
+
def __init__(self,
|
| 181 |
+
nfeats: int,
|
| 182 |
+
latent_dim: int = 256,
|
| 183 |
+
ff_size: int = 1024,
|
| 184 |
+
num_layers: int = 4,
|
| 185 |
+
num_heads: int = 4,
|
| 186 |
+
dropout: float = 0.1,
|
| 187 |
+
activation: str = "gelu",
|
| 188 |
+
**kwargs) -> None:
|
| 189 |
+
super().__init__()
|
| 190 |
+
|
| 191 |
+
output_feats = nfeats
|
| 192 |
+
self.latent_dim = latent_dim
|
| 193 |
+
self.nfeats = nfeats
|
| 194 |
+
|
| 195 |
+
self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout)
|
| 196 |
+
|
| 197 |
+
seq_trans_decoder_layer = nn.TransformerDecoderLayer(
|
| 198 |
+
d_model=latent_dim,
|
| 199 |
+
nhead=num_heads,
|
| 200 |
+
dim_feedforward=ff_size,
|
| 201 |
+
dropout=dropout,
|
| 202 |
+
activation=activation)
|
| 203 |
+
|
| 204 |
+
self.seqTransDecoder = nn.TransformerDecoder(seq_trans_decoder_layer,
|
| 205 |
+
num_layers=num_layers)
|
| 206 |
+
|
| 207 |
+
self.final_layer = nn.Linear(latent_dim, output_feats)
|
| 208 |
+
|
| 209 |
+
def forward(self, z: Tensor, lengths: List[int]):
|
| 210 |
+
mask = lengths_to_mask(lengths, z.device)
|
| 211 |
+
# latent_dim = z.shape[1]
|
| 212 |
+
bs, nframes = mask.shape
|
| 213 |
+
nfeats = self.nfeats
|
| 214 |
+
|
| 215 |
+
# z = z[None] # sequence of 1 element for the memory
|
| 216 |
+
|
| 217 |
+
# Construct time queries
|
| 218 |
+
time_queries = torch.zeros(nframes,
|
| 219 |
+
bs,
|
| 220 |
+
self.latent_dim,
|
| 221 |
+
device=z.device)
|
| 222 |
+
time_queries = self.sequence_pos_encoding(time_queries)
|
| 223 |
+
|
| 224 |
+
# Pass through the transformer decoder
|
| 225 |
+
# with the latent vector for memory
|
| 226 |
+
output = self.seqTransDecoder(tgt=time_queries,
|
| 227 |
+
memory=z,
|
| 228 |
+
tgt_key_padding_mask=~mask)
|
| 229 |
+
|
| 230 |
+
output = self.final_layer(output)
|
| 231 |
+
# zero for padded area
|
| 232 |
+
output[~mask.T] = 0
|
| 233 |
+
# Pytorch Transformer: [Sequence, Batch size, ...]
|
| 234 |
+
feats = output.permute(1, 0, 2)
|
| 235 |
+
return feats
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def sample_from_distribution(
|
| 239 |
+
dist,
|
| 240 |
+
*,
|
| 241 |
+
fact=1.0,
|
| 242 |
+
sample_mean=False,
|
| 243 |
+
) -> Tensor:
|
| 244 |
+
|
| 245 |
+
if sample_mean:
|
| 246 |
+
return dist.loc.unsqueeze(0)
|
| 247 |
+
|
| 248 |
+
# Reparameterization trick
|
| 249 |
+
if fact is None:
|
| 250 |
+
return dist.rsample().unsqueeze(0)
|
| 251 |
+
|
| 252 |
+
# Rescale the eps
|
| 253 |
+
eps = dist.rsample() - dist.loc
|
| 254 |
+
z = dist.loc + fact * eps
|
| 255 |
+
|
| 256 |
+
# add latent size
|
| 257 |
+
z = z.unsqueeze(0)
|
| 258 |
+
return z
|
Evaluator_272/mld/models/architectures/fc.py
ADDED
|
@@ -0,0 +1,100 @@
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Encoder_FC(nn.Module):
|
| 7 |
+
def __init__(self, modeltype, njoints, nfeats, num_frames, num_classes, translation, pose_rep, glob, glob_rot,
|
| 8 |
+
latent_dim=256, **kargs):
|
| 9 |
+
super().__init__()
|
| 10 |
+
|
| 11 |
+
self.modeltype = modeltype
|
| 12 |
+
self.njoints = njoints
|
| 13 |
+
self.nfeats = nfeats
|
| 14 |
+
self.num_frames = num_frames
|
| 15 |
+
self.num_classes = num_classes
|
| 16 |
+
self.translation = translation
|
| 17 |
+
self.pose_rep = pose_rep
|
| 18 |
+
self.glob = glob
|
| 19 |
+
self.glob_rot = glob_rot
|
| 20 |
+
|
| 21 |
+
self.latent_dim = latent_dim
|
| 22 |
+
|
| 23 |
+
self.activation = nn.GELU()
|
| 24 |
+
|
| 25 |
+
self.input_dim = self.njoints*self.nfeats*self.num_frames+self.num_classes
|
| 26 |
+
|
| 27 |
+
self.fully_connected = nn.Sequential(nn.Linear(self.input_dim, 512),
|
| 28 |
+
nn.GELU(),
|
| 29 |
+
nn.Linear(512, 256),
|
| 30 |
+
nn.GELU())
|
| 31 |
+
if self.modeltype == "cvae":
|
| 32 |
+
self.mu = nn.Linear(256, self.latent_dim)
|
| 33 |
+
self.var = nn.Linear(256, self.latent_dim)
|
| 34 |
+
else:
|
| 35 |
+
self.final = nn.Linear(256, self.latent_dim)
|
| 36 |
+
|
| 37 |
+
def forward(self, batch):
|
| 38 |
+
x, y = batch["x"], batch["y"]
|
| 39 |
+
bs, njoints, feats, nframes = x.size()
|
| 40 |
+
if (njoints * feats * nframes) != self.njoints*self.nfeats*self.num_frames:
|
| 41 |
+
raise ValueError("This model is not adapted with this input")
|
| 42 |
+
|
| 43 |
+
if len(y.shape) == 1: # y may also be given already one-hot encoded
|
| 44 |
+
y = F.one_hot(y, self.num_classes)
|
| 45 |
+
y = y.to(dtype=x.dtype)
|
| 46 |
+
x = x.reshape(bs, njoints*feats*nframes)
|
| 47 |
+
x = torch.cat((x, y), 1)
|
| 48 |
+
|
| 49 |
+
x = self.fully_connected(x)
|
| 50 |
+
|
| 51 |
+
if self.modeltype == "cvae":
|
| 52 |
+
return {"mu": self.mu(x), "logvar": self.var(x)}
|
| 53 |
+
else:
|
| 54 |
+
return {"z": self.final(x)}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class Decoder_FC(nn.Module):
|
| 58 |
+
def __init__(self, modeltype, njoints, nfeats, num_frames, num_classes, translation, pose_rep, glob, glob_rot,
|
| 59 |
+
latent_dim=256, **kargs):
|
| 60 |
+
super().__init__()
|
| 61 |
+
|
| 62 |
+
self.modeltype = modeltype
|
| 63 |
+
self.njoints = njoints
|
| 64 |
+
self.nfeats = nfeats
|
| 65 |
+
self.num_frames = num_frames
|
| 66 |
+
self.num_classes = num_classes
|
| 67 |
+
self.translation = translation
|
| 68 |
+
self.pose_rep = pose_rep
|
| 69 |
+
self.glob = glob
|
| 70 |
+
self.glob_rot = glob_rot
|
| 71 |
+
|
| 72 |
+
self.latent_dim = latent_dim
|
| 73 |
+
|
| 74 |
+
self.input_dim = self.latent_dim + self.num_classes
|
| 75 |
+
self.output_dim = self.njoints*self.nfeats*self.num_frames
|
| 76 |
+
|
| 77 |
+
self.fully_connected = nn.Sequential(nn.Linear(self.input_dim, 256),
|
| 78 |
+
nn.GELU(),
|
| 79 |
+
nn.Linear(256, 512),
|
| 80 |
+
nn.GELU(),
|
| 81 |
+
nn.Linear(512, self.output_dim),
|
| 82 |
+
nn.GELU())
|
| 83 |
+
|
| 84 |
+
def forward(self, batch):
|
| 85 |
+
z, y = batch["z"], batch["y"]
|
| 86 |
+
# z: [batch_size, latent_dim]
|
| 87 |
+
# y: [batch_size]
|
| 88 |
+
if len(y.shape) == 1: # can give on hot encoded as input
|
| 89 |
+
y = F.one_hot(y, self.num_classes)
|
| 90 |
+
y = y.to(dtype=z.dtype) # y: [batch_size, num_classes]
|
| 91 |
+
# z: [batch_size, latent_dim+num_classes]
|
| 92 |
+
z = torch.cat((z, y), dim=1)
|
| 93 |
+
|
| 94 |
+
z = self.fully_connected(z)
|
| 95 |
+
|
| 96 |
+
bs, _ = z.size()
|
| 97 |
+
|
| 98 |
+
z = z.reshape(bs, self.njoints, self.nfeats, self.num_frames)
|
| 99 |
+
batch["output"] = z
|
| 100 |
+
return batch
|
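A minimal usage sketch for the two modules above, not part of the uploaded file: the shapes (njoints=22, nfeats=3, num_frames=60, num_classes=12) and the "cae" model type are illustrative assumptions, not values taken from the repository.

import torch

# Hypothetical shapes: 22 joints x 3 features x 60 frames, 12 action classes (assumed for illustration).
enc = Encoder_FC("cae", njoints=22, nfeats=3, num_frames=60, num_classes=12,
                 translation=True, pose_rep="xyz", glob=True, glob_rot=None)
dec = Decoder_FC("cae", njoints=22, nfeats=3, num_frames=60, num_classes=12,
                 translation=True, pose_rep="xyz", glob=True, glob_rot=None)

batch = {"x": torch.randn(4, 22, 3, 60),     # [bs, njoints, nfeats, nframes]
         "y": torch.randint(0, 12, (4,))}    # class indices; one-hot encoded inside forward()
batch.update(enc(batch))                     # adds "z" with shape [4, 256]
batch = dec(batch)                           # adds "output" with shape [4, 22, 3, 60]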
Evaluator_272/mld/models/architectures/gpt/clip.py
ADDED
@@ -0,0 +1,90 @@
import os
from typing import List, Union

import torch
from torch import Tensor, nn
from torch.distributions.distribution import Distribution
from transformers import AutoModel, AutoTokenizer, CLIPTextModel, CLIPTokenizer

from mld.models.operator import PositionalEncoding
from mld.utils.temos_utils import lengths_to_mask

import pytorch_lightning as pl


# Wrapper around a Hugging Face text backbone (CLIP or BERT). The backbone is frozen
# unless finetune=True; CLIP can return either pooled features or the last hidden state.
class TextEncoder(pl.LightningModule):

    def __init__(
        self,
        modelpath: str,
        finetune: bool = False,
        last_hidden_state: bool = False,
        latent_dim: list = [1, 256],
    ) -> None:

        super().__init__()

        self.latent_dim = latent_dim

        self.tokenizer = AutoTokenizer.from_pretrained(modelpath)
        self.text_model = AutoModel.from_pretrained(modelpath)

        # Don't train the text model
        if not finetune:
            self.text_model.training = False
            for p in self.text_model.parameters():
                p.requires_grad = False

        # Then configure the model
        self.max_length = self.tokenizer.model_max_length
        if "clip" in modelpath:
            self.text_encoded_dim = self.text_model.config.text_config.hidden_size
            if last_hidden_state:
                self.name = "clip_hidden"
            else:
                self.name = "clip"
        elif "bert" in modelpath:
            self.name = "bert"
            self.text_encoded_dim = self.text_model.config.hidden_size
        else:
            raise ValueError(f"Model {modelpath} not supported")

    def forward(self, texts: List[str]):
        # get prompt text embeddings
        if self.name in ["clip", "clip_hidden"]:
            text_inputs = self.tokenizer(
                texts,
                padding="max_length",
                truncation=True,
                max_length=self.max_length,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            # truncate to the maximum length CLIP can handle
            if text_input_ids.shape[-1] > self.tokenizer.model_max_length:
                text_input_ids = text_input_ids[:, :self.tokenizer.model_max_length]
        elif self.name == "bert":
            text_inputs = self.tokenizer(texts,
                                         return_tensors="pt",
                                         padding=True)

        # use pooled output if latent dim is two-dimensional
        # pooled = 0 if self.latent_dim[0] == 1 else 1  # (bs, seq_len, text_encoded_dim) -> (bs, text_encoded_dim)
        # text encoder forward; CLIP must use get_text_features
        if self.name == "clip":
            # (batch_size, text_encoded_dim)
            text_embeddings = self.text_model.get_text_features(
                text_input_ids.to(self.text_model.device))
            # (batch_size, 1, text_encoded_dim)
            text_embeddings = text_embeddings.unsqueeze(1)
        elif self.name == "clip_hidden":
            # (batch_size, seq_length, text_encoded_dim)
            text_embeddings = self.text_model.text_model(
                text_input_ids.to(self.text_model.device)).last_hidden_state
        elif self.name == "bert":
            # (batch_size, seq_length, text_encoded_dim)
            text_embeddings = self.text_model(
                **text_inputs.to(self.text_model.device)).last_hidden_state
        else:
            raise NotImplementedError(f"Model {self.name} not implemented")

        return text_embeddings
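A hedged usage sketch for the TextEncoder above, not part of the uploaded file: the checkpoint path "openai/clip-vit-base-patch32" is an assumed example, chosen only because its path contains "clip" and therefore follows the pooled CLIP branch; the printed hidden size is likewise illustrative.

import torch

# Assumed checkpoint path; any Hugging Face model whose path contains "clip" or "bert" takes the matching branch.
text_encoder = TextEncoder(modelpath="openai/clip-vit-base-patch32",
                           finetune=False, last_hidden_state=False)

with torch.no_grad():
    emb = text_encoder(["a person walks forward and waves"])

print(emb.shape)  # (1, 1, text_encoded_dim) for the pooled CLIP branch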
Evaluator_272/mld/models/architectures/gpt/pos_encoding.py
ADDED
@@ -0,0 +1,43 @@
"""
Various positional encodings for the transformer.
"""
import math
import torch
from torch import nn


def PE1d_sincos(seq_length, dim):
    """
    :param seq_length: length of positions
    :param dim: dimension of the model
    :return: seq_length x 1 x dim position tensor
    """
    if dim % 2 != 0:
        raise ValueError("Cannot use sin/cos positional encoding with "
                         "odd dim (got dim={:d})".format(dim))
    pe = torch.zeros(seq_length, dim)
    position = torch.arange(0, seq_length).unsqueeze(1)
    div_term = torch.exp((torch.arange(0, dim, 2, dtype=torch.float) *
                          -(math.log(10000.0) / dim)))
    pe[:, 0::2] = torch.sin(position.float() * div_term)
    pe[:, 1::2] = torch.cos(position.float() * div_term)

    return pe.unsqueeze(1)


class PositionEmbedding(nn.Module):
    """
    Absolute positional embedding: sinusoidal initialisation, optionally learned
    when grad=True.
    """
    def __init__(self, seq_length, dim, dropout, grad=False):
        super().__init__()
        self.embed = nn.Parameter(data=PE1d_sincos(seq_length, dim), requires_grad=grad)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        # x.shape: bs, seq_len, feat_dim
        l = x.shape[1]
        x = x.permute(1, 0, 2) + self.embed[:l].expand(x.permute(1, 0, 2).shape)
        x = self.dropout(x.permute(1, 0, 2))
        return x
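A short, illustrative sketch of how the position embedding is applied, not part of the uploaded file; the sequence length, feature dimension, and batch shape below are assumed values.

import torch

# Assumed sizes: up to 196 positions, 256-dim features.
pos_emb = PositionEmbedding(seq_length=196, dim=256, dropout=0.1, grad=False)

x = torch.randn(8, 50, 256)   # (batch, seq_len, feat_dim); seq_len must not exceed seq_length
out = pos_emb(x)              # same shape, with sinusoidal positions added per time step
print(out.shape)              # torch.Size([8, 50, 256])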