Initial upload of MotionStreamer code, excluding large extracted data and output folders.
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +1 -0
- .ipynb_checkpoints/TRAIN_motionstreamer-checkpoint.sh +15 -0
- .ipynb_checkpoints/demo_t2m-checkpoint.py +204 -0
- .ipynb_checkpoints/environment-checkpoint.yaml +258 -0
- .ipynb_checkpoints/requirements-checkpoint.txt +17 -0
- .ipynb_checkpoints/train_motionstreamer-checkpoint.py +264 -0
- EVAL_causal_TAE.sh +6 -0
- EVAL_t2m.sh +7 -0
- LICENSE +21 -0
- README.md +336 -0
- TRAIN_causal_TAE.sh +22 -0
- TRAIN_evaluator_272.sh +6 -0
- TRAIN_motionstreamer.sh +16 -0
- TRAIN_t2m.sh +15 -0
- assets/teaser.jpg +3 -0
- babel_272/.gitattributes +59 -0
- babel_272/README.md +34 -0
- babel_272/motion_data.zip +3 -0
- babel_272/split/train.txt +0 -0
- babel_272/split/val.txt +0 -0
- babel_272/t2m_babel_mean_std/Mean.npy +3 -0
- babel_272/t2m_babel_mean_std/Std.npy +3 -0
- babel_272/texts.zip +3 -0
- babel_272_stream/.gitattributes +59 -0
- babel_272_stream/README.md +62 -0
- babel_272_stream/train_stream.zip +3 -0
- babel_272_stream/train_stream_text.zip +3 -0
- babel_272_stream/val_stream.zip +3 -0
- babel_272_stream/val_stream_text.zip +3 -0
- body_models/human_model_files/mano/MANO_LEFT.pkl +3 -0
- body_models/human_model_files/mano/MANO_RIGHT.pkl +3 -0
- body_models/human_model_files/smpl/J_regressor_extra.npy +3 -0
- body_models/human_model_files/smpl/SMPL_FEMALE.pkl +3 -0
- body_models/human_model_files/smpl/SMPL_MALE.pkl +3 -0
- body_models/human_model_files/smpl/SMPL_NEUTRAL.pkl +3 -0
- body_models/human_model_files/smpl/VPOSER_CKPT/TR00_004_00_WO_accad.ini +29 -0
- body_models/human_model_files/smpl/VPOSER_CKPT/snapshots/._TR00_E096.pt +3 -0
- body_models/human_model_files/smpl/VPOSER_CKPT/snapshots/TR00_E096.pt +3 -0
- body_models/human_model_files/smpl/VPOSER_CKPT/vposer_smpl.py +164 -0
- body_models/human_model_files/smplx/MANO_SMPLX_vertex_ids.pkl +3 -0
- body_models/human_model_files/smplx/SMPL-X__FLAME_vertex_ids.npy +3 -0
- body_models/human_model_files/smplx/SMPLX_FEMALE.npz +3 -0
- body_models/human_model_files/smplx/SMPLX_FEMALE.pkl +3 -0
- body_models/human_model_files/smplx/SMPLX_MALE.npz +3 -0
- body_models/human_model_files/smplx/SMPLX_MALE.pkl +3 -0
- body_models/human_model_files/smplx/SMPLX_NEUTRAL.npz +3 -0
- body_models/human_model_files/smplx/SMPLX_NEUTRAL.pkl +3 -0
- body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW.npy +3 -0
- body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW.npz +3 -0
- body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW_WiFlame.npy +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/teaser.jpg filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/TRAIN_motionstreamer-checkpoint.sh
ADDED
@@ -0,0 +1,15 @@
+NUM_GPUS=${1:-1} # default: 1 GPU
+
+BATCH_SIZE=$((30 / NUM_GPUS))
+
+echo "Using $NUM_GPUS GPUs, each with a batch size of $BATCH_SIZE"
+
+accelerate launch --num_processes $NUM_GPUS train_motionstreamer.py \
+    --batch-size $BATCH_SIZE \
+    --lr 0.0001 \
+    --total-iter 100000 \
+    --out-dir Experiments \
+    --exp-name motionstreamer_model \
+    --dataname t2m_babel_272 \
+    --latent_dir babel_272_stream/t2m_babel_latents \
+    --num_gpus $NUM_GPUS
.ipynb_checkpoints/demo_t2m-checkpoint.py
ADDED
@@ -0,0 +1,204 @@
+import os
+import torch
+import numpy as np
+from models.llama_model import LLaMAHF, LLaMAHFConfig
+import models.tae as tae
+import options.option_transformer as option_trans
+import warnings
+
+import smplx
+from utils import bvh, quat
+from utils.face_z_align_util import rotation_6d_to_matrix, matrix_to_axis_angle, axis_angle_to_quaternion
+
+
+warnings.filterwarnings('ignore')
+
+comp_device = torch.device('cuda')
+##### ---- Exp dirs ---- #####
+args = option_trans.get_args_parser()
+torch.manual_seed(args.seed)
+
+from sentence_transformers import SentenceTransformer
+t5_model = SentenceTransformer('sentencet5-xxl/')
+t5_model.eval()
+for p in t5_model.parameters():
+    p.requires_grad = False
+
+def save_motion_as_bvh(motion_data, output_path, fps=30):
+    """
+    Saves a motion tensor in the 272-dimensional format to a BVH file.
+    This version is adapted from the official repository script for robustness.
+    """
+    print(f"--- Starting direct conversion to BVH: {os.path.basename(output_path)} ---")
+    try:
+        # --- 1. Ensure data is a 2D NumPy array ---
+        if isinstance(motion_data, torch.Tensor):
+            motion_data = motion_data.detach().cpu().numpy()
+
+        # This is the key fix: Check dimensions before squeezing
+        if motion_data.ndim == 3 and motion_data.shape[0] == 1:
+            motion_data = motion_data.squeeze(0)
+        elif motion_data.ndim != 2:
+            raise ValueError(f"Input motion data must be 2D or 3D with a batch size of 1, but got shape {motion_data.shape}")
+
+        # --- 2. Recover 85-dim SMPL format from 272-dim format ---
+        # This logic is from the official script's `recover_from_local_rotation`
+        njoint = 22
+        nfrm, _ = motion_data.shape
+
+        rotations_matrix = rotation_6d_to_matrix(torch.from_numpy(motion_data[:, 8+6*njoint : 8+12*njoint]).reshape(nfrm, -1, 6)).numpy()
+
+        # Accumulate heading rotations
+        global_heading_diff_rot_6d = torch.from_numpy(motion_data[:, 2:8])
+        global_heading_diff_rot = rotation_6d_to_matrix(global_heading_diff_rot_6d).numpy()
+        global_heading_rot = np.zeros_like(global_heading_diff_rot)
+        global_heading_rot[0] = global_heading_diff_rot[0]
+        for i in range(1, nfrm):
+            global_heading_rot[i] = np.matmul(global_heading_diff_rot[i], global_heading_rot[i-1])
+
+        # Calculate root translation
+        velocities_root_xy = motion_data[:, :2]
+        positions_no_heading = motion_data[:, 8 : 8+3*njoint].reshape(nfrm, -1, 3)
+        height = positions_no_heading[:, 0, 1]
+
+        inv_global_heading_rot = np.transpose(global_heading_rot, (0, 2, 1))
+        rotations_matrix[:, 0, ...] = np.matmul(inv_global_heading_rot, rotations_matrix[:, 0, ...])
+
+        velocities_root_xyz = np.zeros((nfrm, 3))
+        velocities_root_xyz[:, 0] = velocities_root_xy[:, 0]
+        velocities_root_xyz[:, 2] = velocities_root_xy[:, 1]
+        velocities_root_xyz[1:, :] = np.matmul(inv_global_heading_rot[:-1], velocities_root_xyz[1:, :, None]).squeeze(-1)
+        root_translation = np.cumsum(velocities_root_xyz, axis=0)
+        root_translation[:, 1] = height
+
+        # Convert rotation matrices to axis-angle
+        axis_angle = matrix_to_axis_angle(torch.from_numpy(rotations_matrix)).numpy()
+        poses_85dim = np.concatenate([axis_angle.reshape(nfrm, -1), np.zeros((nfrm, 6)), root_translation, np.zeros((nfrm, 10))], axis=-1)
+
+        # --- 3. Convert 85-dim SMPL to BVH data ---
+        # This logic is from the official script's `smpl2bvh`
+        rots = poses_85dim[:, :72].reshape(-1, 24, 3)
+        trans = poses_85dim[:, 72:75]
+
+        # Get skeleton from SMPL model
+        model = smplx.create(model_path="body_models/human_model_files", model_type="smpl", gender="NEUTRAL")
+        parents = model.parents.detach().cpu().numpy()
+        rest_pose = model().joints.detach().cpu().numpy().squeeze()[:24,:]
+        offsets = rest_pose - rest_pose[parents]
+        offsets[0] = np.array([0,0,0])
+
+        rotations_quat = axis_angle_to_quaternion(torch.from_numpy(rots)).numpy()
+        rotations_euler = np.degrees(quat.to_euler(rotations_quat, order="zyx"))
+
+        positions = offsets[None].repeat(len(rots), axis=0)
+        positions[:, 0] = trans
+
+        joint_names = [
+            "Pelvis", "Left_hip", "Right_hip", "Spine1", "Left_knee", "Right_knee", "Spine2",
+            "Left_ankle", "Right_ankle", "Spine3", "Left_foot", "Right_foot", "Neck",
+            "Left_collar", "Right_collar", "Head", "Left_shoulder", "Right_shoulder",
+            "Left_elbow", "Right_elbow", "Left_wrist", "Right_wrist", "Left_hand", "Right_hand"
+        ]
+
+        # --- 4. Save the final BVH file ---
+        bvh.save(output_path, {
+            "rotations": rotations_euler,
+            "positions": positions,
+            "offsets": offsets,
+            "parents": parents,
+            "names": joint_names,
+            "order": "zyx",
+            "frametime": 1.0 / fps,
+        })
+        print(f"✅ BVH file saved successfully to {output_path}")
+
+    except Exception as e:
+        print(f"❌ BVH Conversion Failed. Error: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+##### ---- Network ---- #####
+clip_range = [-30,20]
+
+net = tae.Causal_HumanTAE(
+    hidden_size=args.hidden_size,
+    down_t=args.down_t,
+    stride_t=args.stride_t,
+    depth=args.depth,
+    dilation_growth_rate=args.dilation_growth_rate,
+    activation='relu',
+    latent_dim=args.latent_dim,
+    clip_range=clip_range
+)
+
+
+config = LLaMAHFConfig.from_name('Normal_size')
+config.block_size = 78
+trans_encoder = LLaMAHF(config, args.num_diffusion_head_layers, args.latent_dim, comp_device)
+
+print('loading checkpoint from {}'.format(args.resume_pth))
+ckpt = torch.load(args.resume_pth, map_location='cpu')
+net.load_state_dict(ckpt['net'], strict=True)
+net.eval()
+net.to(comp_device)
+
+
+if args.resume_trans is not None:
+    print('loading transformer checkpoint from {}'.format(args.resume_trans))
+    ckpt = torch.load(args.resume_trans, map_location='cpu')
+    new_ckpt_trans = {}
+    for key in ckpt['trans'].keys():
+        if key.split('.')[0]=='module':
+            new_key = '.'.join(key.split('.')[1:])
+        else:
+            new_key = key
+        new_ckpt_trans[new_key] = ckpt['trans'][key]
+    trans_encoder.load_state_dict(new_ckpt_trans, strict=True)
+trans_encoder.eval()
+trans_encoder.to(comp_device)
+
+
+reference_end_latent = np.load('reference_end_latent_t2m_272.npy')
+reference_end_latent = torch.from_numpy(reference_end_latent).to(comp_device)
+
+mean = np.load('humanml3d_272/mean_std/Mean.npy')
+std = np.load('humanml3d_272/mean_std/Std.npy')
+
+# forward inference
+threshold = 0.1
+cfg_scale = 4.0
+print(f"Generating motion with CFG scale: {cfg_scale}")
+motion_latents = trans_encoder.sample_for_eval_CFG_inference(text=args.text, tokenizer=t5_model, device=comp_device, reference_end_latent=reference_end_latent, threshold=threshold, cfg=cfg_scale)
+
+# forward decode
+motion_seqs = net.forward_decoder(motion_latents)
+from visualization.recover_visualize import recover_from_local_position
+import visualization.plot_3d_global as plot_3d
+
+motion = motion_seqs.squeeze(0)
+motion = motion.detach().cpu().numpy()
+
+if not os.path.exists('demo_output'):
+    os.makedirs('demo_output')
+
+if args.mode == 'pos':
+    # Option1: recover from joint position
+    pred_xyz = recover_from_local_position(motion * std + mean, 22)
+    xyz = pred_xyz.reshape(1, -1, 22, 3)
+    pose_vis = plot_3d.draw_to_batch(xyz, [args.text], [f'demo_output/{args.text}.mp4'], fps=30)
+    print(f"Visualized result is saved in demo_output/{args.text}.mp4")
+
+elif args.mode == 'rot':
+    # De-normalize the motion data to its original scale
+    motion = motion * std + mean
+
+    # Define the output path for the new BVH file
+    output_bvh_path = os.path.join('demo_output', f'{args.text}.bvh')
+
+    # Call the new function to save the BVH file directly
+    save_motion_as_bvh(motion, output_bvh_path, fps=30)
+
+else:
+    raise ValueError(f'Invalid mode: {args.mode}')
+
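For orientation, the slicing in `save_motion_as_bvh` above implies a fixed layout of each 272-dim frame. The sketch below is not part of the upload; it only names the chunks exactly as the demo script indexes them, and the label for the middle 66-dim block (which the demo never reads) is an assumption based on the 272-dim representation repository.

```python
# Minimal sketch (not from the repository): name the 272-dim chunks as demo_t2m indexes them.
import numpy as np

NJOINT = 22  # matches `njoint = 22` in save_motion_as_bvh

def split_272(frame_vec: np.ndarray) -> dict:
    """Split a (..., 272) frame vector into the blocks used by save_motion_as_bvh."""
    assert frame_vec.shape[-1] == 8 + 12 * NJOINT  # 8 + 12*22 = 272
    return {
        "root_xy_velocity": frame_vec[..., 0:2],                                   # 2 dims
        "heading_diff_rot_6d": frame_vec[..., 2:8],                                # 6 dims
        "joint_positions_no_heading": frame_vec[..., 8:8 + 3 * NJOINT],            # 66 dims
        "joint_velocities_assumed": frame_vec[..., 8 + 3 * NJOINT:8 + 6 * NJOINT], # 66 dims (assumption; unused by the demo)
        "joint_rotations_6d": frame_vec[..., 8 + 6 * NJOINT:8 + 12 * NJOINT],      # 132 dims
    }

if __name__ == "__main__":
    for name, chunk in split_272(np.zeros(272)).items():
        print(name, chunk.shape)
```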
.ipynb_checkpoints/environment-checkpoint.yaml
ADDED
@@ -0,0 +1,258 @@
+name: mgpt
+channels:
+  - pytorch
+  - conda-forge
+  - defaults
+  - https://repo.anaconda.com/pkgs/main
+  - https://repo.anaconda.com/pkgs/r
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=4.5=1_gnu
+  - asttokens=3.0.0=pyhd8ed1ab_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - blas=1.0=mkl
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2025.1.31=hbcca054_0
+  - certifi=2024.8.30=pyhd8ed1ab_0
+  - comm=0.2.2=pyhd8ed1ab_0
+  - cudatoolkit=10.1.243=h6bb024c_0
+  - debugpy=1.4.1=py38h709712a_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - executing=2.1.0=pyhd8ed1ab_0
+  - ffmpeg=4.3=hf484d3e_0
+  - freetype=2.10.4=h5ab3b9f_0
+  - gmp=6.2.1=h2531618_2
+  - gnutls=3.6.15=he1e5248_0
+  - intel-openmp=2021.3.0=h06a4308_3350
+  - ipykernel=6.20.2=pyh210e3f2_0
+  - jpeg=9b=h024ee3a_2
+  - jupyter_client=7.1.2=pyhd8ed1ab_0
+  - jupyter_core=5.7.2=pyh31011fe_1
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.35.1=h7274673_9
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=9.3.0=h5101ec6_17
+  - libgomp=9.3.0=h5101ec6_17
+  - libiconv=1.15=h63c8f33_5
+  - libidn2=2.3.2=h7f8727e_0
+  - libpng=1.6.37=hbc83047_0
+  - libsodium=1.0.18=h36c2ea0_1
+  - libstdcxx-ng=13.2.0=hc0a3c3a_7
+  - libtasn1=4.16.0=h27cfd23_0
+  - libtiff=4.2.0=h85742a9_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuv=1.40.0=h7b6447c_0
+  - libwebp-base=1.2.0=h27cfd23_0
+  - lz4-c=1.9.3=h295c915_1
+  - mkl=2021.3.0=h06a4308_520
+  - mkl-service=2.4.0=py38h7f8727e_0
+  - mkl_fft=1.3.0=py38h42c9631_2
+  - mkl_random=1.2.2=py38h51133e4_0
+  - ncurses=6.2=he6710b0_1
+  - nest-asyncio=1.6.0=pyhd8ed1ab_0
+  - nettle=3.7.3=hbbd107a_1
+  - ninja=1.10.2=hff7bd54_1
+  - olefile=0.46=py_0
+  - openh264=2.1.0=hd408876_0
+  - openjpeg=2.3.0=h05c96fa_1
+  - openssl=1.1.1k=h7f98852_0
+  - packaging=24.2=pyhd8ed1ab_2
+  - pickleshare=0.7.5=py_1003
+  - pillow=8.3.1=py38h2c7a002_0
+  - pip=21.0.1=py38h06a4308_0
+  - platformdirs=4.3.6=pyhd8ed1ab_0
+  - prompt_toolkit=3.0.48=hd8ed1ab_1
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.3=pyhd8ed1ab_0
+  - pygments=2.18.0=pyhd8ed1ab_0
+  - python=3.8.11=h12debd9_0_cpython
+  - python_abi=3.8=5_cp38
+  - pyzmq=22.1.0=py38h2035c66_0
+  - readline=8.1=h27cfd23_0
+  - setuptools=52.0.0=py38h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_0
+  - sqlite=3.36.0=hc218d9a_0
+  - stack_data=0.6.2=pyhd8ed1ab_0
+  - tk=8.6.10=hbc83047_0
+  - torchaudio=0.8.1=py38
+  - torchvision=0.9.1=py38_cu101
+  - tornado=6.1=py38h497a2fe_1
+  - wheel=0.37.0=pyhd3eb1b0_0
+  - xz=5.2.5=h7b6447c_0
+  - zeromq=4.3.4=h9c3ff4c_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.4.9=haebb681_0
+  - pip:
+    - absl-py==0.13.0
+    - accelerate==1.0.1
+    - aiohappyeyeballs==2.4.3
+    - aiohttp==3.10.11
+    - aiosignal==1.3.1
+    - annotated-types==0.7.0
+    - antlr4-python3-runtime==4.9.3
+    - async-timeout==5.0.1
+    - attrs==24.2.0
+    - beautifulsoup4==4.12.3
+    - blis==0.7.11
+    - cachetools==4.2.2
+    - catalogue==2.0.10
+    - charset-normalizer==2.0.4
+    - chumpy==0.70
+    - click==8.1.7
+    - clip==1.0
+    - cloudpathlib==0.20.0
+    - confection==0.1.5
+    - cycler==0.10.0
+    - cymem==2.0.10
+    - decorator==5.0.9
+    - diffusers==0.31.0
+    - einops==0.8.0
+    - ffmpeg-python==0.2.0
+    - filelock==3.16.1
+    - freetype-py==2.5.1
+    - frozenlist==1.5.0
+    - fsspec==2024.2.0
+    - ftfy==6.1.1
+    - future==1.0.0
+    - fvcore==0.1.5.post20221221
+    - gdown==5.2.0
+    - glfw==2.8.0
+    - google-auth==2.36.0
+    - google-auth-oauthlib==0.4.6
+    - grpcio==1.68.0
+    - h5py==3.11.0
+    - huggingface-hub==0.26.2
+    - human-body-prior==2.2.2.0
+    - idna==3.2
+    - imageio==2.9.0
+    - imageio-ffmpeg==0.5.1
+    - importlib-metadata==8.5.0
+    - iopath==0.1.10
+    - ipdb==0.13.9
+    - ipython==7.26.0
+    - ipython-genutils==0.2.0
+    - jedi==0.18.0
+    - jinja2==3.1.3
+    - joblib==1.0.1
+    - kiwisolver==1.3.1
+    - langcodes==3.4.1
+    - language-data==1.3.0
+    - lightning-utilities==0.11.9
+    - marisa-trie==1.2.1
+    - markdown==3.3.4
+    - markdown-it-py==3.0.0
+    - markupsafe==2.1.5
+    - matplotlib==3.4.3
+    - matplotlib-inline==0.1.2
+    - mdurl==0.1.2
+    - moviepy==0.2.3.1
+    - mpmath==1.3.0
+    - multidict==6.1.0
+    - murmurhash==1.0.11
+    - natsort==8.4.0
+    - networkx==3.0
+    - numpy==1.22.4
+    - nvidia-cublas-cu11==11.11.3.6
+    - nvidia-cublas-cu12==12.1.3.1
+    - nvidia-cuda-cupti-cu11==11.8.87
+    - nvidia-cuda-cupti-cu12==12.1.105
+    - nvidia-cuda-nvrtc-cu11==11.8.89
+    - nvidia-cuda-nvrtc-cu12==12.1.105
+    - nvidia-cuda-runtime-cu11==11.8.89
+    - nvidia-cuda-runtime-cu12==12.1.105
+    - nvidia-cudnn-cu11==9.1.0.70
+    - nvidia-cudnn-cu12==9.1.0.70
+    - nvidia-cufft-cu11==10.9.0.58
+    - nvidia-cufft-cu12==11.0.2.54
+    - nvidia-curand-cu11==10.3.0.86
+    - nvidia-curand-cu12==10.3.2.106
+    - nvidia-cusolver-cu11==11.4.1.48
+    - nvidia-cusolver-cu12==11.4.5.107
+    - nvidia-cusparse-cu11==11.7.5.86
+    - nvidia-cusparse-cu12==12.1.0.106
+    - nvidia-nccl-cu11==2.20.5
+    - nvidia-nccl-cu12==2.20.5
+    - nvidia-nvjitlink-cu12==12.1.105
+    - nvidia-nvtx-cu11==11.8.86
+    - nvidia-nvtx-cu12==12.1.105
+    - oauthlib==3.1.1
+    - omegaconf==2.3.0
+    - orjson==3.10.15
+    - pandas==1.3.2
+    - parso==0.8.2
+    - pexpect==4.8.0
+    - portalocker==3.0.0
+    - preshed==3.0.9
+    - prompt-toolkit==3.0.20
+    - propcache==0.2.0
+    - protobuf==5.28.3
+    - psutil==6.1.0
+    - pyasn1==0.4.8
+    - pyasn1-modules==0.2.8
+    - pydantic==2.10.1
+    - pydantic-core==2.27.1
+    - pydeprecate==0.3.2
+    - pygame==2.6.1
+    - pyglet==2.1.2
+    - pyopengl==3.1.0
+    - pyparsing==2.4.7
+    - pyrender==0.1.45
+    - pysocks==1.7.1
+    - python-dateutil==2.8.2
+    - pytorch-lightning==1.7.0
+    - pytorch3d==0.3.0
+    - pytz==2021.1
+    - pyyaml==5.4.1
+    - regex==2024.11.6
+    - requests==2.26.0
+    - requests-oauthlib==1.3.0
+    - rich==13.9.4
+    - rsa==4.7.2
+    - safetensors==0.4.5
+    - scikit-learn==0.24.2
+    - scipy==1.7.1
+    - sentence-transformers==3.2.1
+    - sentencepiece==0.2.0
+    - shapely==2.0.7
+    - shellingham==1.5.4
+    - sklearn==0.0
+    - smart-open==7.0.5
+    - smplx==0.1.28
+    - soupsieve==2.6
+    - spacy==3.7.5
+    - spacy-legacy==3.0.12
+    - spacy-loggers==1.0.5
+    - srsly==2.4.8
+    - sympy==1.13.1
+    - tabulate==0.9.0
+    - tensorboard==2.12.0
+    - tensorboard-data-server==0.7.2
+    - tensorboard-plugin-wit==1.8.0
+    - termcolor==2.4.0
+    - thinc==8.2.5
+    - threadpoolctl==2.2.0
+    - timm==1.0.12
+    - tokenizers==0.20.3
+    - toml==0.10.2
+    - torch==2.4.1+cu118
+    - torchgeometry==0.1.2
+    - torchmetrics==0.7.0
+    - tqdm==4.62.2
+    - traitlets==5.0.5
+    - transformers==4.46.3
+    - triangle==20250106
+    - trimesh==4.6.2
+    - triton==3.0.0
+    - typer==0.13.1
+    - typing-extensions==4.12.2
+    - urllib3==1.26.6
+    - wasabi==1.1.3
+    - wcwidth==0.2.5
+    - weasel==0.4.1
+    - werkzeug==2.0.1
+    - wrapt==1.17.0
+    - yacs==0.1.8
+    - yarl==1.15.2
+    - zipp==3.20.2
+prefix: /root/miniconda3/envs/mgpt
.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
@@ -0,0 +1,17 @@
+smplx==0.1.28
+transformers==4.56.2
+timm==1.0.12
+sentence-transformers==5.1.0
+clip @ git+https://github.com/openai/CLIP.git@main#egg=clip
+human-body-prior @ git+https://github.com/nghorbani/human_body_prior.git@master#egg=human-body-prior
+gdown
+chumpy==0.70
+scipy==1.7.1
+numpy==1.22.4
+tensorboard
+accelerate
+flash_attn
+matplotlib==3.4.3
+matplotlib-inline==0.1.2
+imageio==2.9.0
+imageio-ffmpeg==0.5.1
.ipynb_checkpoints/train_motionstreamer-checkpoint.py
ADDED
@@ -0,0 +1,264 @@
+"""Train streaming motion generation model (MotionStreamer) with llama blocks, Two-Forward strategy and QK-Norm, using the motion latents encoded by the Causal TAE (trained in the first stage)."""
+
+
+import os
+import torch
+import numpy as np
+import random
+from torch.utils.tensorboard import SummaryWriter
+import json
+from accelerate import Accelerator
+from tqdm import tqdm
+from models.llama_model import LLaMAHF, LLaMAHFConfig
+import options.option_transformer as option_trans
+import utils.utils_model as utils_model
+import warnings
+from torch.optim.lr_scheduler import LambdaLR, CosineAnnealingLR
+warnings.filterwarnings('ignore')
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+##### ---- Exp dirs ---- #####
+args = option_trans.get_args_parser()
+torch.manual_seed(args.seed)
+
+# warm-up + cosine decay scheduler
+class WarmupCosineDecayScheduler:
+    def __init__(self, optimizer, warmup_iters, total_iters, min_lr=0):
+        self.optimizer = optimizer
+        self.warmup_iters = warmup_iters
+        self.total_iters = total_iters
+        self.min_lr = min_lr
+
+        self.warmup_scheduler = LambdaLR(optimizer, lr_lambda=self.warmup_lambda)
+
+        self.cosine_scheduler = CosineAnnealingLR(optimizer,
+                                                  T_max=total_iters - warmup_iters,
+                                                  eta_min=min_lr)
+
+    def warmup_lambda(self, current_iter):
+        if current_iter < self.warmup_iters:
+            return float(current_iter) / float(max(1, self.warmup_iters))
+        return 1.0
+
+    def step(self, current_iter):
+        if current_iter < self.warmup_iters:
+            self.warmup_scheduler.step()
+        else:
+            self.cosine_scheduler.step()
+
+    def state_dict(self):
+        return {
+            'warmup_iters': self.warmup_iters,
+            'total_iters': self.total_iters,
+            'min_lr': self.min_lr,
+        }
+
+    def load_state_dict(self, state_dict):
+        self.warmup_iters = state_dict['warmup_iters']
+        self.total_iters = state_dict['total_iters']
+        self.min_lr = state_dict['min_lr']
+
+
+
+args.out_dir = os.path.join(args.out_dir, f'{args.exp_name}')
+os.makedirs(args.out_dir, exist_ok = True)
+
+
+##### ---- Accelerator Setup ---- #####
+accelerator = Accelerator()
+comp_device = accelerator.device
+
+##### ---- Logger ---- #####
+logger = utils_model.get_logger(args.out_dir)
+writer = SummaryWriter(args.out_dir)
+logger.info(json.dumps(vars(args), indent=4, sort_keys=True))
+
+##### ---- Dataloader ---- #####
+from humanml3d_272 import dataset_TM_train_motionstreamer
+train_loader = dataset_TM_train_motionstreamer.DATALoader(args.dataname, args.batch_size, unit_length=2**args.down_t, latent_dir=args.latent_dir)
+
+
+##### ---- Network ---- #####
+from sentence_transformers import SentenceTransformer
+t5_model = SentenceTransformer('sentencet5-xxl/')
+t5_model.eval()
+for p in t5_model.parameters():
+    p.requires_grad = False
+
+
+config = LLaMAHFConfig.from_name('Normal_size')
+config.block_size = 78
+trans_encoder = LLaMAHF(config, args.num_diffusion_head_layers, args.latent_dim, comp_device)
+
+if args.resume_trans is not None:
+    print('loading transformer checkpoint from {}'.format(args.resume_trans))
+    ckpt = torch.load(args.resume_trans, map_location='cpu')
+    new_ckpt_trans = {}
+    for key in ckpt['trans'].keys():
+        if key.split('.')[0]=='module':
+            new_key = '.'.join(key.split('.')[1:])
+        else:
+            new_key = key
+        new_ckpt_trans[new_key] = ckpt['trans'][key]
+    trans_encoder.load_state_dict(new_ckpt_trans, strict=True)
+trans_encoder.train()
+trans_encoder.to(comp_device)
+
+
+##### ---- Optimizer & Scheduler ---- #####
+optimizer = utils_model.initial_optim(args.decay_option, args.lr, args.weight_decay, trans_encoder, args.optimizer)
+scheduler = WarmupCosineDecayScheduler(optimizer, args.total_iter//10, args.total_iter)
+
+t5_model, trans_encoder, optimizer, train_loader = accelerator.prepare(t5_model, trans_encoder, optimizer, train_loader)
+train_loader_iter = dataset_TM_train_motionstreamer.cycle(train_loader)
+
+
+diffmlps_batch_mul = 4
+def lengths_to_mask(lengths, max_len):
+    mask = torch.arange(max_len, device=lengths.device).expand(len(lengths), max_len) < lengths.unsqueeze(1)
+    return mask
+def get_mask_subset_prob(mask, prob):
+    subset_mask = torch.bernoulli(mask, p=prob) & mask
+    return subset_mask
+
+
+def uniform(shape, device=None):
+    return torch.zeros(shape, device=device).float().uniform_(0, 1)
+
+import math
+def cosine_schedule(t):
+    return torch.cos(t * math.pi * 0.5)
+
+
+#--------------2-forward:------------------
+def cosine_decay(step, total_steps, start_value=1.0, end_value=0.0):
+    step = torch.tensor(step, dtype=torch.float32)
+    total_steps = torch.tensor(total_steps, dtype=torch.float32)
+    cosine_factor = 0.5 * (1 + torch.cos(torch.pi * step / total_steps))
+    return start_value + (end_value - start_value) * cosine_factor
+
+def replace_with_pred(latents, pred_xstart, step, total_steps):
+    decay_factor = cosine_decay(step, total_steps).to(latents.device)
+    b, l, d = latents.shape
+    num_replace = int(l * decay_factor)
+
+    replace_indices = torch.randperm(l)[:num_replace]
+
+    replace_mask = torch.zeros(b, l, dtype=torch.bool).to(latents.device)
+    replace_mask[:, replace_indices] = 1
+
+    updated_latents = latents.clone()
+    updated_latents[replace_mask] = pred_xstart[replace_mask]
+
+    return updated_latents
+
+def forward_loss_withmask_2_forward_streaming(latents, trans, m_lens, feat_text, step, total_steps, A_token_length):
+    latents = latents.to(comp_device)
+    feat_text = feat_text.to(comp_device)
+    A_token_length = A_token_length.to(comp_device)
+    conditions = trans(latents, feat_text)
+    conditions = conditions.contiguous()
+    z = conditions[:,:-1,:]
+
+    b, l, d = latents.shape
+    mask = lengths_to_mask(m_lens, l)
+
+    for j in range(b):
+        mask[j, :A_token_length[j].item()] = False # A_motion token: do not compute loss
+
+    mask = mask.reshape(b * l).repeat(diffmlps_batch_mul)
+
+    target = latents.clone().detach()
+    target = target.reshape(b * l, -1)
+    z = z.reshape(b * l, -1)
+
+    with torch.no_grad():
+        loss, pred_xstart = trans.diff_loss(target=target, z=z)
+
+    pred_xstart = pred_xstart.clone().detach()
+    pred_xstart = pred_xstart.reshape(b, l, -1)
+
+    # do not replace A_motion tokens
+    for k in range(b):
+        pred_xstart[k, :A_token_length[k].item(),:] = latents[k, :A_token_length[k].item(),:]
+
+    updated_latents = replace_with_pred(latents, pred_xstart, step, total_steps)
+    updated_conditions = trans(updated_latents, feat_text)
+    updated_conditions = updated_conditions.contiguous()
+    updated_z = updated_conditions[:,:-1,:]
+
+    updated_target = latents.clone().detach()
+
+    updated_target = updated_target.reshape(b * l, -1).repeat(diffmlps_batch_mul, 1)
+    updated_z = updated_z.reshape(b * l, -1).repeat(diffmlps_batch_mul, 1)
+
+    updated_target = updated_target[mask]
+    updated_z = updated_z[mask]
+
+    updated_loss, updated_pred_xstart = trans.diff_loss(target=updated_target, z=updated_z)
+
+    return updated_loss
+
+
+##### ---- Training Loop ---- #####
+avg_loss_cls = 0.
+
+pbar = tqdm(range(1, args.total_iter + 1), desc="Training MotionStreamer")
+for nb_iter in pbar:
+    batch = next(train_loader_iter)
+    caption, m_tokens, m_tokens_len, A_token_length = batch
+    caption = list(caption)
+    m_tokens, m_tokens_len = m_tokens.to(comp_device), m_tokens_len.to(comp_device)
+    A_token_length = A_token_length.to(comp_device)
+
+    bs = len(caption)
+    num_masked = int(bs * 0.1) # 10%
+    mask_indices = random.sample(range(bs), num_masked)
+
+    for idx in mask_indices:
+        caption[idx] = ''
+
+    feat_text = torch.from_numpy(t5_model.encode(caption)).float()
+    feat_text = feat_text.to(comp_device)
+
+    # -------gt--------
+    input_latent = m_tokens[:,:-1,:] # continuous token
+
+    loss_cls = 0.0
+
+    if args.num_gpus > 1:
+        loss_cls = forward_loss_withmask_2_forward_streaming(latents=input_latent, trans=trans_encoder.module, m_lens = m_tokens_len, feat_text=feat_text, step=nb_iter, total_steps=args.total_iter, A_token_length=A_token_length)
+    else:
+        loss_cls = forward_loss_withmask_2_forward_streaming(latents=input_latent, trans=trans_encoder, m_lens = m_tokens_len, feat_text=feat_text, step=nb_iter, total_steps=args.total_iter, A_token_length=A_token_length)
+
+
+    # backward & optimizer step
+    optimizer.zero_grad()
+    accelerator.backward(loss_cls)
+    optimizer.step()
+    scheduler.step(nb_iter)
+
+    avg_loss_cls = avg_loss_cls + loss_cls.item()
+
+    args.print_iter = 100
+    if nb_iter % args.print_iter == 0 :
+        if accelerator.is_main_process:
+            avg_loss_cls = avg_loss_cls / args.print_iter
+            lr = optimizer.param_groups[0]['lr']
+            writer.add_scalar('./Loss/train', avg_loss_cls, nb_iter)
+            writer.add_scalar('./LR/train', optimizer.param_groups[0]['lr'], nb_iter)
+            msg = f"Train. Iter {nb_iter} : Loss. {avg_loss_cls:.5f}"
+            tqdm.write(f"Iter {nb_iter} | Loss: {avg_loss_cls:.5f} | LR: {lr:.6f}")
+            logger.info(msg)
+            avg_loss_cls = 0.
+
+
+    args.save_iter = 10000
+    if nb_iter % args.save_iter == 0:
+        # save checkpoint
+        if accelerator.is_main_process:
+            torch.save({
+                'trans': trans_encoder.state_dict(),
+            }, os.path.join(args.out_dir, f'latest.pth'))
+
+    accelerator.wait_for_everyone()
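The Two-Forward strategy above runs a first, no-grad forward pass to get the diffusion head's prediction, swaps a scheduled fraction of the input latents for that prediction, then computes the training loss on a second forward pass. The standalone sketch below (not part of the upload) re-evaluates the same cosine_decay formula with plain math to show the schedule: as written, the fraction of latent positions that replace_with_pred swaps grows from 0 at iteration 0 to 1 at the final iteration, despite the start_value=1.0 / end_value=0.0 argument names.

```python
# Standalone sketch (not from the repository): the Two-Forward replacement schedule.
import math

def cosine_decay(step: int, total_steps: int,
                 start_value: float = 1.0, end_value: float = 0.0) -> float:
    # Same formula as the training script, with plain floats instead of tensors.
    cosine_factor = 0.5 * (1 + math.cos(math.pi * step / total_steps))
    return start_value + (end_value - start_value) * cosine_factor

total = 100_000  # matches --total-iter in TRAIN_motionstreamer-checkpoint.sh
for step in (0, 25_000, 50_000, 75_000, 100_000):
    frac = cosine_decay(step, total)
    print(f"iter {step:>6}: replace ~{frac:.2f} of latent positions with first-forward predictions")
# iter      0: ~0.00 replaced (pure teacher forcing)
# iter  50000: ~0.50 replaced
# iter 100000: ~1.00 replaced (fully self-conditioned inputs)
```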
EVAL_causal_TAE.sh
ADDED
@@ -0,0 +1,6 @@
+ln -s ../utils ./Evaluator_272/
+ln -s ../humanml3d_272 ./Evaluator_272/
+ln -s ../options ./Evaluator_272/
+ln -s ../models ./Evaluator_272/
+ln -s ../visualization ./Evaluator_272/
+python eval_causal_TAE.py --resume-pth output/causal_TAE/net_last.pth
EVAL_t2m.sh
ADDED
@@ -0,0 +1,7 @@
+ln -s ../utils ./Evaluator_272/
+ln -s ../humanml3d_272 ./Evaluator_272/
+ln -s ../options ./Evaluator_272/
+ln -s ../models ./Evaluator_272/
+ln -s ../visualization ./Evaluator_272/
+ln -s ../Causal_TAE ./Evaluator_272/
+python eval_t2m.py --resume-pth Causal_TAE/net_last.pth --resume-trans /cpfs03/shared/IDC/wangjingbo_group/motionstreamer/Open_source_Train_AR_16_1024_fps_30_111M_9/latest.pth
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 ZJU3DV
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
<h2 align="center"<strong>MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space</strong></h2>
|
| 3 |
+
<p align="center">
|
| 4 |
+
<a href='https://li-xingxiao.github.io/homepage/' target='_blank'>Lixing Xiao</a><sup>1</sup>
|
| 5 |
+
·
|
| 6 |
+
<a href='https://shunlinlu.github.io/' target='_blank'>Shunlin Lu</a> <sup>2</sup>
|
| 7 |
+
·
|
| 8 |
+
<a href='https://phj128.github.io/' target='_blank'>Huaijin Pi</a><sup>3</sup>
|
| 9 |
+
·
|
| 10 |
+
<a href='https://vankouf.github.io/' target='_blank'>Ke Fan</a><sup>4</sup>
|
| 11 |
+
·
|
| 12 |
+
<a href='https://liangpan99.github.io/' target='_blank'>Liang Pan</a><sup>3</sup>
|
| 13 |
+
·
|
| 14 |
+
<a href='https://[email protected]' target='_blank'>Yueer Zhou</a><sup>1</sup>
|
| 15 |
+
·
|
| 16 |
+
<a href='https://dblp.org/pid/120/4362.html/' target='_blank'>Ziyong Feng</a><sup>5</sup>
|
| 17 |
+
·
|
| 18 |
+
<br>
|
| 19 |
+
<a href='https://www.xzhou.me/' target='_blank'>Xiaowei Zhou</a><sup>1</sup>
|
| 20 |
+
·
|
| 21 |
+
<a href='https://pengsida.net/' target='_blank'>Sida Peng</a><sup>1†</sup>
|
| 22 |
+
·
|
| 23 |
+
<a href='https://wangjingbo1219.github.io/' target='_blank'>Jingbo Wang</a><sup>6</sup>
|
| 24 |
+
<br>
|
| 25 |
+
<br>
|
| 26 |
+
<sup>1</sup>Zhejiang University <sup>2</sup>The Chinese University of Hong Kong, Shenzhen <sup>3</sup>The University of Hong Kong <br><sup>4</sup>Shanghai Jiao Tong University <sup>5</sup>DeepGlint <sup>6</sup>Shanghai AI Lab
|
| 27 |
+
<br>
|
| 28 |
+
<strong>ICCV 2025</strong>
|
| 29 |
+
|
| 30 |
+
</p>
|
| 31 |
+
</p>
|
| 32 |
+
<p align="center">
|
| 33 |
+
<a href='https://arxiv.org/abs/2503.15451'>
|
| 34 |
+
<img src='https://img.shields.io/badge/Arxiv-2503.15451-A42C25?style=flat&logo=arXiv&logoColor=A42C25'></a>
|
| 35 |
+
<a href='https://arxiv.org/pdf/2503.15451'>
|
| 36 |
+
<img src='https://img.shields.io/badge/Paper-PDF-blue?style=flat&logo=arXiv&logoColor=blue'></a>
|
| 37 |
+
<a href='https://zju3dv.github.io/MotionStreamer/'>
|
| 38 |
+
<img src='https://img.shields.io/badge/Project-Page-green?style=flat&logo=Google%20chrome&logoColor=green'></a>
|
| 39 |
+
<a href='https://huggingface.co/datasets/lxxiao/272-dim-HumanML3D'>
|
| 40 |
+
<img src='https://img.shields.io/badge/Data-Download-yellow?style=flat&logo=huggingface&logoColor=yellow'></a>
|
| 41 |
+
</p>
|
| 42 |
+
|
| 43 |
+
<img width="1385" alt="image" src="assets/teaser.jpg"/>
|
| 44 |
+
|
| 45 |
+
## 🔥 News
|
| 46 |
+
|
| 47 |
+
- **[2025-06]** MotionStreamer has been accepted to ICCV 2025! 🎉
|
| 48 |
+
|
| 49 |
+
## TODO List
|
| 50 |
+
|
| 51 |
+
- [x] Release the processing script of 272-dim motion representation.
|
| 52 |
+
- [x] Release the processed 272-dim Motion Representation of [HumanML3D](https://github.com/EricGuo5513/HumanML3D) dataset. Only for academic usage.
|
| 53 |
+
- [x] Release the training code and checkpoint of our [TMR](https://github.com/Mathux/TMR)-based motion evaluator trained on the processed 272-dim [HumanML3D](https://github.com/EricGuo5513/HumanML3D) dataset.
|
| 54 |
+
- [x] Release the training and evaluation code as well as checkpoint of Causal TAE.
|
| 55 |
+
- [x] Release the training code of original motion generation model and streaming generation model (MotionStreamer).
|
| 56 |
+
- [x] Release the checkpoint and demo inference code of original motion generation model.
|
| 57 |
+
- [ ] Release complete code for MotionStreamer.
|
| 58 |
+
|
| 59 |
+
## 🏃 Motion Representation
|
| 60 |
+
For more details of how to obtain the 272-dim motion representation, as well as other useful tools (e.g., Visualization and Conversion to BVH format), please refer to our [GitHub repo](https://github.com/Li-xingXiao/272-dim-Motion-Representation).
|
| 61 |
+
|
| 62 |
+
## Installation
|
| 63 |
+
|
| 64 |
+
### 🐍 Python Virtual Environment
|
| 65 |
+
```sh
|
| 66 |
+
conda env create -f environment.yaml
|
| 67 |
+
conda activate mgpt
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### 🤗 Hugging Face Mirror
|
| 71 |
+
Since all of our models and data are available on Hugging Face, if Hugging Face is not directly accessible, you can use the HF-mirror tools following:
|
| 72 |
+
```sh
|
| 73 |
+
pip install -U huggingface_hub
|
| 74 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
## 📥 Data Preparation
|
| 78 |
+
To facilitate researchers, we provide the processed 272-dim Motion Representation of:
|
| 79 |
+
> HumanML3D dataset at [this link](https://huggingface.co/datasets/lxxiao/272-dim-HumanML3D).
|
| 80 |
+
|
| 81 |
+
> BABEL dataset at [this link](https://huggingface.co/datasets/lxxiao/272-dim-BABEL).
|
| 82 |
+
|
| 83 |
+
❗️❗️❗️ The processed data is solely for academic purposes. Make sure you read through the [AMASS License](https://amass.is.tue.mpg.de/license.html).
|
| 84 |
+
|
| 85 |
+
1. Download the processed 272-dim [HumanML3D](https://github.com/EricGuo5513/HumanML3D) dataset following:
|
| 86 |
+
```bash
|
| 87 |
+
huggingface-cli download --repo-type dataset --resume-download lxxiao/272-dim-HumanML3D --local-dir ./humanml3d_272
|
| 88 |
+
cd ./humanml3d_272
|
| 89 |
+
unzip texts.zip
|
| 90 |
+
unzip motion_data.zip
|
| 91 |
+
```
|
| 92 |
+
The dataset is organized as:
|
| 93 |
+
```
|
| 94 |
+
./humanml3d_272
|
| 95 |
+
├── mean_std
|
| 96 |
+
├── Mean.npy
|
| 97 |
+
├── Std.npy
|
| 98 |
+
├── split
|
| 99 |
+
├── train.txt
|
| 100 |
+
├── val.txt
|
| 101 |
+
├── test.txt
|
| 102 |
+
├── texts
|
| 103 |
+
├── 000000.txt
|
| 104 |
+
...
|
| 105 |
+
├── motion_data
|
| 106 |
+
├── 000000.npy
|
| 107 |
+
...
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
2. Download the processed 272-dim [BABEL](https://babel.is.tue.mpg.de/) dataset following:
|
| 111 |
+
```bash
|
| 112 |
+
huggingface-cli download --repo-type dataset --resume-download lxxiao/272-dim-BABEL --local-dir ./babel_272
|
| 113 |
+
cd ./babel_272
|
| 114 |
+
unzip texts.zip
|
| 115 |
+
unzip motion_data.zip
|
| 116 |
+
```
|
| 117 |
+
The dataset is organized as:
|
| 118 |
+
```
|
| 119 |
+
./babel_272
|
| 120 |
+
├── t2m_babel_mean_std
|
| 121 |
+
├── Mean.npy
|
| 122 |
+
├── Std.npy
|
| 123 |
+
├── split
|
| 124 |
+
├── train.txt
|
| 125 |
+
├── val.txt
|
| 126 |
+
├── texts
|
| 127 |
+
├── 000000.txt
|
| 128 |
+
...
|
| 129 |
+
├── motion_data
|
| 130 |
+
├── 000000.npy
|
| 131 |
+
...
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
3. Download the processed streaming 272-dim [BABEL](https://babel.is.tue.mpg.de/) dataset following:
|
| 135 |
+
```bash
|
| 136 |
+
huggingface-cli download --repo-type dataset --resume-download lxxiao/272-dim-BABEL-stream --local-dir ./babel_272_stream
|
| 137 |
+
cd ./babel_272_stream
|
| 138 |
+
unzip train_stream.zip
|
| 139 |
+
unzip train_stream_text.zip
|
| 140 |
+
unzip val_stream.zip
|
| 141 |
+
unzip val_stream_text.zip
|
| 142 |
+
```
|
| 143 |
+
The dataset is organized as:
|
| 144 |
+
```
|
| 145 |
+
./babel_272_stream
|
| 146 |
+
├── train_stream
|
| 147 |
+
├── seq1.npy
|
| 148 |
+
...
|
| 149 |
+
├── train_stream_text
|
| 150 |
+
├── seq1.txt
|
| 151 |
+
...
|
| 152 |
+
├── val_stream
|
| 153 |
+
├── seq1.npy
|
| 154 |
+
...
|
| 155 |
+
├── val_stream_text
|
| 156 |
+
├── seq1.txt
|
| 157 |
+
...
|
| 158 |
+
```
|
| 159 |
+
> NOTE: We process the original BABEL dataset to support training of streaming motion generation. e.g. If there is a motion sequence A, annotated as (A1, A2, A3, A4) in BABEL dataset, each subsequence has text description: (A1_t, A2_t, A3_t, A4_t).
|
| 160 |
+
|
| 161 |
+
> Then, our BABEL-stream is constructed as:
|
| 162 |
+
|
| 163 |
+
> seq1: (A1, A2) --- seq1_text: (A1_t*A2_t#A1_length)
|
| 164 |
+
|
| 165 |
+
> seq2: (A2, A3) --- seq2_text: (A2_t*A3_t#A2_length)
|
| 166 |
+
|
| 167 |
+
> seq3: (A3, A4) --- seq3_text: (A3_t*A4_t#A3_length)
|
| 168 |
+
|
| 169 |
+
> Here, * and # is separation symbol, A1_length means the number of frames of subsequence A1.
|
| 170 |
+
|
| 171 |
+
## 🚀 Training
|
| 172 |
+
1. Train our [TMR](https://github.com/Mathux/TMR)-based motion evaluator on the processed 272-dim [HumanML3D](https://github.com/EricGuo5513/HumanML3D) dataset:
|
| 173 |
+
```bash
|
| 174 |
+
bash TRAIN_evaluator_272.sh
|
| 175 |
+
```
|
| 176 |
+
>After training for 100 epochs, the checkpoint will be stored at:
|
| 177 |
+
``Evaluator_272/experiments/temos/EXP1/checkpoints/``.
|
| 178 |
+
|
| 179 |
+
⬇️ We provide the evaluator checkpoint on [Hugging Face](https://huggingface.co/lxxiao/MotionStreamer/tree/main/Evaluator_272), download it following:
|
| 180 |
+
```bash
|
| 181 |
+
python humanml3d_272/prepare/download_evaluator_ckpt.py
|
| 182 |
+
```
|
| 183 |
+
>The downloaded checkpoint will be stored at: ``Evaluator_272/``.
|
| 184 |
+
2. Train the Causal TAE:
|
| 185 |
+
```bash
|
| 186 |
+
bash TRAIN_causal_TAE.sh ${NUM_GPUS}
|
| 187 |
+
```
|
| 188 |
+
> e.g., if you have 8 GPUs, run: bash TRAIN_causal_TAE.sh 8
|
| 189 |
+
|
| 190 |
+
> The checkpoint will be stored at:
|
| 191 |
+
``Experiments/causal_TAE_t2m_272/``
|
| 192 |
+
|
| 193 |
+
> Tensorboard visualization:
|
| 194 |
+
```bash
|
| 195 |
+
tensorboard --logdir='Experiments/causal_TAE_t2m_272'
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
⬇️ We provide the Causal TAE checkpoint on [Hugging Face](https://huggingface.co/lxxiao/MotionStreamer/tree/main/Causal_TAE), download it following:
|
| 199 |
+
```bash
|
| 200 |
+
python humanml3d_272/prepare/download_Causal_TAE_t2m_272_ckpt.py
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
3. Train text to motion model:
|
| 204 |
+
> We provide scripts to train the original text to motion generation model with llama blocks, Two-Forward strategy and QK-Norm, using the motion latents encoded by the Causal TAE (trained in the first stage).
|
| 205 |
+
|
| 206 |
+
3.1 Get motion latents:
|
| 207 |
+
```bash
|
| 208 |
+
python get_latent.py --resume-pth Causal_TAE/net_last.pth --latent_dir humanml3d_272/t2m_latents
|
| 209 |
+
```
|
| 210 |
+
3.2 Download [sentence-T5-XXL model](https://huggingface.co/sentence-transformers/sentence-t5-xxl/tree/main) on Hugging Face:
|
| 211 |
+
```bash
|
| 212 |
+
huggingface-cli download --resume-download sentence-transformers/sentence-t5-xxl --local-dir sentencet5-xxl/
|
| 213 |
+
```
|
| 214 |
+
3.3 Train text to motion generation model:
|
| 215 |
+
```bash
|
| 216 |
+
bash TRAIN_t2m.sh ${NUM_GPUS}
|
| 217 |
+
```
|
| 218 |
+
> e.g., if you have 8 GPUs, run: bash TRAIN_t2m.sh 8
|
| 219 |
+
|
| 220 |
+
> The checkpoint will be stored at:
|
| 221 |
+
``Experiments/t2m_model/``
|
| 222 |
+
|
| 223 |
+
> Tensorboard visualization:
|
| 224 |
+
```bash
|
| 225 |
+
tensorboard --logdir='Experiments/t2m_model'
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
⬇️ We provide the text to motion model checkpoint on [Hugging Face](https://huggingface.co/lxxiao/MotionStreamer/tree/main/Experiments/t2m_model), download it following:
|
| 229 |
+
```bash
|
| 230 |
+
python humanml3d_272/prepare/download_t2m_model_ckpt.py
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
4. Train streaming motion generation model (MotionStreamer):
|
| 234 |
+
> We provide scripts to train the streaming motion generation model (MotionStreamer) with llama blocks, Two-Forward strategy and QK-Norm, using the motion latents encoded by the Causal TAE (need to train a new Causal TAE using both HumanML3D-272 and BABEL-272 data).
|
| 235 |
+
|
| 236 |
+
4.1 Train a Causal TAE using both HumanML3D-272 and BABEL-272 data:
|
| 237 |
+
```bash
|
| 238 |
+
bash TRAIN_causal_TAE.sh ${NUM_GPUS} t2m_babel_272
|
| 239 |
+
```
|
| 240 |
+
> e.g., if you have 8 GPUs, run: bash TRAIN_causal_TAE.sh 8 t2m_babel_272
|
| 241 |
+
|
| 242 |
+
> The checkpoint will be stored at:
|
| 243 |
+
``Experiments/causal_TAE_t2m_babel_272/``
|
| 244 |
+
|
| 245 |
+
> Tensorboard visualization:
|
| 246 |
+
```bash
|
| 247 |
+
tensorboard --logdir='Experiments/causal_TAE_t2m_babel_272'
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
⬇️ We provide the Causal TAE checkpoint trained using both HumanML3D-272 and BABEL-272 data on [Hugging Face](https://huggingface.co/lxxiao/MotionStreamer/tree/main/Causal_TAE_t2m_babel), download it following:
|
| 251 |
+
```bash
|
| 252 |
+
python humanml3d_272/prepare/download_Causal_TAE_t2m_babel_272_ckpt.py
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
4.2 Get motion latents of both HumanML3D-272 and the processed BABEL-272-stream dataset:
|
| 256 |
+
```bash
|
| 257 |
+
python get_latent.py --resume-pth Causal_TAE_t2m_babel/net_last.pth --latent_dir babel_272_stream/t2m_babel_latents --dataname t2m_babel_272
|
| 258 |
+
```
|
| 259 |
+
|
| 260 |
+
4.3 Train MotionStreamer model:
|
| 261 |
+
```bash
|
| 262 |
+
bash TRAIN_motionstreamer.sh ${NUM_GPUS}
|
| 263 |
+
```
|
| 264 |
+
> e.g., if you have 8 GPUs, run: bash TRAIN_motionstreamer.sh 8
|
| 265 |
+
|
| 266 |
+
> The checkpoint will be stored at:
|
| 267 |
+
``Experiments/motionstreamer_model/``
|
| 268 |
+
|
| 269 |
+
> Tensorboard visualization:
|
| 270 |
+
```bash
|
| 271 |
+
tensorboard --logdir='Experiments/motionstreamer_model'
|
| 272 |
+
```
|
| 273 |
+
|
| 274 |
+
## 📍 Evaluation
|
| 275 |
+
|
| 276 |
+
1. Evaluate the metrics of the processed 272-dim [HumanML3D](https://github.com/EricGuo5513/HumanML3D) dataset:
|
| 277 |
+
```bash
|
| 278 |
+
bash EVAL_GT.sh
|
| 279 |
+
```
|
| 280 |
+
( FID, R@1, R@2, R@3, Diversity and MM-Dist (Matching Score) are reported. )
|
| 281 |
+
|
| 282 |
+
2. Evaluate the metrics of Causal TAE:
|
| 283 |
+
```bash
|
| 284 |
+
bash EVAL_causal_TAE.sh
|
| 285 |
+
```
|
| 286 |
+
( FID and MPJPE (mm) are reported. )
|
| 287 |
+
|
| 288 |
+
3. Evaluate the metrics of the text-to-motion model:
|
| 289 |
+
```bash
|
| 290 |
+
bash EVAL_t2m.sh
|
| 291 |
+
```
|
| 292 |
+
( FID, R@1, R@2, R@3, Diversity and MM-Dist (Matching Score) are reported. )
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
## 🎬 Demo Inference
|
| 296 |
+
|
| 297 |
+
1. Inference of the text-to-motion model:
|
| 298 |
+
> [Option 1] Recover from joint positions
|
| 299 |
+
```bash
|
| 300 |
+
python demo_t2m.py --text 'a person is walking like a mummy.' --mode pos --resume-pth Causal_TAE/net_last.pth --resume-trans Experiments/t2m_model/latest.pth
|
| 301 |
+
```
|
| 302 |
+
> [Option 2] Recover from joint rotations
|
| 303 |
+
```bash
|
| 304 |
+
python demo_t2m.py --text 'a person is walking like a mummy.' --mode rot --resume-pth Causal_TAE/net_last.pth --resume-trans Experiments/t2m_model/latest.pth
|
| 305 |
+
```
|
| 306 |
+
> In our 272-dim representation, Inverse Kinematics (IK) is not needed.
|
| 307 |
+
> For further conversion to BVH format, please refer to [this repo](https://github.com/Li-xingXiao/272-dim-Motion-Representation?tab=readme-ov-file#6-representation_272-to-bvh-conversion-optional) (Step 6: Representation_272 to BVH conversion). The BVH motion animation can be visualized and edited in [Blender](https://www.blender.org/features/animation/).
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
## 🌹 Acknowledgement
|
| 313 |
+
This repository builds upon the following awesome datasets and projects:
|
| 314 |
+
- [272-dim-Motion-Representation](https://github.com/Li-xingXiao/272-dim-Motion-Representation)
|
| 315 |
+
- [AMASS](https://amass.is.tue.mpg.de/index.html)
|
| 316 |
+
- [HumanML3D](https://github.com/EricGuo5513/HumanML3D)
|
| 317 |
+
- [T2M-GPT](https://github.com/Mael-zys/T2M-GPT)
|
| 318 |
+
- [TMR](https://github.com/Mathux/TMR)
|
| 319 |
+
- [OpenTMA](https://github.com/LinghaoChan/OpenTMA)
|
| 320 |
+
- [Sigma-VAE](https://github.com/orybkin/sigma-vae-pytorch)
|
| 321 |
+
- [ScaMo](https://github.com/shunlinlu/ScaMo_code)
|
| 322 |
+
|
| 323 |
+
## 🤝🏼 Citation
|
| 324 |
+
If our project is helpful for your research, please consider citing:
|
| 325 |
+
```
|
| 326 |
+
@article{xiao2025motionstreamer,
|
| 327 |
+
title={MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space},
|
| 328 |
+
author={Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo},
|
| 329 |
+
journal={arXiv preprint arXiv:2503.15451},
|
| 330 |
+
year={2025}
|
| 331 |
+
}
|
| 332 |
+
```
|
| 333 |
+
|
| 334 |
+
## Star History
|
| 335 |
+
|
| 336 |
+
[](https://www.star-history.com/#zju3dv/MotionStreamer&Date)
|
TRAIN_causal_TAE.sh
ADDED
|
@@ -0,0 +1,22 @@
|
| 1 |
+
NUM_GPUS=${1:-1} # default: 1 GPU
|
| 2 |
+
dataset_name=${2:-t2m_272} # default: t2m_272, options: t2m_272, t2m_babel_272
|
| 3 |
+
|
| 4 |
+
BATCH_SIZE=$((128 / NUM_GPUS))
|
| 5 |
+
|
| 6 |
+
echo "Using $NUM_GPUS GPUs, each with a batch size of $BATCH_SIZE"
|
| 7 |
+
|
| 8 |
+
accelerate launch --num_processes $NUM_GPUS train_causal_TAE.py \
|
| 9 |
+
--batch-size $BATCH_SIZE \
|
| 10 |
+
--lr 0.00005 \
|
| 11 |
+
--total-iter 2000000 \
|
| 12 |
+
--lr-scheduler 1900000 \
|
| 13 |
+
--down-t 2 \
|
| 14 |
+
--depth 3 \
|
| 15 |
+
--dilation-growth-rate 3 \
|
| 16 |
+
--out-dir Experiments \
|
| 17 |
+
--dataname $dataset_name \
|
| 18 |
+
--exp-name causal_TAE_${dataset_name} \
|
| 19 |
+
--root_loss 7.0 \
|
| 20 |
+
--latent_dim 16 \
|
| 21 |
+
--hidden_size 1024 \
|
| 22 |
+
--num_gpus $NUM_GPUS
|
TRAIN_evaluator_272.sh
ADDED
|
@@ -0,0 +1,6 @@
|
| 1 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
| 2 |
+
cd Evaluator_272
|
| 3 |
+
huggingface-cli download --resume-download distilbert/distilbert-base-uncased --local-dir ./deps/distilbert-base-uncased
|
| 4 |
+
ln -s ../humanml3d_272 ./datasets/humanml3d_272
|
| 5 |
+
python -m train --cfg configs/configs_evaluator_272/H3D-TMR.yaml --cfg_assets configs/assets.yaml --batch_size 256 --nodebug
|
| 6 |
+
cd ..
|
TRAIN_motionstreamer.sh
ADDED
|
@@ -0,0 +1,16 @@
|
| 1 |
+
NUM_GPUS=${1:-1} # default: 1 GPU
|
| 2 |
+
|
| 3 |
+
BATCH_SIZE=$((30 / NUM_GPUS))
|
| 4 |
+
|
| 5 |
+
echo "Using $NUM_GPUS GPUs, each with a batch size of $BATCH_SIZE"
|
| 6 |
+
|
| 7 |
+
accelerate launch --num_processes $NUM_GPUS train_motionstreamer.py \
|
| 8 |
+
--batch-size $BATCH_SIZE \
|
| 9 |
+
--lr 0.0001 \
|
| 10 |
+
--total-iter 200000 \
|
| 11 |
+
--out-dir Experiments \
|
| 12 |
+
--exp-name motionstreamer_model \
|
| 13 |
+
--dataname t2m_babel_272 \
|
| 14 |
+
--latent_dir babel_272_stream/t2m_babel_latents \
|
| 15 |
+
--num_gpus $NUM_GPUS
|
| 16 |
+
# Optional: resume from a checkpoint by appending --resume-trans Experiments/motionstreamer_model/100k.pth to the command above.
|
TRAIN_t2m.sh
ADDED
|
@@ -0,0 +1,15 @@
|
| 1 |
+
NUM_GPUS=${1:-1} # default: 1 GPU
|
| 2 |
+
|
| 3 |
+
BATCH_SIZE=$((256 / NUM_GPUS))
|
| 4 |
+
|
| 5 |
+
echo "Using $NUM_GPUS GPUs, each with a batch size of $BATCH_SIZE"
|
| 6 |
+
|
| 7 |
+
accelerate launch --num_processes $NUM_GPUS train_t2m.py \
|
| 8 |
+
--batch-size $BATCH_SIZE \
|
| 9 |
+
--lr 0.0001 \
|
| 10 |
+
--total-iter 100000 \
|
| 11 |
+
--out-dir Experiments \
|
| 12 |
+
--exp-name t2m_model \
|
| 13 |
+
--dataname t2m_272 \
|
| 14 |
+
--latent_dir humanml3d_272/t2m_latents \
|
| 15 |
+
--num_gpus $NUM_GPUS
|
assets/teaser.jpg
ADDED
|
Git LFS Details
|
babel_272/.gitattributes
ADDED
|
@@ -0,0 +1,59 @@
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mds filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
# Audio files - uncompressed
|
| 39 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
# Audio files - compressed
|
| 43 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
# Image files - uncompressed
|
| 49 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
# Image files - compressed
|
| 54 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
# Video files - compressed
|
| 58 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
babel_272/README.md
ADDED
|
@@ -0,0 +1,34 @@
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
---
|
| 4 |
+
## 🚀 Dataset Usage
|
| 5 |
+
To facilitate researchers, we provide the processed 272-dim motion representation of the [BABEL](https://babel.is.tue.mpg.de/) dataset in this Hugging Face repo.
|
| 6 |
+
|
| 7 |
+
Motions are resampled to 30 FPS.
|
| 8 |
+
|
| 9 |
+
NOTE: ``t2m_babel_mean_std/`` contains the mean and std statistics of both the HumanML3D and BABEL datasets, used for jointly training the proposed [Causal TAE](https://github.com/zju3dv/MotionStreamer/blob/main/TRAIN_causal_TAE.sh).
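A minimal usage sketch (an assumed convention, not an official loader): z-score normalize a 272-dim sequence with these statistics before encoding, and invert the normalization after decoding. The motion file name below is hypothetical.
```python
import numpy as np

mean = np.load('t2m_babel_mean_std/Mean.npy')  # per-dimension mean of the 272-dim representation
std = np.load('t2m_babel_mean_std/Std.npy')    # per-dimension std of the 272-dim representation

motion = np.load('some_sequence.npy')          # hypothetical file, shape (num_frames, 272)
normalized = (motion - mean) / std             # normalize before encoding with the Causal TAE
recovered = normalized * std + mean            # de-normalize after decoding
```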
|
| 10 |
+
|
| 11 |
+
❗️❗️❗️ The processed data is solely for academic purposes. Make sure you read through the [BABEL License](https://babel.is.tue.mpg.de/license.html).
|
| 12 |
+
|
| 13 |
+
## 📖 Paper & Project Page & Code
|
| 14 |
+
* [Arxiv Paper](https://arxiv.org/abs/2503.15451)
|
| 15 |
+
* [Project Page](https://zju3dv.github.io/MotionStreamer/)
|
| 16 |
+
* [Code](https://github.com/zju3dv/MotionStreamer)
|
| 17 |
+
|
| 18 |
+
## 🏃 Processing script
|
| 19 |
+
For more details on how to obtain the 272-dim motion representation, as well as other useful tools (e.g., visualization and conversion to BVH format), please refer to our [GitHub repo](https://github.com/Li-xingXiao/272-dim-Motion-Representation).
|
| 20 |
+
|
| 21 |
+
## 🌹 Acknowledgement
|
| 22 |
+
This repository builds upon the following awesome datasets and projects:
|
| 23 |
+
- [BABEL](https://babel.is.tue.mpg.de/)
|
| 24 |
+
|
| 25 |
+
## 🤝🏼 Citation
|
| 26 |
+
If our project is helpful for your research, please consider citing:
|
| 27 |
+
```
|
| 28 |
+
@article{xiao2025motionstreamer,
|
| 29 |
+
title={MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space},
|
| 30 |
+
author={Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo},
|
| 31 |
+
journal={arXiv preprint arXiv:2503.15451},
|
| 32 |
+
year={2025}
|
| 33 |
+
}
|
| 34 |
+
```
|
babel_272/motion_data.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03ecf1eefd24f828e0717dd0d7d05ad2ad139d79fd09d59baeab711895311525
|
| 3 |
+
size 8093667470
|
babel_272/split/train.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
babel_272/split/val.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
babel_272/t2m_babel_mean_std/Mean.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0f782aecd1c0479c517aee68959a26f55ddf1f34bb2344b4d9c365c73f3ed80
|
| 3 |
+
size 2304
|
babel_272/t2m_babel_mean_std/Std.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de477d76de0b03b71779dea84964ccf59c1f53ad49ebef7d99202c4ff19a2ff5
|
| 3 |
+
size 2304
|
babel_272/texts.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39b0a560144db9d4a261462d21f0eeedefc3f0bd1bb664cb3ec819c17ebead52
|
| 3 |
+
size 38968869
|
babel_272_stream/.gitattributes
ADDED
|
@@ -0,0 +1,59 @@
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mds filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
# Audio files - uncompressed
|
| 39 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
# Audio files - compressed
|
| 43 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
# Image files - uncompressed
|
| 49 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
# Image files - compressed
|
| 54 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
# Video files - compressed
|
| 58 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
babel_272_stream/README.md
ADDED
|
@@ -0,0 +1,62 @@
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
---
|
| 4 |
+
## 🚀 Dataset Usage
|
| 5 |
+
To facilitate researchers, we provide the processed streaming 272-dim motion representation of the [BABEL](https://babel.is.tue.mpg.de/) dataset in this Hugging Face repo.
|
| 6 |
+
|
| 7 |
+
NOTE: We process the original BABEL dataset to support training of streaming motion generation.
|
| 8 |
+
e.g., if a motion sequence A is annotated as (A1, A2, A3, A4) in the BABEL dataset, the subsequences have text descriptions (A1_t, A2_t, A3_t, A4_t).
|
| 9 |
+
|
| 10 |
+
Then, our BABEL-stream is constructed as:
|
| 11 |
+
|
| 12 |
+
seq1: (A1, A2) --- seq1_text: (A1_t*A2_t#A1_length)
|
| 13 |
+
|
| 14 |
+
seq2: (A2, A3) --- seq2_text: (A2_t*A3_t#A2_length)
|
| 15 |
+
|
| 16 |
+
seq3: (A3, A4) --- seq3_text: (A3_t*A4_t#A3_length)
|
| 17 |
+
|
| 18 |
+
Here, * and # are separator symbols, and A1_length is the number of frames of subsequence A1.
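A minimal parsing sketch (not the official loader) for one such text annotation, following the format above:
```python
# Parse "A1_t*A2_t#A1_length" into the two sub-sequence descriptions and the first sub-sequence's frame count.
def parse_stream_text(path):
    with open(path) as f:
        line = f.read().strip()
    texts, first_length = line.rsplit('#', 1)   # '#' separates the frame count of the first sub-sequence
    first_text, second_text = texts.split('*')  # '*' separates the two sub-sequence descriptions
    return first_text, second_text, int(first_length)

# e.g., parse_stream_text('train_stream_text/seq1.txt')
```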
|
| 19 |
+
|
| 20 |
+
Motions are resampled to 30 FPS.
|
| 21 |
+
|
| 22 |
+
The dataset is organized as:
|
| 23 |
+
```
|
| 24 |
+
./
|
| 25 |
+
├── train_stream
|
| 26 |
+
├── seq1.npy
|
| 27 |
+
...
|
| 28 |
+
├── train_stream_text
|
| 29 |
+
├── seq1.txt
|
| 30 |
+
...
|
| 31 |
+
├── val_stream
|
| 32 |
+
├── seq1.npy
|
| 33 |
+
...
|
| 34 |
+
├── val_stream_text
|
| 35 |
+
├── seq1.txt
|
| 36 |
+
...
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
❗️❗️❗️ The processed data is solely for academic purposes. Make sure you read through the [BABEL License](https://babel.is.tue.mpg.de/license.html).
|
| 40 |
+
|
| 41 |
+
## 📖 Paper & Project Page & Code
|
| 42 |
+
* [Arxiv Paper](https://arxiv.org/abs/2503.15451)
|
| 43 |
+
* [Project Page](https://zju3dv.github.io/MotionStreamer/)
|
| 44 |
+
* [Code](https://github.com/zju3dv/MotionStreamer)
|
| 45 |
+
|
| 46 |
+
## 🏃 Processing script
|
| 47 |
+
For more details on how to obtain the 272-dim motion representation, as well as other useful tools (e.g., visualization and conversion to BVH format), please refer to our [GitHub repo](https://github.com/Li-xingXiao/272-dim-Motion-Representation).
|
| 48 |
+
|
| 49 |
+
## 🌹 Acknowledgement
|
| 50 |
+
This repository builds upon the following awesome datasets and projects:
|
| 51 |
+
- [BABEL](https://babel.is.tue.mpg.de/)
|
| 52 |
+
|
| 53 |
+
## 🤝🏼 Citation
|
| 54 |
+
If our project is helpful for your research, please consider citing:
|
| 55 |
+
```
|
| 56 |
+
@article{xiao2025motionstreamer,
|
| 57 |
+
title={MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space},
|
| 58 |
+
author={Xiao, Lixing and Lu, Shunlin and Pi, Huaijin and Fan, Ke and Pan, Liang and Zhou, Yueer and Feng, Ziyong and Zhou, Xiaowei and Peng, Sida and Wang, Jingbo},
|
| 59 |
+
journal={arXiv preprint arXiv:2503.15451},
|
| 60 |
+
year={2025}
|
| 61 |
+
}
|
| 62 |
+
```
|
babel_272_stream/train_stream.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35db924d754e321f673a72c22b80d5d725f55d74151fc34351f554ef6bf33a2e
|
| 3 |
+
size 6901914721
|
babel_272_stream/train_stream_text.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d46561fcaf62738b1d08cf54a851ffecb3fb7a154f9663b199dfa83f0d677046
|
| 3 |
+
size 4746908
|
babel_272_stream/val_stream.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0564c64ce642330222b3ed83d031f5f3765c6979a82f17a2259e07d80d0ff78a
|
| 3 |
+
size 2580199524
|
babel_272_stream/val_stream_text.zip
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba646f2836f03a7fa1a5470aa8c098d1b0e446872d5bf53b8b42283e5c1f368b
|
| 3 |
+
size 1685986
|
body_models/human_model_files/mano/MANO_LEFT.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4022f7083f2ca7c78b2b3d595abbab52debd32b09d372b16923a801f0ea6a30
|
| 3 |
+
size 3821391
|
body_models/human_model_files/mano/MANO_RIGHT.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45d60aa3b27ef9107a7afd4e00808f307fd91111e1cfa35afd5c4a62de264767
|
| 3 |
+
size 3821356
|
body_models/human_model_files/smpl/J_regressor_extra.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc968ea4f9855571e82f90203280836b01f13ee42a8e1b89d8d580b801242a89
|
| 3 |
+
size 496160
|
body_models/human_model_files/smpl/SMPL_FEMALE.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a583c1b98e4afc19042641f1bae5cd8a1f712a6724886291a7627ec07acd408d
|
| 3 |
+
size 39056454
|
body_models/human_model_files/smpl/SMPL_MALE.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e8c0bbbbc635dcb166ed29c303fb4bef16ea5f623e5a89263495a9e403575bd
|
| 3 |
+
size 39056404
|
body_models/human_model_files/smpl/SMPL_NEUTRAL.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98e65c74ad9b998783132f00880d1025a8d64b158e040e6ef13a557e5098bc42
|
| 3 |
+
size 39001280
|
body_models/human_model_files/smpl/VPOSER_CKPT/TR00_004_00_WO_accad.ini
ADDED
|
@@ -0,0 +1,29 @@
|
| 1 |
+
[All]
|
| 2 |
+
adam_beta1 : 0.9
|
| 3 |
+
base_lr : 0.005
|
| 4 |
+
batch_size : 512
|
| 5 |
+
best_model_fname : None
|
| 6 |
+
cuda_id : 0
|
| 7 |
+
data_shape : [1, 21, 3]
|
| 8 |
+
dataset_dir : None
|
| 9 |
+
display_model_gender : male
|
| 10 |
+
expr_code : 004_00_WO_accad
|
| 11 |
+
fp_precision : 32
|
| 12 |
+
ip_avoid : False
|
| 13 |
+
kl_coef : 0.005
|
| 14 |
+
latentD : 32
|
| 15 |
+
log_every_epoch : 2
|
| 16 |
+
model_type : smpl
|
| 17 |
+
n_workers : 10
|
| 18 |
+
num_bodies_to_display : 10
|
| 19 |
+
num_epochs : 100
|
| 20 |
+
num_neurons : 512
|
| 21 |
+
reg_coef : 0.0001
|
| 22 |
+
remove_Zrot : True
|
| 23 |
+
seed : 4815
|
| 24 |
+
sm_coef : 0.01
|
| 25 |
+
test_only : False
|
| 26 |
+
try_num : 0
|
| 27 |
+
use_cont_repr : True
|
| 28 |
+
verbosity : 0
|
| 29 |
+
work_dir : None
|
body_models/human_model_files/smpl/VPOSER_CKPT/snapshots/._TR00_E096.pt
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e2615cd1d2e78cdfac7169c6182a7352d02992336dad7329d3d97f6947fb515
|
| 3 |
+
size 4096
|
body_models/human_model_files/smpl/VPOSER_CKPT/snapshots/TR00_E096.pt
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e4ad40f922606989939d3fae6eadf82d1a8e98112dffb6e39d89d6471270d5c
|
| 3 |
+
size 2702962
|
body_models/human_model_files/smpl/VPOSER_CKPT/vposer_smpl.py
ADDED
|
@@ -0,0 +1,164 @@
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
|
| 4 |
+
# acting on behalf of its Max Planck Institute for Intelligent Systems and the
|
| 5 |
+
# Max Planck Institute for Biological Cybernetics. All rights reserved.
|
| 6 |
+
#
|
| 7 |
+
# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
|
| 8 |
+
# on this computer program. You can only use this computer program if you have closed a license agreement
|
| 9 |
+
# with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
|
| 10 |
+
# Any use of the computer program without a valid license is prohibited and liable to prosecution.
|
| 11 |
+
# Contact: [email protected]
|
| 12 |
+
#
|
| 13 |
+
#
|
| 14 |
+
# If you use this code in a research publication please consider citing the following:
|
| 15 |
+
#
|
| 16 |
+
# Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
|
| 17 |
+
# AMASS: Archive of Motion Capture as Surface Shapes <https://arxiv.org/abs/1904.03278>
|
| 18 |
+
#
|
| 19 |
+
#
|
| 20 |
+
# Code Developed by:
|
| 21 |
+
# Nima Ghorbani <https://www.linkedin.com/in/nghorbani/>
|
| 22 |
+
# Vassilis Choutas <https://ps.is.tuebingen.mpg.de/employees/vchoutas> for ContinousRotReprDecoder
|
| 23 |
+
#
|
| 24 |
+
# 2018.01.02
|
| 25 |
+
|
| 26 |
+
'''
|
| 27 |
+
A human body pose prior built with Auto-Encoding Variational Bayes
|
| 28 |
+
'''
|
| 29 |
+
|
| 30 |
+
__all__ = ['VPoser']
|
| 31 |
+
|
| 32 |
+
import os, sys, shutil
|
| 33 |
+
|
| 34 |
+
import torch
|
| 35 |
+
|
| 36 |
+
from torch import nn
|
| 37 |
+
from torch.nn import functional as F
|
| 38 |
+
|
| 39 |
+
import numpy as np
|
| 40 |
+
|
| 41 |
+
import torchgeometry as tgm
|
| 42 |
+
|
| 43 |
+
class ContinousRotReprDecoder(nn.Module):
|
| 44 |
+
def __init__(self):
|
| 45 |
+
super(ContinousRotReprDecoder, self).__init__()
|
| 46 |
+
|
| 47 |
+
def forward(self, module_input):
|
| 48 |
+
reshaped_input = module_input.view(-1, 3, 2)
|
| 49 |
+
|
| 50 |
+
b1 = F.normalize(reshaped_input[:, :, 0], dim=1)
|
| 51 |
+
|
| 52 |
+
dot_prod = torch.sum(b1 * reshaped_input[:, :, 1], dim=1, keepdim=True)
|
| 53 |
+
b2 = F.normalize(reshaped_input[:, :, 1] - dot_prod * b1, dim=-1)
|
| 54 |
+
b3 = torch.cross(b1, b2, dim=1)
|
| 55 |
+
|
| 56 |
+
return torch.stack([b1, b2, b3], dim=-1)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class VPoser(nn.Module):
|
| 60 |
+
def __init__(self, num_neurons, latentD, data_shape, use_cont_repr=True):
|
| 61 |
+
super(VPoser, self).__init__()
|
| 62 |
+
|
| 63 |
+
self.latentD = latentD
|
| 64 |
+
self.use_cont_repr = use_cont_repr
|
| 65 |
+
|
| 66 |
+
n_features = np.prod(data_shape)
|
| 67 |
+
self.num_joints = data_shape[1]
|
| 68 |
+
|
| 69 |
+
self.bodyprior_enc_bn1 = nn.BatchNorm1d(n_features)
|
| 70 |
+
self.bodyprior_enc_fc1 = nn.Linear(n_features, num_neurons)
|
| 71 |
+
self.bodyprior_enc_bn2 = nn.BatchNorm1d(num_neurons)
|
| 72 |
+
self.bodyprior_enc_fc2 = nn.Linear(num_neurons, num_neurons)
|
| 73 |
+
self.bodyprior_enc_mu = nn.Linear(num_neurons, latentD)
|
| 74 |
+
self.bodyprior_enc_logvar = nn.Linear(num_neurons, latentD)
|
| 75 |
+
self.dropout = nn.Dropout(p=.1, inplace=False)
|
| 76 |
+
|
| 77 |
+
self.bodyprior_dec_fc1 = nn.Linear(latentD, num_neurons)
|
| 78 |
+
self.bodyprior_dec_fc2 = nn.Linear(num_neurons, num_neurons)
|
| 79 |
+
|
| 80 |
+
if self.use_cont_repr:
|
| 81 |
+
self.rot_decoder = ContinousRotReprDecoder()
|
| 82 |
+
|
| 83 |
+
self.bodyprior_dec_out = nn.Linear(num_neurons, self.num_joints* 6)
|
| 84 |
+
|
| 85 |
+
def encode(self, Pin):
|
| 86 |
+
'''
|
| 87 |
+
|
| 88 |
+
:param Pin: Nx(numjoints*3)
|
| 89 |
+
:param rep_type: 'matrot'/'aa' for matrix rotations or axis-angle
|
| 90 |
+
:return:
|
| 91 |
+
'''
|
| 92 |
+
Xout = Pin.view(Pin.size(0), -1) # flatten input
|
| 93 |
+
Xout = self.bodyprior_enc_bn1(Xout)
|
| 94 |
+
|
| 95 |
+
Xout = F.leaky_relu(self.bodyprior_enc_fc1(Xout), negative_slope=.2)
|
| 96 |
+
Xout = self.bodyprior_enc_bn2(Xout)
|
| 97 |
+
Xout = self.dropout(Xout)
|
| 98 |
+
Xout = F.leaky_relu(self.bodyprior_enc_fc2(Xout), negative_slope=.2)
|
| 99 |
+
return torch.distributions.normal.Normal(self.bodyprior_enc_mu(Xout), F.softplus(self.bodyprior_enc_logvar(Xout)))
|
| 100 |
+
|
| 101 |
+
def decode(self, Zin, output_type='matrot'):
|
| 102 |
+
assert output_type in ['matrot', 'aa']
|
| 103 |
+
|
| 104 |
+
Xout = F.leaky_relu(self.bodyprior_dec_fc1(Zin), negative_slope=.2)
|
| 105 |
+
Xout = self.dropout(Xout)
|
| 106 |
+
Xout = F.leaky_relu(self.bodyprior_dec_fc2(Xout), negative_slope=.2)
|
| 107 |
+
Xout = self.bodyprior_dec_out(Xout)
|
| 108 |
+
if self.use_cont_repr:
|
| 109 |
+
Xout = self.rot_decoder(Xout)
|
| 110 |
+
else:
|
| 111 |
+
Xout = torch.tanh(Xout)
|
| 112 |
+
|
| 113 |
+
Xout = Xout.view([-1, 1, self.num_joints, 9])
|
| 114 |
+
if output_type == 'aa': return VPoser.matrot2aa(Xout)
|
| 115 |
+
return Xout
|
| 116 |
+
|
| 117 |
+
def forward(self, Pin, input_type='matrot', output_type='matrot'):
|
| 118 |
+
'''
|
| 119 |
+
|
| 120 |
+
:param Pin: aa: Nx1xnum_jointsx3 / matrot: Nx1xnum_jointsx9
|
| 121 |
+
:param input_type: matrot / aa for matrix rotations or axis angles
|
| 122 |
+
:param output_type: matrot / aa
|
| 123 |
+
:return:
|
| 124 |
+
'''
|
| 125 |
+
assert output_type in ['matrot', 'aa']
|
| 126 |
+
# if input_type == 'aa': Pin = VPoser.aa2matrot(Pin)
|
| 127 |
+
q_z = self.encode(Pin)
|
| 128 |
+
q_z_sample = q_z.rsample()
|
| 129 |
+
Prec = self.decode(q_z_sample)
|
| 130 |
+
if output_type == 'aa': Prec = VPoser.matrot2aa(Prec)
|
| 131 |
+
|
| 132 |
+
#return Prec, q_z.mean, q_z.sigma
|
| 133 |
+
return {'pose':Prec, 'mean':q_z.mean, 'std':q_z.scale}
|
| 134 |
+
|
| 135 |
+
def sample_poses(self, num_poses, output_type='aa', seed=None):
|
| 136 |
+
np.random.seed(seed)
|
| 137 |
+
dtype = self.bodyprior_dec_fc1.weight.dtype
|
| 138 |
+
device = self.bodyprior_dec_fc1.weight.device
|
| 139 |
+
self.eval()
|
| 140 |
+
with torch.no_grad():
|
| 141 |
+
Zgen = torch.tensor(np.random.normal(0., 1., size=(num_poses, self.latentD)), dtype=dtype).to(device)
|
| 142 |
+
return self.decode(Zgen, output_type=output_type)
|
| 143 |
+
|
| 144 |
+
@staticmethod
|
| 145 |
+
def matrot2aa(pose_matrot):
|
| 146 |
+
'''
|
| 147 |
+
:param pose_matrot: Nx1xnum_jointsx9
|
| 148 |
+
:return: Nx1xnum_jointsx3
|
| 149 |
+
'''
|
| 150 |
+
batch_size = pose_matrot.size(0)
|
| 151 |
+
homogen_matrot = F.pad(pose_matrot.view(-1, 3, 3), [0,1])
|
| 152 |
+
pose = tgm.rotation_matrix_to_angle_axis(homogen_matrot).view(batch_size, 1, -1, 3).contiguous()
|
| 153 |
+
return pose
|
| 154 |
+
|
| 155 |
+
@staticmethod
|
| 156 |
+
def aa2matrot(pose):
|
| 157 |
+
'''
|
| 158 |
+
:param Nx1xnum_jointsx3
|
| 159 |
+
:return: pose_matrot: Nx1xnum_jointsx9
|
| 160 |
+
'''
|
| 161 |
+
batch_size = pose.size(0)
|
| 162 |
+
pose_body_matrot = tgm.angle_axis_to_rotation_matrix(pose.reshape(-1, 3))[:, :3, :3].contiguous().view(batch_size, 1, -1, 9)
|
| 163 |
+
return pose_body_matrot
|
| 164 |
+
|
body_models/human_model_files/smplx/MANO_SMPLX_vertex_ids.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5abe70b6574de25470475091e8008314a5b90127eb48c3e63bfa0adf8c04dcf
|
| 3 |
+
size 13535
|
body_models/human_model_files/smplx/SMPL-X__FLAME_vertex_ids.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e70cdc3659aae699b9732e8dd4af49106310c69b90dc83d9f73e96dbf871e49
|
| 3 |
+
size 40312
|
body_models/human_model_files/smplx/SMPLX_FEMALE.npz
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e37bd22dff93362c92cea9c791c62a2d4d7e8d44b234f3e41be0020fa1c256
|
| 3 |
+
size 108532279
|
body_models/human_model_files/smplx/SMPLX_FEMALE.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b870ce1fd05b46dd81e2de6269b2955667c931c8594999eb22eeb489b00e2c1f
|
| 3 |
+
size 146809856
|
body_models/human_model_files/smplx/SMPLX_MALE.npz
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79360d466228bec1b9f9d922ea48df718a0a09bccddace18cfec98b0edd68b73
|
| 3 |
+
size 108491578
|
body_models/human_model_files/smplx/SMPLX_MALE.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4f94c40261ac4762bb9b09142d11bf47e1cc3d6b49b6bbcc4a2731451bf5632
|
| 3 |
+
size 543102085
|
body_models/human_model_files/smplx/SMPLX_NEUTRAL.npz
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15eb61ac2f91dcd6e340913e281b2b8a0a910ebe0955af9251b9bb99fd11d02b
|
| 3 |
+
size 108490191
|
body_models/human_model_files/smplx/SMPLX_NEUTRAL.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b0279321ea9bd3cec5541c03b1f1c9ab9d197896943035c3abeef47f699bc5e
|
| 3 |
+
size 542798306
|
body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:248e277858008fea271d1ea3874eed2310dfd57fa160ea07c467cf6a061e0ecd
|
| 3 |
+
size 167260951
|
body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW.npz
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecb628fadd2b40f42cd39378d1e429cd30acc0bab6104676898d4374b804163d
|
| 3 |
+
size 167261087
|
body_models/human_model_files/smplx/SMPLX_NEUTRAL_NEW_WiFlame.npy
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9047e853fc08caa5cef648aa691bf80cf423ca5f0693d825c029a6a7b0bedc51
|
| 3 |
+
size 215482118
|