Spaces:
Build error
Build error
| model: | |
| target: vtdm.vtdm_gen_v01.VideoLDM | |
| base_learning_rate: 1.0e-05 | |
| params: | |
| input_key: video | |
| scale_factor: 0.18215 | |
| log_keys: caption | |
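| num_samples: 25 # tagged "frame_rate"; NOTE(review): same value as num_frames (25), presumably a frame count rather than a rate — confirm | |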
| trained_param_keys: | |
| - diffusion_model.label_emb.0.0.weight | |
| - .emb_layers. | |
| - .time_stack. | |
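| en_and_decode_n_samples_a_time: 25 # tagged "frame_rate"; NOTE(review): same value as num_frames (25), presumably a frame count rather than a rate — confirm | |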
| disable_first_stage_autocast: true | |
| denoiser_config: | |
| target: sgm.modules.diffusionmodules.denoiser.Denoiser | |
| params: | |
| scaling_config: | |
| target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise | |
| network_config: | |
| target: sgm.modules.diffusionmodules.video_model.VideoUNet | |
| params: | |
| adm_in_channels: 768 | |
| num_classes: sequential | |
| use_checkpoint: true | |
| in_channels: 8 | |
| out_channels: 4 | |
| model_channels: 320 | |
| attention_resolutions: | |
| - 4 | |
| - 2 | |
| - 1 | |
| num_res_blocks: 2 | |
| channel_mult: | |
| - 1 | |
| - 2 | |
| - 4 | |
| - 4 | |
| num_head_channels: 64 | |
| use_linear_in_transformer: true | |
| transformer_depth: 1 | |
| context_dim: 1024 | |
| spatial_transformer_attn_type: softmax-xformers | |
| extra_ff_mix_layer: true | |
| use_spatial_context: true | |
| merge_strategy: learned_with_images | |
| video_kernel_size: | |
| - 3 | |
| - 1 | |
| - 1 | |
| conditioner_config: | |
| target: sgm.modules.GeneralConditioner | |
| params: | |
| emb_models: | |
| - is_trainable: false | |
| input_key: cond_frames_without_noise | |
| ucg_rate: 0.1 | |
| target: sgm.modules.encoders.modules.FrozenOpenCLIPImagePredictionEmbedder | |
| params: | |
| n_cond_frames: 1 | |
| n_copies: 1 | |
| open_clip_embedding_config: | |
| target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder | |
| params: | |
| version: ckpts/open_clip_pytorch_model.bin | |
| freeze: true | |
| - is_trainable: false | |
| input_key: video | |
| ucg_rate: 0.0 | |
| target: vtdm.encoders.AesEmbedder | |
| - is_trainable: false | |
| input_key: elevation | |
| target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND | |
| params: | |
| outdim: 256 | |
| - input_key: cond_frames | |
| is_trainable: false | |
| ucg_rate: 0.1 | |
| target: sgm.modules.encoders.modules.VideoPredictionEmbedderWithEncoder | |
| params: | |
| disable_encoder_autocast: true | |
| n_cond_frames: 1 | |
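| n_copies: 25 # tagged "frame_rate"; NOTE(review): same value as num_frames (25), presumably a frame count rather than a rate — confirm | |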
| is_ae: true | |
| encoder_config: | |
| target: sgm.models.autoencoder.AutoencoderKLModeOnly | |
| params: | |
| embed_dim: 4 | |
| monitor: val/rec_loss | |
| ddconfig: | |
| attn_type: vanilla-xformers | |
| double_z: true | |
| z_channels: 4 | |
| resolution: 256 | |
| in_channels: 3 | |
| out_ch: 3 | |
| ch: 128 | |
| ch_mult: | |
| - 1 | |
| - 2 | |
| - 4 | |
| - 4 | |
| num_res_blocks: 2 | |
| attn_resolutions: [] | |
| dropout: 0.0 | |
| lossconfig: | |
| target: torch.nn.Identity | |
| - input_key: cond_aug | |
| is_trainable: false | |
| target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND | |
| params: | |
| outdim: 256 | |
| first_stage_config: | |
| target: sgm.models.autoencoder.AutoencoderKL | |
| params: | |
| embed_dim: 4 | |
| monitor: val/rec_loss | |
| ddconfig: | |
| attn_type: vanilla-xformers | |
| double_z: true | |
| z_channels: 4 | |
| resolution: 256 | |
| in_channels: 3 | |
| out_ch: 3 | |
| ch: 128 | |
| ch_mult: | |
| - 1 | |
| - 2 | |
| - 4 | |
| - 4 | |
| num_res_blocks: 2 | |
| attn_resolutions: [] | |
| dropout: 0.0 | |
| lossconfig: | |
| target: torch.nn.Identity | |
| loss_fn_config: | |
| target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss | |
| params: | |
| num_frames: 25 # frame count, not a frame rate (originally tagged "frame_rate") | |
| batch2model_keys: | |
| - num_video_frames | |
| - image_only_indicator | |
| sigma_sampler_config: | |
| target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling | |
| params: | |
| p_mean: 1.0 | |
| p_std: 1.6 | |
| loss_weighting_config: | |
| target: sgm.modules.diffusionmodules.loss_weighting.VWeighting | |
| sampler_config: | |
| target: sgm.modules.diffusionmodules.sampling.LinearMultistepSampler | |
| params: | |
| num_steps: 50 | |
| verbose: True | |
| discretization_config: | |
| target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization | |
| params: | |
| sigma_max: 700.0 | |
| guider_config: | |
| target: sgm.modules.diffusionmodules.guiders.LinearPredictionGuider | |
| params: | |
| num_frames: 25 # frame count, not a frame rate (originally tagged "frame_rate") | |
| max_scale: 2.5 | |
| min_scale: 1.0 | |