
    wi                     j    d dl Z d dlmZmZmZ d dlZd dlZd dlZddl	m
Z
mZmZ  G d de
          ZdS )    N)ListOptionalUnion   )VaeImageProcessoris_valid_imageis_valid_image_imagelistc                       e Zd ZdZddee         dee         dej        fdZ	 ddej        d	e	de
ej        ej        eej        j                 f         fd
ZdS )VideoProcessorzSimple video processor.Nheightwidthreturnc                     t          |t                    rat          |d         t          j                  rA|d         j        dk    r0t          j        dt                     t          j        |d          }t          |t                    rat          |d         t          j
                  rA|d         j        dk    r0t          j        dt                     t          j        |d          }t          |t          j        t          j
        f          r|j        dk    rt          |          }nyt          |t                    rt          |d                   st          |          r|g}n<t          |t                    rt          |d                   r|}nt          d          t          j         fd|D             d          }|                    dd	d
dd          }|S )a  
        Preprocesses input video(s).

        Args:
            video (`List[PIL.Image]`, `List[List[PIL.Image]]`, `torch.Tensor`, `np.array`, `List[torch.Tensor]`, `List[np.array]`):
                The input video. It can be one of the following:
                * List of the PIL images.
                * List of list of PIL images.
                * 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, width)`).
                * 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * List of 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height,
                  width)`).
                * List of 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * 5D NumPy arrays: expected shape for each array `(batch_size, num_frames, height, width,
                  num_channels)`.
                * 5D Torch tensors: expected shape for each array `(batch_size, num_frames, num_channels, height,
                  width)`.
            height (`int`, *optional*, defaults to `None`):
                The height in preprocessed frames of the video. If `None`, will use the `get_default_height_width()` to
                get default height.
            width (`int`, *optional*`, defaults to `None`):
                The width in preprocessed frames of the video. If `None`, will use get_default_height_width()` to get
                the default width.
        r      zPassing `video` as a list of 5d np.ndarray is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d np.ndarray)axiszPassing `video` as a list of 5d torch.Tensor is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d torch.TensorzeInput is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Imagec                 @    g | ]}                     |           S ))r   r   )
preprocess).0imgr   selfr   s     i/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/video_processor.py
<listcomp>z3VideoProcessor.preprocess_video.<locals>.<listcomp>R   s,    ___RUT__Su_MM___    )dim   r         )
isinstancelistnpndarrayndimwarningswarnFutureWarningconcatenatetorchTensorcatr   r	   
ValueErrorstackpermute)r   videor   r   s   ` ``r   preprocess_videozVideoProcessor.preprocess_video   s   2 eT"" 	2z%(BJ'G'G 	2ERSHM]^L^L^Mn  
 N5q111EeT"" 	-z%(EL'I'I 	-eTUhm_`N`N`Mp  
 Ie!,,,E
 ebj%,788 		UZ1__KKEEt$$ 	a)A)A 	E]^cEdEd 	GEEt$$ 	)A%()K)K 	EEw   ______Y^___efggg aAq!,,r   r    r-   output_typec                 v   |j         d         }g }t          |          D ]K}||                             dddd          }|                     ||          }|                    |           L|dk    rt          j        |          }n3|dk    rt          j        |          }n|dk    st          | d          |S )	z
        Converts a video tensor to a list of frames for export.

        Args:
            video (`torch.Tensor`): The video as a tensor.
            output_type (`str`, defaults to `"np"`): Output type of the postprocessed `video` tensor.
        r   r   r   r   r    ptpilz9 does not exist. Please choose one of ['np', 'pt', 'pil'])	shaperanger,   postprocessappendr    r+   r'   r*   )r   r-   r/   
batch_sizeoutputs	batch_idx	batch_vidbatch_outputs           r   postprocess_videoz VideoProcessor.postprocess_videoY   s     [^
z** 	) 	)Ii(00Aq!<<I++I{CCLNN<(((($hw''GGD  k'**GG%%fffgggr   )NN)r    )__name__
__module____qualname____doc__r   intr'   r(   r.   strr   r    r!   r   PILImager<    r   r   r   r      s        ""; ;hsm ;8TW= ;didp ; ; ; ;| 7; \03	rz5<cio)>>	?     r   r   )r#   typingr   r   r   numpyr    rC   r'   image_processorr   r   r	   r   rE   r   r   <module>rI      s     ( ( ( ( ( ( ( ( ( (     



  X X X X X X X X X XX X X X X& X X X X Xr   