
    wi                        d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ e G d de                      Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de	j                  Z G d de           Z! G d de	j                  Z" G d de	j                  Z#dS )    )	dataclass)OptionalTupleN   )
BaseOutputis_torch_version)randn_tensor   )get_activation)SpatialNorm)AutoencoderTinyBlockUNetMidBlock2Dget_down_blockget_up_blockc                   L    e Zd ZU dZej        ed<   dZeej	                 ed<   dS )DecoderOutputz
    Output of decoding method.

    Args:
        sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
            The decoded output sample from the last layer of the model.
    sampleNcommit_loss)
__name__
__module____qualname____doc__torchTensor__annotations__r   r   FloatTensor     q/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/models/autoencoders/vae.pyr   r   !   sD           L/3K%+,33333r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 dd	ed
edeedf         deedf         dedededef fdZde	j
        de	j
        fdZ xZS )Encodera  
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        down_block_types (`Tuple[str, ...]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
            The types of down blocks to use. See `~diffusers.models.unet_2d_blocks.get_down_block` for available
            options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        double_z (`bool`, *optional*, defaults to `True`):
            Whether to double the number of output channels for the last block.
    r   DownEncoderBlock2D@   r
       siluTin_channelsout_channelsdown_block_types.block_out_channelslayers_per_blocknorm_num_groupsact_fndouble_zc
                    t                                                       || _        t          j        ||d         ddd          | _        t          j        g           | _        |d         }
t          |          D ]_\  }}|
}||         }
|t          |          dz
  k    }t          || j        ||
| dd|||
d           }| j                            |           `t          |d         d|dd|d         |d |			  	        | _        t          j        |d         |d
          | _        t          j                    | _        |rd|z  n|}t          j        |d         |dd          | _        d| _        d S )Nr   r      kernel_sizestridepaddingư>)

num_layersr(   r)   add_downsample
resnet_epsdownsample_paddingresnet_act_fnresnet_groupsattention_head_dimtemb_channelsdefault	r(   r9   r;   output_scale_factorresnet_time_scale_shiftr=   r<   r>   add_attentionnum_channels
num_groupsepsr
   r5   F)super__init__r,   nnConv2dconv_in
ModuleListdown_blocks	enumeratelenr   appendr   	mid_block	GroupNormconv_norm_outSiLUconv_actconv_outgradient_checkpointing)selfr(   r)   r*   r+   r,   r-   r.   r/   mid_block_add_attentionoutput_channelidown_block_typeinput_channelis_final_block
down_blockconv_out_channels	__class__s                    r   rK   zEncoder.__init__G   s    	 0yq!
 
 
 =,, ,A."+,<"="= 	0 	0A*M/2N#&8"9"9A"==N'0)+#11#$$-#1"  J ##J//// (*2.  !$-1"5)1

 

 

  \7I"7MZiosttt		08JA,,l	"4R"8:KQXYZZZ&+###r   r   returnc                    |                      |          }| j        r| j        rd }t          dd          rp| j        D ]2}t
          j        j                             ||          |d          }3t
          j        j                             || j                  |d          }n| j        D ]0}t
          j        j                             ||          |          }1t
          j        j                             || j                  |          }n*| j        D ]} ||          }|                     |          }| 	                    |          }| 
                    |          }|                     |          }|S )z*The forward method of the `Encoder` class.c                       fd}|S )Nc                       |  S Nr   inputsmodules    r   custom_forwardzFEncoder.forward.<locals>.create_custom_forward.<locals>.custom_forward       !66?*r   r   rl   rm   s   ` r   create_custom_forwardz.Encoder.forward.<locals>.create_custom_forward   $    + + + + + &%r   >=1.11.0Fuse_reentrant)rN   trainingrZ   r   rP   r   utils
checkpointrT   rV   rX   rY   )r[   r   rp   rb   s       r   forwardzEncoder.forward   s    f%%= 	,T8 	,& & &  h// j"&"2  J"[3>>--j996QV ?  FF /::))$.996QV ;   #'"2 j jJ"[3>>?T?TU_?`?`bhiiFF/::;P;PQUQ_;`;`bhii #. , ,
#F++ ^^F++F ##F++v&&v&&r   )	r   r   r"   r$   r
   r&   r'   TT)r   r   r   r   intr   strboolrK   r   r   ry   __classcell__rd   s   @r   r!   r!   /   s         2 ,C.3 !! $C, C,C, C,  S/	C,
 "#s(OC, C, C, C, C, C, C, C, C, C,J*el *u| * * * * * * * *r   r!   c                        e Zd ZdZ	 	 	 	 	 	 	 	 	 dd
ededeedf         deedf         dedededef fdZ	 ddej	        de
ej	                 dej	        fdZ xZS )Decodera  
    The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r   UpDecoderBlock2Dr$   r
   r&   r'   groupTr(   r)   up_block_types.r+   r,   r-   r.   	norm_typec
                    t                                                       || _        t          j        ||d         ddd          | _        t          j        g           | _        |dk    r|nd }
t          |d         d|d|dk    rdn||d         ||
|			  	        | _	        t          t          |                    }|d
         }t          |          D ]e\  }}|}||         }|t          |          dz
  k    }t          || j        dz   ||d | d||||
|          }| j                            |           |}f|dk    rt!          |d
         |
          | _        n"t          j        |d
         |d          | _        t          j                    | _        t          j        |d
         |dd          | _        d| _        d S )Nr?   r   r1   r2   spatialr6   r   r@   rA   r   r7   r(   r)   prev_output_channeladd_upsampler9   r;   r<   r=   r>   rC   rE   rI   F)rJ   rK   r,   rL   rM   rN   rO   	up_blocksr   rT   listreversedrQ   rR   r   rS   r   rV   rU   rW   rX   rY   rZ   )r[   r(   r)   r   r+   r,   r-   r.   r   r\   r>   reversed_block_out_channelsr]   r^   up_block_typer   ra   up_blockrd   s                     r   rK   zDecoder.__init__   s    	 0yr"
 
 
 r**'0I'='=4 (*2.  !1:g1E1EII91"5)'1

 

 

 '+84F+G+G&H&H#4Q7 ). 9 9 	1 	1A}"08;N#&8"9"9A"==N#014/+$(!//$-#1+(1  H N!!(+++"0 	!!!,-?-BM!R!RD!#;Ma;P]lrv!w!w!wD			"4Q"7qRSTTT&+###r   Nr   latent_embedsre   c                 ,   |                      |          }t          t          | j                                                            j        }| j        r%| j        rd }t          dd          rt          j
        j                             || j                  ||d          }|                    |          }| j        D ]3}t          j
        j                             ||          ||d          }4nt          j
        j                             || j                  ||          }|                    |          }| j        D ]1}t          j
        j                             ||          ||          }2nA|                     ||          }|                    |          }| j        D ]} |||          }||                     |          }n|                     ||          }|                     |          }|                     |          }|S )z*The forward method of the `Decoder` class.c                       fd}|S )Nc                       |  S ri   r   rj   s    r   rm   zFDecoder.forward.<locals>.create_custom_forward.<locals>.custom_forward)  rn   r   r   ro   s   ` r   rp   z.Decoder.forward.<locals>.create_custom_forward(  rq   r   rr   rs   Frt   )rN   nextiterr   
parametersdtyperv   rZ   r   r   rw   rx   rT   torV   rX   rY   )r[   r   r   upscale_dtyperp   r   s         r   ry   zDecoder.forward  s>    f%%T$.";";"="=>>??E= +	9T8 +	9& & &  h// w/::))$.99!"'	 ;    =11 !%  H"[3>>--h77%&+	 ?  FF /::))$.996=   =11 !% w wH"[3>>?T?TU]?^?^`fhuvvFFw ^^FM::FYY}--F !N 9 9!&-88  ''//FF''>>Fv&&v&&r   )	r   r   r   r$   r
   r&   r'   r   Tri   r   r   r   r   rz   r   r{   rK   r   r   r   ry   r}   r~   s   @r   r   r      s        0 *?.3 !!  $J, J,J, J, c3h	J,
 "#s(OJ, J, J, J, J, J, J, J, J, J,^ 15? ??  -? 
	? ? ? ? ? ? ? ?r   r   c                   T     e Zd ZdZdededdf fdZdej        dej        fdZ xZ	S )	UpSamplea&  
    The `UpSample` layer of a variational autoencoder that upsamples its input.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
    r(   r)   re   Nc                     t                                                       || _        || _        t	          j        ||ddd          | _        d S )N   r
   r1   r2   )rJ   rK   r(   r)   rL   ConvTranspose2ddeconv)r[   r(   r)   rd   s      r   rK   zUpSample.__init__i  sO    
 	&((lPQZ[efgggr   xc                 X    t          j        |          }|                     |          }|S )z+The forward method of the `UpSample` class.)r   relur   )r[   r   s     r   ry   zUpSample.forwards  s#    JqMMKKNNr   
r   r   r   r   rz   rK   r   r   ry   r}   r~   s   @r   r   r   ^  s         hh h 
	h h h h h h %,        r   r   c                   f     e Zd ZdZ	 	 	 ddedededed	d
f
 fdZddej        d	ej        fdZ xZ	S )MaskConditionEncoderz)
    used in AsymmetricAutoencoderKL
             in_chout_chres_chr4   re   Nc           
         t                                                       g }|dk    r<|dz  }|dz  }||k    r|}|dk    r|}|                    ||f           |dz  }|dk    <g }|D ]\  }}	|                    |	           |                    |d         d                    g }
|}t          t	          |                    D ]q}||         }|dk    s|dk    r-|
                    t          j        ||ddd                     n,|
                    t          j        ||ddd                     |}rt          j        |
 | _        d S )Nr1   r
   r?   r   r   r2   r   )	rJ   rK   rS   rangerR   rL   rM   
Sequentiallayers)r[   r   r   r   r4   channelsin_ch_r)   _in_ch_out_chr   lout_ch_rd   s                r   rK   zMaskConditionEncoder.__init__  s~    	qjjq[FaZF{{OOVV,---aKF qjj ' 	) 	)OFG((((HRLO,,,s<(()) 	 	A"1oGAvvabiQqZ[\\\]]]]biQqZ[\\\]]]FFmV,r   r   c                     i }t          t          | j                            D ]R}| j        |         } ||          }||t          t	          |j                            <   t          j        |          }S|S )z7The forward method of the `MaskConditionEncoder` class.)r   rR   r   r{   tupleshaper   r   )r[   r   maskoutr   layers         r   ry   zMaskConditionEncoder.forward  sn    s4;''(( 	 	AKNEaA'(CE!'NN##$
1AA
r   )r   r   r   ri   r   r~   s   @r   r   r   z  s          #- #-#- #- 	#-
 #- 
#- #- #- #- #- #-J  U\        r   r   c                        e Zd ZdZ	 	 	 	 	 	 	 	 dd	ed
edeedf         deedf         dedededef fdZ	 	 	 ddej	        de
ej	                 de
ej	                 de
ej	                 dej	        f
dZ xZS )MaskConditionDecodera  The `MaskConditionDecoder` should be used in combination with [`AsymmetricAutoencoderKL`] to enhance the model's
    decoder with a conditioner on the mask and masked image.

    Args:
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        up_block_types (`Tuple[str, ...]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
            The types of up blocks to use. See `~diffusers.models.unet_2d_blocks.get_up_block` for available options.
        block_out_channels (`Tuple[int, ...]`, *optional*, defaults to `(64,)`):
            The number of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        act_fn (`str`, *optional*, defaults to `"silu"`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
        norm_type (`str`, *optional*, defaults to `"group"`):
            The normalization type to use. Can be either `"group"` or `"spatial"`.
    r   r   r$   r
   r&   r'   r   r(   r)   r   .r+   r,   r-   r.   r   c	                    t                                                       || _        t          j        ||d         ddd          | _        t          j        g           | _        |dk    r|nd }	t          |d         d|d|dk    rdn||d         ||		          | _	        t          t          |                    }
|
d
         }t          |          D ]e\  }}|}|
|         }|t          |          dz
  k    }t          || j        dz   ||d | d||||	|          }| j                            |           |}ft!          ||d
         |d                   | _        |dk    rt%          |d
         |	          | _        n"t          j        |d
         |d          | _        t          j                    | _        t          j        |d
         |dd          | _        d| _        d S )Nr?   r   r1   r2   r   r6   r   r@   )r(   r9   r;   rB   rC   r=   r<   r>   r   r   )r   r   r   rE   rI   F)rJ   rK   r,   rL   rM   rN   rO   r   r   rT   r   r   rQ   rR   r   rS   r   condition_encoderr   rV   rU   rW   rX   rY   rZ   )r[   r(   r)   r   r+   r,   r-   r.   r   r>   r   r]   r^   r   r   ra   r   rd   s                    r   rK   zMaskConditionDecoder.__init__  s9    	 0yr"
 
 
 r**'0I'='=4 (*2.  !1:g1E1EII91"5)'	
 	
 	
 '+84F+G+G&H&H#4Q7 ). 9 9 	1 	1A}"08;N#&8"9"9A"==N#014/+$(!//$-#1+(1  H N!!(+++"0 "6%a(%b)"
 "
 "
 	!!!,-?-BM!R!RD!#;Ma;P]lrv!w!w!wD			"4Q"7qRSTTT&+###r   Nzimager   r   re   c                 8	   |}|                      |          }t          t          | j                                                            j        }| j        r| j        rd }t          dd          rit          j
        j                             || j                  ||d          }|                    |          }|@|>d|z
  |z  }t          j
        j                             || j                  ||d          }	| j        D ]}
|f|d|	t          t!          |j                                     }t$          j                            ||j        dd         d	
          }||z  |d|z
  z  z   }t          j
        j                             ||
          ||d          }|5|3||z  |	t          t!          |j                                     d|z
  z  z   }net          j
        j                             || j                  ||          }|                    |          }|>|<d|z
  |z  }t          j
        j                             || j                  ||          }	| j        D ]}
|f|d|	t          t!          |j                                     }t$          j                            ||j        dd         d	
          }||z  |d|z
  z  z   }t          j
        j                             ||
          ||          }|5|3||z  |	t          t!          |j                                     d|z
  z  z   }n|                     ||          }|                    |          }| |d|z
  |z  }|                     ||          }	| j        D ]v}
|f|d|	t          t!          |j                                     }t$          j                            ||j        dd         d	
          }||z  |d|z
  z  z   } |
||          }w|5|3||z  |	t          t!          |j                                     d|z
  z  z   }||                     |          }n|                     ||          }|                     |          }|                     |          }|S )z7The forward method of the `MaskConditionDecoder` class.c                       fd}|S )Nc                       |  S ri   r   rj   s    r   rm   zSMaskConditionDecoder.forward.<locals>.create_custom_forward.<locals>.custom_forward&  rn   r   r   ro   s   ` r   rp   z;MaskConditionDecoder.forward.<locals>.create_custom_forward%  rq   r   rr   rs   Frt   Nr1   nearest)sizemode)rN   r   r   r   r   r   rv   rZ   r   r   rw   rx   rT   r   r   r{   r   r   rL   
functionalinterpolaterV   rX   rY   )r[   r   r   r   r   r   r   rp   masked_imageim_xr   sample_mask_s                r   ry   zMaskConditionDecoder.forward  s    f%%T$.";";"="=>>??E= U	UT8 U	U& & &  h// :Y/::))$.99!"'	 ;    =11 $)9$%H#5L ;1<<--d.DEE$&+	 =  D !% 
 
H(T-="&s5+>+>'?'?"@ " 9 9$V\RTRURUEV]f 9 g g!'%'QY2G!G"[3>>--h77%&+	 ?  FF $)9#d]T#eFL6I6I2J2J-KqSWx-XXF /::))$.996=   =11 $)9$%H#5L ;1<<--d.DEE$ D !% w wH(T-="&s5+>+>'?'?"@ " 9 9$V\RTRURUEV]f 9 g g!'%'QY2G!G"[3>>?T?TU]?^?^`fhuvvFF$)9#d]T#eFL6I6I2J2J-KqSWx-XXF ^^FM::FYY}--F  T%5 !DE1--lDAA !N 9 9$)9"3uV\':':#;#;<GM55dbccARYb5ccE#e^gU.CCF!&-88 T%5$c%2E2E.F.F)G1t8)TT  ''//FF''>>Fv&&v&&r   )r   r   r   r$   r
   r&   r'   r   )NNNr   r~   s   @r   r   r     sO        0 *?.3 !! O, O,O, O, c3h	O,
 "#s(OO, O, O, O, O, O, O, O, O, O,h )-'+04k k<k %k u|$	k
  -k 
k k k k k k k kr   r   c                       e Zd ZdZ	 	 	 	 ddededed	ed
edef fdZde	j
        de	j
        fdZde	j
        de	j
        fdZde	j        dee	j        e	j        ef         fdZde	j
        deedf         de	j        fdZ xZS )VectorQuantizerz
    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix
    multiplications and allows for post-hoc remapping of indices.
    NrandomFTn_evq_embed_dimbetaunknown_indexsane_index_shapelegacyc           	         t                                                       || _        || _        || _        || _        t          j        | j        | j                  | _        | j        j	        j
                            d| j        z  d| j        z             || _        | j        |                     dt          j        t!          j        | j                                       |  | j        j        d         | _        || _        | j        dk    r| j        | _        | j        dz   | _        t-          d| j         d| j         d	| j         d
           n|| _        || _        d S )Ng            ?usedr   extrar1   z
Remapping z indices to z indices. Using z for unknown indices.)rJ   rK   r   r   r   r   rL   	Embedding	embeddingweightdatauniform_remapregister_bufferr   tensornploadr   r   re_embedr   printr   )	r[   r   r   r   r   r   r   r   rd   s	           r   rK   zVectorQuantizer.__init__  se    	(	dh0ABB"++D48OS48^LLL
:!  bgdj6I6I)J)JKKK# IOA.DM!.D!W,,%)]" $ 1CTX C C4= C C+C C C   
  DM 0r   indsre   c                 4   |j         }t          |          dk    sJ |                    |d         d          }| j                            |          }|d d d d d f         |d         k                                    }|                    d          }|                    d          dk     }| j        dk    rDt          j
        d| j        ||         j                                       |j                  ||<   n
| j        ||<   |                    |          S )	Nr1   r   r?   )NN.r
   r   )r   )device)r   rR   reshaper   r   longargmaxsumr   r   randintr   r   )r[   r   ishaper   matchnewunknowns          r   remap_to_usedzVectorQuantizer.remap_to_used  s    6{{Q||F1Ir**y||D!!aaaDj!T/%::@@BBll2))A,,")) =DMG@RSSSVV^a^hViiCLL-CL{{6"""r   c                    |j         }t          |          dk    sJ |                    |d         d          }| j                            |          }| j        | j        j         d         k    rd||| j        j         d         k    <   t          j        |d d d f         |j         d         dgz  d d f         d|          }|                    |          S )Nr1   r   r?   )r   rR   r   r   r   r   r   gather)r[   r   r   r   backs        r   unmap_to_allzVectorQuantizer.unmap_to_all  s    6{{Q||F1Ir**y||D!!=49?1---/0D++,|DqqqM$*Q-1#*=qqq*@A1dKK||F###r   r   c                    |                     dddd                                          }|                    d| j                  }t	          j        t	          j        || j        j                  d          }|                     |                              |j	                  }d }d }| j
        sb| j        t	          j        |                                |z
  dz            z  t	          j        ||                                z
  dz            z   }nat	          j        |                                |z
  dz            | j        t	          j        ||                                z
  dz            z  z   }|||z
                                  z   }|                     dddd                                          }| j        L|                    |j	        d         d          }|                     |          }|                    dd          }| j        r8|                    |j	        d         |j	        d         |j	        d                   }|||||ffS )Nr   r
   r   r1   r?   dim)permute
contiguousviewr   r   argmincdistr   r   r   r   r   meandetachr   r   r   r   )r[   r   z_flattenedmin_encoding_indicesz_q
perplexitymin_encodingslosss           r   ry   zVectorQuantizer.forward  s   IIaAq!!,,..ffR!233  %|EKT^EZ,[,[abcccnn12277@@
 { 	i9uz3::<<!+;*ABBBUZQTWXW_W_WaWaQafgPgEhEhhDD:szz||a/A566UZQTWXW_W_WaWaQafgPgEhEh9hhD q 0 0 2 22 kk!Q1%%0022:!#7#?#?
B#O#O #'#5#56J#K#K #7#?#?A#F#F   	j#7#?#?	!ciXYl\_\efg\h#i#i D:}6JKKKr   indicesr   .c                 L   | j         F|                    |d         d          }|                     |          }|                    d          }|                     |          }|?|                    |          }|                    dddd                                          }|S )Nr   r?   r   r1   r
   )r   r   r   r   r   r   r   )r[   r  r   r  s       r   get_codebook_entryz"VectorQuantizer.get_codebook_entry  s    :!ooeAh33G''00Goob))G !NN733((5//C++aAq))4466C
r   )Nr   FT)r   r   r   r   rz   floatr{   r|   rK   r   
LongTensorr   r   r   r   ry   r  r}   r~   s   @r   r   r     sX         %!&#1 #1#1 #1 	#1 #1 #1 #1 #1 #1 #1 #1 #1J#%"2 #u7G # # # #$!1 $e6F $ $ $ $ L  L%elE0Q*R  L  L  L  LD%*: 5c? W\Wc        r   r   c                       e Zd Zddej        defdZddeej                 dej        fdZ	dd	d dej        fd
Z
g dfdej        deedf         dej        fdZdej        fdZdS )DiagonalGaussianDistributionFr   deterministicc                    || _         t          j        |dd          \  | _        | _        t          j        | j        dd          | _        || _        t          j        d| j        z            | _        t          j        | j                  | _	        | j        r>t          j
        | j        | j         j        | j         j                  x| _	        | _        d S d S )Nr
   r1   r   g      >g      4@      ?)r   r   )r   r   chunkr  logvarclampr  expstdvar
zeros_liker   r   )r[   r   r  s      r   rK   z%DiagonalGaussianDistribution.__init__   s    $!&Z!B!B!B	4;k$+ud;;*9S4;.//9T[)) 	"'"2	$/"8@U# # # DHtxxx	 	r   N	generatorre   c                     t          | j        j        || j        j        | j        j                  }| j        | j        |z  z   }|S )N)r  r   r   )r	   r  r   r   r   r   r  )r[   r  r   r   s       r   r   z#DiagonalGaussianDistribution.sample  sJ    IO?)/'	
 
 
 I6))r   otherc                    | j         rt          j        dg          S |Fdt          j        t          j        | j        d          | j        z   dz
  | j        z
  g d          z  S dt          j        t          j        | j        |j        z
  d          |j        z  | j        |j        z  z   dz
  | j        z
  |j        z   g d          z  S )N        r  r
   r   r1   r
   r   r   )r  r   r   r   powr  r  r  )r[   r  s     r   klzDiagonalGaussianDistribution.kl  s     	<&&&}UYIdi++dh6<t{J!		    
 UYIdi%*4a8859Dh*+ k" l	#
 "		    r   r!  r   dims.c                 
   | j         rt          j        dg          S t          j        dt          j        z            }dt          j        || j        z   t          j        || j	        z
  d          | j
        z  z   |          z  S )Nr   g       @r  r
   r   )r  r   r   r   logpir   r  r"  r  r  )r[   r   r$  logtwopis       r   nllz DiagonalGaussianDistribution.nll*  s     	'<&&&6#+&&UYt{"UYv	/A1%E%E%PP
 
 
 
 	
r   c                     | j         S ri   )r  )r[   s    r   r   z!DiagonalGaussianDistribution.mode3  s
    yr   )Fri   )r   r   r   r   r   r|   rK   r   	Generatorr   r#  r   rz   r)  r   r   r   r   r  r    s        
 
5< 
 
 
 
 
	 	 9 	U\ 	 	 	 	 6 %,    & AJ		 
 
%, 
eCHo 
el 
 
 
 
el      r   r  c            
       |     e Zd ZdZdededeedf         deedf         def
 fdZd	ej	        d
ej	        fdZ
 xZS )EncoderTinya  
    The `EncoderTiny` layer is a simpler version of the `Encoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r(   r)   
num_blocks.r+   r.   c                 F   t                                                       g }t          |          D ]\  }}||         }	|dk    r,|                    t	          j        ||	dd                     n-|                    t	          j        |	|	dddd                     t          |          D ]&}
|                    t          |	|	|                     '|                    t	          j        |d         |dd                     t	          j        | | _	        d| _
        d S )	Nr   r   r1   r3   r5   r
   F)r3   r5   r4   biasr?   )rJ   rK   rQ   rS   rL   rM   r   r   r   r   rZ   )r[   r(   r)   r.  r+   r.   r   r^   	num_blockrF   _rd   s              r   rK   zEncoderTiny.__init__I  sC    	%j11 	X 	XLAy-a0LAvvbi\qZ[\\\]]]]I$$$% ! "  	 	 	 9%% X X2<vVVWWWWX 	bi 22 6RS]^___```mV,&+###r   r   re   c                    | j         r| j        r}d }t          dd          r6t          j        j                             || j                  |d          }not          j        j                             || j                  |          }n;|                     |                    d                              d                    }|S )z.The forward method of the `EncoderTiny` class.c                       fd}|S )Nc                       |  S ri   r   rj   s    r   rm   zJEncoderTiny.forward.<locals>.create_custom_forward.<locals>.custom_forwardr  rn   r   r   ro   s   ` r   rp   z2EncoderTiny.forward.<locals>.create_custom_forwardq  rq   r   rr   rs   Frt   r1   r
   )	rv   rZ   r   r   rw   rx   r   adddivr[   r   rp   s      r   ry   zEncoderTiny.forwardm  s    = 	-T8 	-& & &  h// ]K*556K6KDK6X6XZ[kp5qqK*556K6KDK6X6XZ[\\ AEE!HHLLOO,,Ar   r   r   r   r   rz   r   r{   rK   r   r   ry   r}   r~   s   @r   r-  r-  7  s         "",", ", #s(O	",
 "#s(O", ", ", ", ", ", ",H %,        r   r-  c                        e Zd ZdZdededeedf         deedf         deded	ef fd
Zdej	        dej	        fdZ
 xZS )DecoderTinya  
    The `DecoderTiny` layer is a simpler version of the `Decoder` layer.

    Args:
        in_channels (`int`):
            The number of input channels.
        out_channels (`int`):
            The number of output channels.
        num_blocks (`Tuple[int, ...]`):
            Each value of the tuple represents a Conv2d layer followed by `value` number of `AutoencoderTinyBlock`'s to
            use.
        block_out_channels (`Tuple[int, ...]`):
            The number of output channels for each block.
        upsampling_scaling_factor (`int`):
            The scaling factor to use for upsampling.
        act_fn (`str`):
            The activation function to use. See `~diffusers.models.activations.get_activation` for available options.
    r(   r)   r.  .r+   upsampling_scaling_factorr.   upsample_fnc           
      b   t                                                       t          j        ||d         dd          t	          |          g}t          |          D ]\  }	}
|	t          |          dz
  k    }||	         }t          |
          D ]&}|                    t          |||                     '|s)|                    t          j
        ||                     |s|n|}|                    t          j        ||dd|                     t          j        | | _        d| _        d S )Nr   r   r1   r0  )scale_factorr   )r3   r5   r1  F)rJ   rK   rL   rM   r   rQ   rR   r   rS   r   Upsampler   r   rZ   )r[   r(   r)   r.  r+   r=  r.   r>  r   r^   r2  ra   rF   r3  conv_out_channelrd   s                  r   rK   zDecoderTiny.__init__  sZ    	 Ik#5a#8aQRSSS6""

 &j11 	 	LAy3z??Q#67N-a0L9%% X X2<vVVWWWW! ebk7PWbcccddd3AS|||MM	 $ !'      mV,&+###r   r   re   c                    t          j        |dz            dz  }| j        r| j        r}d }t	          dd          r6t           j        j                             || j                  |d          }nIt           j        j                             || j                  |          }n|                     |          }|                    d          	                    d          S )	z.The forward method of the `DecoderTiny` class.r   c                       fd}|S )Nc                       |  S ri   r   rj   s    r   rm   zJDecoderTiny.forward.<locals>.create_custom_forward.<locals>.custom_forward  rn   r   r   ro   s   ` r   rp   z2DecoderTiny.forward.<locals>.create_custom_forward  rq   r   rr   rs   Frt   r
   r1   )
r   tanhrv   rZ   r   rw   rx   r   mulsubr9  s      r   ry   zDecoderTiny.forward  s     Jq1u!= 	T8 	& & &  h// ]K*556K6KDK6X6XZ[kp5qqK*556K6KDK6X6XZ[\\ AA uuQxx||Ar   r:  r~   s   @r   r<  r<    s         &',', ', #s(O	',
 "#s(O', $'', ', ', ', ', ', ', ',R %,        r   r<  )$dataclassesr   typingr   r   numpyr   r   torch.nnrL   rw   r   r   utils.torch_utilsr	   activationsr   attention_processorr   unets.unet_2d_blocksr   r   r   r   r   Moduler!   r   r   r   r   r   objectr  r-  r<  r   r   r   <module>rS     s   " ! ! ! ! ! " " " " " " " "            1 1 1 1 1 1 1 1 - - - - - - ( ( ( ( ( ( - - - - - -            
4 
4 
4 
4 
4J 
4 
4 
4G G G G Gbi G G GTb b b b bbi b b bJ    ry   82 2 2 2 229 2 2 2jS S S S S29 S S Slw w w w wbi w w wt5 5 5 5 56 5 5 5pI I I I I") I I IXS S S S S") S S S S Sr   