
    wiC                        d dl mZmZ d dlZd dlmc mZ d dlmZ d dlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddl m!Z!m"Z"  G d deee          Z# G d dej$                  Z% G d dej$                  Z& G d dej$                  Z' G d dej$                  Z(dS )    )DictUnionN)nn
checkpoint   )ConfigMixinregister_to_config)PeftAdapterMixin   )BasicTransformerBlockSkipFFTransformerBlock)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttentionProcessorAttnAddedKVProcessorAttnProcessor)TimestepEmbeddingget_timestep_embedding)
ModelMixin)GlobalResponseNormRMSNorm)Downsample2D
Upsample2Dc            .       2    e Zd ZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d1dededededededededededededededed ed!ed"ed#ed$ed%ed&ef, fd'            Zd2d(ed)d*fd+Z	d3d,Z
ed)eeef         fd-            Zd.eeeeef         f         fd/Zd0 Z xZS )4UVit2DModelT   F                 @       r               ư>@   hidden_sizeuse_biashidden_dropoutcond_embed_dimmicro_cond_encode_dimmicro_cond_embed_dimencoder_hidden_size
vocab_sizecodebook_sizein_channelsblock_out_channelsnum_res_blocks
downsampleupsampleblock_num_headsnum_hidden_layersnum_attention_headsattention_dropoutintermediate_sizelayer_norm_epsln_elementwise_affinesample_sizec                    t                                                       t          j        |          | _        t                    | _        t          |
||          | _        t          ||z             | _
        t          |||||d          | _        t          |          | _        t          j        |          | _        t          j        fdt!          |          D                       | _        t                    | _        t          j        |          | _        t          ||||d|          | _        t+          ||
|	          | _        d| _        d S )Nbias)sample_proj_biasFc                 P    g | ]"}t          z  d           #S )ada_norm_continuous)dimr:   attention_head_dimdropoutcross_attention_dimattention_bias	norm_type-ada_norm_continous_conditioning_embedding_dimnorm_elementwise_affinenorm_epsada_norm_biasff_inner_dimff_biasattention_out_bias)r   )	.0_r,   r*   r<   r=   r>   r:   r+   s	     n/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/models/unets/uvit_2d.py
<listcomp>z(UVit2DModel.__init__.<locals>.<listcomp>k   sj       " ! &#(;'26I'I*(3#+3BM,A+"*!2$'/        )r6   r7   )super__init__r   Linearencoder_projr   encoder_proj_layer_normUVit2DConvEmbedembedr   
cond_embed	UVitBlock
down_blockproject_to_hidden_normproject_to_hidden
ModuleListrangetransformer_layersproject_from_hidden_normproject_from_hiddenup_blockConvMlmLayer	mlm_layergradient_checkpointing)selfr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   	__class__s    ```             ` ``` rU   rY   zUVit2DModel.__init__*   s   D 	I&9;XVVV'.{NLa'b'b$$+Z9NP^`h
 

 , >1;QY
 
 
 $!
 
 '..@.Rg&h&h#!#+={QY!Z!Z!Z"$-         " 011#  #
 #
, )0^Mb(c(c%#%9[:LS[#\#\#\ !!
 
 
 &X7Ln^k
 
 ',###rW   valuereturnNc                     d S N )rm   modulero   s      rU   _set_gradient_checkpointingz'UVit2DModel._set_gradient_checkpointing   s    rW   c                 T   |                      |          }|                     |          }t          |                                | j        j        dd          }|                    |j        d         df          }t          j	        ||gd          }|
                    | j                  }|                     |          
                    |j                  }|                     |          }|                     ||||          }|j        \  }}	}
}|                    dd	d
d                              ||
|z  |	          }|                     |          }|                     |          }| j        D ])| j        r| j        rfd}n} ||||d|i          }*|                     |          }|                     |          }|                    ||
||	                              dd
dd	          }|                     ||||          }|                     |          }|S )NTr   )flip_sin_to_cosdownscale_freq_shift   rF   )dtype)pooled_text_embencoder_hidden_statescross_attention_kwargsr   r   c                      t          g| R  S rr   r   )argslayers    rU   layer_z#UVit2DModel.forward.<locals>.layer_   s    %e3d3333rW   r}   )r~   r   added_cond_kwargs)r[   r\   r   flattenconfigr.   reshapeshapetorchcattor|   r_   r^   ra   permuterb   rc   rf   trainingrl   rg   rh   ri   rk   )rm   	input_idsr~   r}   micro_condsr   micro_cond_embedshidden_states
batch_sizechannelsheightwidthr   logitsr   s                 @rU   forwardzUVit2DModel.forward   s}    $ 1 12G H H $ < <=R S S2!!4;#DVZqr
 
 
 .55yq7I26NOO)_6G$HaPPP),,4:,>>///::==>S>YZZ

9--+"7#9	 ( 
 
 /<.A+
Hfe%--aAq99AA*fW\n^fgg33MBB..}==, 	 	E} !< 4 4 4 4 4 4 "F&;'=#4o"F	  MM 55mDD00??%--j&%RRZZ[\^_abdeff+"7#9	 & 
 
 ..rW   c                     i }dt           dt          j        j        dt          t           t
          f         ffd|                                 D ]\  }} |||           |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namert   
processorsc                     t          |d          r|                    d          ||  d<   |                                D ]\  }} |  d| ||           |S )Nget_processorT)return_deprecated_lora
.processor.)hasattrr   named_children)r   rt   r   sub_namechildfn_recursive_add_processorss        rU   r   z@UVit2DModel.attn_processors.<locals>.fn_recursive_add_processors   s    v// d282F2F^b2F2c2c
d.../#)#8#8#:#: U U%++t,@,@h,@,@%TTTTrW   )strr   r   Moduler   r   r   )rm   r   r   rt   r   s       @rU   attn_processorszUVit2DModel.attn_processors   s     
	c 	58? 	X\]`bt]tXu 	 	 	 	 	 	 !//11 	B 	BLD&''fjAAAArW   	processorc           	      z   t          | j                                                  }t          |t                    r9t          |          |k    r&t          dt          |           d| d| d          dt          dt          j        j	        ffd| 
                                D ]\  }} |||           dS )	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r   rt   c                 ,   t          |d          rVt          |t                    s|                    |           n+|                    |                    |  d                     |                                D ]\  }} |  d| ||           d S )Nset_processorr   r   )r   
isinstancedictr   popr   )r   rt   r   r   r   fn_recursive_attn_processors        rU   r   zCUVit2DModel.set_attn_processor.<locals>.fn_recursive_attn_processor  s    v// M!)T22 M((3333(($7J7J7J)K)KLLL#)#8#8#:#: T T%++t,@,@h,@,@%SSSST TrW   N)lenr   keysr   r   
ValueErrorr   r   r   r   r   )rm   r   countr   rt   r   s        @rU   set_attn_processorzUVit2DModel.set_attn_processor   s    D(--//00i&& 	3y>>U+B+BmQTU^Q_Q_ m m05m mRWm m m  
	Tc 	T58? 	T 	T 	T 	T 	T 	T !//11 	A 	ALD&''fi@@@@	A 	ArW   c           	         t          d | j                                        D                       rt                      }nt          d | j                                        D                       rt	                      }nCt          dt          t          | j                                                                       |                     |           dS )ze
        Disables custom attention processors and sets the default attention implementation.
        c              3   2   K   | ]}|j         t          v V  d S rr   )rn   r   rS   procs     rU   	<genexpr>z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>  s*      ii4t~!>>iiiiiirW   c              3   2   K   | ]}|j         t          v V  d S rr   )rn   r   r   s     rU   r   z9UVit2DModel.set_default_attn_processor.<locals>.<genexpr>  s*      hh$#==hhhhhhrW   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allr   valuesr   r   r   nextiterr   )rm   r   s     rU   set_default_attn_processorz&UVit2DModel.set_default_attn_processor  s     ii4K_KfKfKhKhiiiii 	,..IIhh$J^JeJeJgJghhhhh 	%II Nbfgklp  mA  mH  mH  mJ  mJ  hK  hK  cL  cL  N  N   		*****rW   )r   Fr   r   r    r!   r   r"   r#   r   r   r   FFr$   r%   r&   r   r'   r(   Tr)   )Frr   )__name__
__module____qualname__ _supports_gradient_checkpointingr
   intboolfloatrY   ru   r   propertyr   r   r   r   r   r   r   __classcell__rn   s   @rU   r   r   '   sE       '+$   #!%($(#&!"% !!##%#&!% $&*?j, j, j, 	j,
 j, j,  #j, "j, !j, j, j, j,   !j," #j,$ %j,& 'j,( )j,, -j,. !/j,2 !3j,6 7j,: ;j,<  $=j,> ?j, j, j, j, j, j,X  $    ; ; ; ;z c+=&=!>    X0 AE2Dd3PbKbFc2c,d  A  A  A  AF+ + + + + + +rW   r   c                   $     e Zd Z fdZd Z xZS )r]   c                     t                                                       t          j        ||          | _        t          |||          | _        t          j        ||d|          | _        d S Nrz   )kernel_sizerB   )	rX   rY   r   	Embedding
embeddingsr   
layer_normConv2dconv)rm   r3   r4   r1   elementwise_affineepsrB   rn   s          rU   rY   zUVit2DConvEmbed.__init__%  s`    ,z;??!+s4FGGIk+=1SWXXX			rW   c                     |                      |          }|                     |          }|                    dddd          }|                     |          }|S )Nr   r   rz   r   )r   r   r   r   )rm   r   r   s      rU   r   zUVit2DConvEmbed.forward+  sT    __Y//
__Z00
''1a33
YYz**
rW   r   r   r   rY   r   r   r   s   @rU   r]   r]   $  sL        Y Y Y Y Y      rW   r]   c                   2     e Zd Zdededef fdZd Z xZS )r`   r5   r6   r7   c                   	 t                                                       |
rt          ddddd	  	        | _        nd | _        t	          j        fdt          |          D                       | _        t	          j        	fdt          |          D                       | _        |r t          dddd	dd

  
        | _
        d S d | _
        d S )NTr   Conv2d_0r   rms_norm)use_convpaddingr   r   rK   r   r   rB   c           
      8    g | ]}t                    S rs   )ConvNextBlock)rS   ir   r,   r*   r=   r>   r+   s     rU   rV   z&UVitBlock.__init__.<locals>.<listcomp>T  sG     
 
 
  ")" 
 
 
rW   c                 F    g | ]}t          z   	  	        S ))rJ   rR   )r   )rS   rT   r;   r8   r   r*   r+   s     rU   rV   z&UVitBlock.__init__.<locals>.<listcomp>b  sZ         '#/%#+'/
 
 
  rW   r   F)	use_conv_transposer   r   r   rK   r   r   rB   interpolate)rX   rY   r   r6   r   rd   re   
res_blocksattention_blocksr   r7   )rm   r   r5   r*   r,   r>   r=   r+   r8   r;   r6   r7   rn   s    ` ```````  rU   rY   zUVitBlock.__init__4  sl    	 	#*$"#8
 
 
DOO #DO-
 
 
 
 
 
 
 
 
 ~..
 
 

 
 !#        ~..  !
 !
"  	!&#'$"#8!  DMMM !DMMMrW   c                    | j         |                      |          }t          | j        | j                  D ]\  }} |||          }|j        \  }}}	}
|                    |||	|
z                                ddd          } ||||          }|                    ddd                              |||	|
          }| j        |                     |          }|S )Nr   r   rz   )r~   r   )r6   zipr   r   r   viewr   r7   )rm   xr}   r~   r   	res_blockattention_blockr   r   r   r   s              rU   r   zUVitBlock.forward  s    ?&""A*-dot?T*U*U 	M 	M&I	!_--A23'/J&%z8Ve^<<DDQ1MMA)>Wm  A 		!Q""''
HfeLLAA=$a  ArW   )r   r   r   r   r   rY   r   r   r   s   @rU   r`   r`   3  st        L! L! L! L! L! L! L! L! L!\      rW   r`   c                   (     e Zd Z	 d fd	Zd Z xZS )r      c                 H   t                                                       t          j        ||dd||          | _        t          |||          | _        t          j        |t          ||z            |          | _	        t          j
                    | _        t          t          ||z                      | _        t          j        t          ||z            ||          | _        t          j        |          | _        t          j        ||dz  |          | _        d S )Nr   rz   )r   r   groupsrB   rA   r   )rX   rY   r   r   	depthwiser   normrZ   r   channelwise_linear_1GELUchannelwise_actr   channelwise_normchannelwise_linear_2Dropoutchannelwise_dropoutcond_embeds_mapper)	rm   r   r=   r>   r+   r,   r*   res_ffn_factorrn   s	           rU   rY   zConvNextBlock.__init__  s    	
 
 
 Hn6KLL	$&IhH~<U8V8V]e$f$f$f!!wyy 23x.7P3Q3Q R R$&Ic(^2K.L.Lh]e$f$f$f!#%:n#=#= "$)KAx"P"PrW   c                 f   |}|                      |          }|                    dddd          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                    dddd          }||z   }|                     t          j
        |                                        dd          \  }}|d|d d d d d d f         z   z  |d d d d d d f         z   }|S )Nr   r   r   rz   r{   )r   r   r   r   r   r   r   r   r   Fsiluchunk)rm   r   cond_embedsx_resscaleshifts         rU   r   zConvNextBlock.forward  s7   NN1IIaAq!!IIaLL%%a((  ##!!!$$%%a(($$Q''IIaAq!!I..qvk/B/BCCII!QRISSuU111aaat+,,-aaaD$6F0GGrW   )r   r   r   s   @rU   r   r     sU        uvQ Q Q Q Q Q(      rW   r   c                   >     e Zd Zdedededededef fdZd Z xZS )	rj   r4   r3   r+   r>   r=   r2   c                     t                                                       t          j        ||d|          | _        t          |||          | _        t          j        ||d|          | _        d S r   )rX   rY   r   r   conv1r   r   conv2)rm   r4   r3   r+   r>   r=   r2   rn   s          rU   rY   zConvMlmLayer.__init__  si     	Y1;AT\]]]
!+~?TUUY{MqxXXX


rW   c                     |                      |          }|                     |                    dddd                                        dddd          }|                     |          }|S )Nr   r   r   rz   )r  r   r   r  )rm   r   r   s      rU   r   zConvMlmLayer.forward  sf    

=11(=(=aAq(I(IJJRRSTVWYZ\]^^M**rW   )	r   r   r   r   r   r   rY   r   r   r   s   @rU   rj   rj     s        YY Y 	Y
  $Y Y Y Y Y Y Y Y      rW   rj   ))typingr   r   r   torch.nn.functionalr   
functionalr   torch.utils.checkpointr   configuration_utilsr	   r
   loadersr   	attentionr   r   attention_processorr   r   r   r   r   r   r   r   modeling_utilsr   normalizationr   r   resnetr   r   r   r   r]   r`   r   rj   rs   rW   rU   <module>r     sH                            - - - - - - B B B B B B B B ' ' ' ' ' ' E E E E E E E E              C B B B B B B B ' ' ' ' ' ' 7 7 7 7 7 7 7 7 - - - - - - - -z+ z+ z+ z+ z+*k+; z+ z+ z+z    bi   ` ` ` ` `	 ` ` `F* * * * *BI * * *Z    29     rW   