
    wiQ                     ,   d dl mZ d dlmZmZmZ d dlZd dlZd dlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ  ej        e          Ze G d de                      Z G d dej                  Z G d dee
          Z G d dej                  Z G d dej                  Z  G d dej                  Z! G d dej                  Z" G d dej                  Z# G d dej                  Z$ G d dej                  Z%dS )     )	dataclass)DictTupleUnionN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )	AttentionAttentionProcessorAttnProcessor)TimestepEmbedding	Timesteps)
ModelMixinc                   (    e Zd ZU dZej        ed<   dS )Kandinsky3UNetOutputNsample)__name__
__module____qualname__r   torchTensor__annotations__     v/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/models/unets/unet_kandinsky3.pyr   r       s$         FELr   r   c                   $     e Zd Z fdZd Z xZS )Kandinsky3EncoderProjc                     t                                                       t          j        ||d          | _        t          j        |          | _        d S )NF)bias)super__init__r   Linearprojection_linear	LayerNormprojection_norm)selfencoder_hid_dimcross_attention_dim	__class__s      r   r%   zKandinsky3EncoderProj.__init__&   sM    !#?<OV[!\!\!\!|,?@@r   c                 Z    |                      |          }|                     |          }|S N)r'   r)   )r*   xs     r   forwardzKandinsky3EncoderProj.forward+   s-    ""1%%  ##r   r   r   r   r%   r1   __classcell__r-   s   @r   r!   r!   %   sL        A A A A A
      r   r!   c                       e Zd Ze	 	 	 	 	 	 	 	 dded	ed
ededeeee         f         dee         deeee         f         def fd            Zede	e
ef         fd            Zdeee	e
ef         f         fdZd ZddZddZ xZS )Kandinsky3UNet          @   r   i  i   r8   i      in_channelstime_embedding_dimgroupsattention_head_dimlayers_per_blockblock_out_channelsr,   r+   c	                    t                                                       d}	d}
d}d}|}|d         dz  }t          |dd          | _        t	          ||          | _        t          ||          | _        t          j	        ||dd	          | _
        t          |          | _        |gt          |          z   }t          t          |d d
         |dd                              }fd|D             }t          |          |gz  }|||g}t!          t"          |          }g }t          |          | _        t          j        g           | _        t+          t          |g|R            D ]q\  }\  \  }}}}}|| j        dz
  k    }|                    || j        dz
  k    r|nd           | j                            t/          ||||||||	|
||                     rt          j        g           | _        t+          t          t#          |          g|R            D ]W\  }\  \  }}}}}|dk    }| j                            t3          ||                                |||||||	|
||                     Xt          j        ||          | _        t          j                    | _        t          j	        ||dd	          | _        d S )Nr7   r   )FTTTr   F   )flip_sin_to_cosdownscale_freq_shiftr   kernel_sizepaddingc                     g | ]}|rnd 	S r/   r   ).0is_existr,   s     r   
<listcomp>z+Kandinsky3UNet.__init__.<locals>.<listcomp>Y   s$    ccc8H>(($cccr   ) r$   r%   r   	time_projr   time_embeddingKandinsky3AttentionPoolingadd_time_conditionr   Conv2dconv_inr!   encoder_hid_projlistziplenmapreversed
num_levels
ModuleListdown_blocks	enumerateappendKandinsky3DownSampleBlock	up_blocksKandinsky3UpSampleBlockpop	GroupNormconv_norm_outSiLUconv_act_outconv_out)r*   r=   r>   r?   r@   rA   rB   r,   r+   expansion_ratiocompression_ratioadd_cross_attentionadd_self_attentionout_channelsinit_channelshidden_dimsin_out_dims	text_dims
num_blockslayer_paramsrev_layer_paramscat_dimslevelin_dimout_dimres_block_numtext_dimself_attentiondown_sample	up_sampler-   s          `                      r   r%   zKandinsky3UNet.__init__2   sB    	 76"*1-2"=%^_```/
 

 #= 35G#
 #
 ymTUVVV 5oGZ [ [$o-?(@(@@3{3B3/QRRAABBccccObccc	+,,0@/AA
"I/ABx66k**=,,S\+l+++T
 T
 	 	OEO%VW}h  DOa$78KOOu11D'E'EGG1MMM##)&!&#%"      r**S\%%9(8999T
 T
 	 	OEO%Wf}h 
IN!!'LLNN&!&#%"    "  \&-@@GII	-1VWXXXr   returnc                     i }dt           dt          j        j        dt          t           t
          f         ffd|                                 D ]\  }} |||           |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        namemodule
processorsc                     t          |d          r|j        ||  d<   |                                D ]\  }} |  d| ||           |S Nset_processorz
.processor.)hasattr	processornamed_children)r   r   r   sub_namechildfn_recursive_add_processorss        r   r   zCKandinsky3UNet.attn_processors.<locals>.fn_recursive_add_processors   sy    v// C282B
d.../#)#8#8#:#: U U%++t,@,@h,@,@%TTTTr   )strr   r   Moduler   r   r   )r*   r   r   r   r   s       @r   attn_processorszKandinsky3UNet.attn_processors   s     
	c 	58? 	X\]`bt]tXu 	 	 	 	 	 	 !//11 	B 	BLD&''fjAAAAr   r   c           	      z   t          | j                                                  }t          |t                    r9t          |          |k    r&t          dt          |           d| d| d          dt          dt          j        j	        ffd| 
                                D ]\  }} |||           dS )	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r   r   c                 ,   t          |d          rVt          |t                    s|                    |           n+|                    |                    |  d                     |                                D ]\  }} |  d| ||           d S r   )r   
isinstancedictr   rc   r   )r   r   r   r   r   fn_recursive_attn_processors        r   r   zFKandinsky3UNet.set_attn_processor.<locals>.fn_recursive_attn_processor   s    v// M!)T22 M((3333(($7J7J7J)K)KLLL#)#8#8#:#: T T%++t,@,@h,@,@%SSSST Tr   N)rX   r   keysr   r   
ValueErrorr   r   r   r   r   )r*   r   countr   r   r   s        @r   set_attn_processorz!Kandinsky3UNet.set_attn_processor   s    D(--//00i&& 	3y>>U+B+BmQTU^Q_Q_ m m05m mRWm m m  
	Tc 	T58? 	T 	T 	T 	T 	T 	T !//11 	A 	ALD&''fi@@@@	A 	Ar   c                 H    |                      t                                 dS )ze
        Disables custom attention processors and sets the default attention implementation.
        N)r   r   )r*   s    r   set_default_attn_processorz)Kandinsky3UNet.set_default_attn_processor   s"     	00000r   Fc                 8    t          |d          r	||_        d S d S )Ngradient_checkpointing)r   r   )r*   r   values      r   _set_gradient_checkpointingz*Kandinsky3UNet._set_gradient_checkpointing   s-    6344 	2,1F)))	2 	2r   NTc                    |5d|                     |j                  z
  dz  }|                    d          }t          j        |          sKt          |t                    rt          j        nt          j        }t          j	        |g||j
                  }n8t          |j                  dk    r |d                               |j
                  }|                    |j        d                   }|                     |                               |j                  }|                     |          }|                     |          }||                     |||          }g }	|                     |          }t'          | j                  D ]6\  }
} |||||          }|
| j        dz
  k    r|	                    |           7t'          | j                  D ]C\  }
}|
dk    r*t          j        ||	                                gd          } |||||          }D|                     |          }|                     |          }|                     |          }|s|fS t;          |          S )NrD   g     )dtypedevicer   dim)r   )tor   	unsqueezer   	is_tensorr   floatfloat32int32tensorr   rX   shapeexpandrO   rP   rU   rR   rT   r^   r]   r[   r_   ra   catrc   re   rg   rh   r   )r*   r   timestepencoder_hidden_statesencoder_attention_maskreturn_dictr   time_embed_input
time_embedhidden_statesrv   r|   r}   s                r   r1   zKandinsky3UNet.forward   sk   !-&'*@*C*CFL*Q*Q&QU]%]"%;%E%Ea%H%H"x(( 	8%/%%@%@QEMMekE|XJeFMRRRHH  A%%~((77H ??6<?33>>(3366v|DD(()9::
 $ 5 56K L L ,00=RTjkkJf%%"+D,<"="= 	- 	-E; [5JLbccF!+++$$V,,, )$. 9 9 	b 	bE9zzFM,=,=,?,?#@aHHHYvz3HJ`aaFF##F++""6**v&& 	9#62222r   )r7   r8   r9   r:   r   r;   r<   r<   )F)NNT)r   r   r   r
   intr   r   r%   propertyr   r   r   r   r   r   r   r1   r3   r4   s   @r   r6   r6   1   s        "&"$34)?6:#[Y [Y[Y  [Y 	[Y
  [Y  U3Z0[Y "#J[Y #3c
?3[Y [Y [Y [Y [Y [Y [Yz c+=&=!>    X. AE2Dd3PbKbFc2c,d  A  A  A  AD1 1 12 2 2 2'3 '3 '3 '3 '3 '3 '3 '3r   r6   c                   8     e Zd Z	 	 	 	 	 	 	 	 d
 fd	Zdd	Z xZS )rb   Nr   r9   r:   r7   r   Tc                 x   t                                                       d |rdnd d d ggd gdz  g|dz
  z  z   }||z   |fg||fg|dz
  z  z   ||fgz   }g }g }g }|| _        || _        |r(|                    t          ||d |||	                     n&|                    t          j                               t          ||          D ]\  \  }}}|                    t          |||||
|                     |(|                    t          ||||||	                     n&|                    t          j                               |                    t          |||||
                     t          j
        |          | _        t          j
        |          | _        t          j
        |          | _        d S )NTr7   rD   r   r$   r%   r{   context_dimr_   Kandinsky3AttentionBlockr   IdentityrW   Kandinsky3ResNetBlockr\   
attentions
resnets_inresnets_out)r*   r=   cat_dimrm   time_embed_dimr   rr   r?   head_dimri   rj   r}   r{   up_resolutionshidden_channelsr   r   r   
in_channelout_channelup_resolutionr-   s                        r   r%   z Kandinsky3UpSampleBlock.__init__   s!    	!<dDIJtfWXj\]gjk]kMllG#[12[)*j1n=>\*+, 	
 

,& 	-(~tVU]_noo    bkmm,,,8;O^8\8\ 	 	4%Z}%j*nfVgivww   &!!,"NKSb     !!"+--000%j+~vWhii    -
33-
33=55r   c                     t          | j        dd          | j        | j                  D ]4\  }}} |||          }| j         ||||||          } |||          }5| j        r | j        d         |||          }|S )NrD   r   
image_mask)rW   r   r   r   r   r{   	r*   r0   r   contextcontext_maskr   	attention	resnet_in
resnet_outs	            r   r1   zKandinsky3UpSampleBlock.forward9  s    03DOABB4GZ^Zj0k0k 	* 	*,Iy*	!Z((A+IaWlJOO
1j))AA 	I""1jZHHHAr   Nr   r9   r:   r7   r   TTNNNr2   r4   s   @r   rb   rb      sl         86 86 86 86 86 86t	 	 	 	 	 	 	 	r   rb   c                   8     e Zd Z	 	 	 	 	 	 	 	 d
 fd	Zdd	Z xZS )r`   Nr   r9   r:   r7   r   Tc                 f   t                                                       g }g }g }|| _        || _        |r(|                    t          ||d |||                     n&|                    t          j                               d gdz  g|dz
  z  d d |
rdnd d ggz   }||fg||fg|dz
  z  z   }t          ||          D ]\  \  }}}|                    t          |||||	                     |(|                    t          ||||||                     n&|                    t          j                               |                    t          |||||	|                     t          j
        |          | _        t          j
        |          | _        t          j
        |          | _        d S )Nr7   rD   Fr   )r*   r=   rm   r   r   rr   r?   r   ri   rj   r|   r{   r   r   r   r   r   r   r   r   r-   s                       r   r%   z"Kandinsky3DownSampleBlock.__init__F  s    	

,& 	-(ndFT\^mnn    bkmm,,,&1*a8T4R]Igcgim<n;oo'67L,;W:X\fij\j:kk8;O^8\8\ 	 	4%Z}%j+~vWhii   &!!,#^[&(Tc     !!"+--000%nfFWYf     -
33-
33=55r   c                     | j         r | j        d         |||          }t          | j        dd          | j        | j                  D ]4\  }}} |||          }| j         ||||||          } |||          }5|S )Nr   r   rD   )r{   r   rW   r   r   r   r   s	            r   r1   z!Kandinsky3DownSampleBlock.forward}  s     	I""1jZHHHA03DOABB4GZ^Zj0k0k 	* 	*,Iy*	!Z((A+IaWlJOO
1j))AAr   r   r   r2   r4   s   @r   r`   r`   E  sl         56 56 56 56 56 56n	 	 	 	 	 	 	 	r   r`   c                   $     e Zd Z fdZd Z xZS )Kandinsky3ConditionalGroupNormc                    t                                                       t          j        ||d          | _        t          j        t          j                    t          j        |d|z                      | _        | j        d         j	        j
                                         | j        d         j        j
                                         d S )NF)affiner   rD   )r$   r%   r   rd   norm
Sequentialrf   r&   context_mlpweightdatazero_r#   )r*   r?   normalized_shaper   r-   s       r   r%   z'Kandinsky3ConditionalGroupNorm.__init__  s    L)9%HHH	=BIk1O_K_4`4`aa"'--/// %++-----r   c                 "   |                      |          }t          t          |j        dd                              D ]}|                    d          }|                    dd          \  }}|                     |          |dz   z  |z   }|S )Nr   rJ   rD   r   g      ?)r   rangerX   r   r   chunkr   )r*   r0   r   _scaleshifts         r   r1   z&Kandinsky3ConditionalGroupNorm.forward  s    ""7++s17122;''(( 	, 	,A''++GG}}QA}..uIIaLLECK(50r   r2   r4   s   @r   r   r     sG        . . . . .      r   r   c                   &     e Zd Zd fd	Zd Z xZS )Kandinsky3Blockr   r9   Nc                    t                                                       t          |||          | _        t	          j                    | _        | |rt	          j        ||dd          | _        nt	          j	                    | _        t          |dk              }t	          j        ||||          | _        |!|st	          j        ||dd          | _        d S t	          j	                    | _        d S )Nr   rH   striderD   rG   )r$   r%   r   
group_normr   rf   
activationConvTranspose2dr}   r   r   rS   
projectionr|   )	r*   r=   rm   r   rH   norm_groupsr   rI   r-   s	           r   r%   zKandinsky3Block.__init__  s    8kSabb'))$$/[VW`abbbDNN[]]DNkAo&&)K;`ghhh$]$!y|QR[\]]]D!{}}Dr   c                     |                      ||          }|                     |          }|                     |          }|                     |          }|                     |          }|S r/   )r   r   r}   r   r|   )r*   r0   r   s      r   r1   zKandinsky3Block.forward  s`    OOAz**OOANN1OOAQr   )r   r9   Nr2   r4   s   @r   r   r     sL        - - - - - -"      r   r   c                   4     e Zd Zddddgz  f fd	Zd Z xZS )r   r9   r   r7   Nc                 F   t                                                       g d}t          ||          |z  }||fg||fgdz  z   ||fgz   }	t          j        fdt          |	||          D                       | _        d|v rt          j        ||dd          nt          j                    | _	        ||k    rt          j
        ||d          nt          j                    | _        d|v rt          j
        ||dd          nt          j                    | _        d S )	N)rD   r   r   rD   r   c           
      F    g | ]\  \  }}}}t          ||||          S r   )r   )rL   r   r   rH   r   r   r   s        r   rN   z2Kandinsky3ResNetBlock.__init__.<locals>.<listcomp>  sH       I-Z{M  
KVacpqq  r   Tr   rD   )rH   F)r$   r%   maxr   r\   rW   resnet_blocksr   r   shortcut_up_samplerS   shortcut_projectionshortcut_down_sample)r*   r=   rm   r   r   rj   r   kernel_sizeshidden_channelr   r-   s      ``     r   r%   zKandinsky3ResNetBlock.__init__  st    	#||[,77;LL>*+/O.PST.TTYgiuXvWww 	  ]    MP#\>N N  
 
 ~%% {KQqQQQQ 	 DOR^C^C^BIk<Q????dfdodqdq 	 
 && IlLaJJJJ 	!!!r   c                     |}| j         D ]} |||          }|                     |          }|                     |          }|                     |          }||z   }|S r/   )r   r   r   r   )r*   r0   r   outresnet_blocks        r   r1   zKandinsky3ResNetBlock.forward  ss     . 	0 	0L,sJ//CC##A&&$$Q''%%a((Gr   r2   r4   s   @r   r   r     s^        EG[\mnrvqwmw
 
 
 
 
 
>	 	 	 	 	 	 	r   r   c                   (     e Zd Zd fd	ZddZ xZS )rQ   r:   c                 z    t                                                       t          ||||d          | _        d S )NFdim_headrx   out_bias)r$   r%   r   r   )r*   num_channelsr   r   r-   s       r   r%   z#Kandinsky3AttentionPooling.__init__  sB    " 
 
 
r   Nc                     |                     |j                  }|                     |                    dd          ||          }||                    d          z   S )Nr   rD   T)r   keepdim)r   r   r   meansqueeze)r*   r0   r   r   s       r   r1   z"Kandinsky3AttentionPooling.forward  sS    #W];;..!T!B!BG\ZZ7??1%%%%r   )r:   r/   r2   r4   s   @r   rQ   rQ     sQ        
 
 
 
 
 
& & & & & & & &r   rQ   c                   (     e Zd Zd fd	ZddZ xZS )	r   Nr9   r:   r7   c           
         t                                                       t          |||          | _        t	          ||p|||d          | _        ||z  }t          |||          | _        t          j        t          j	        ||dd          t          j
                    t          j	        ||dd                    | _        d S )NFr  rD   )rH   r#   )r$   r%   r   in_normr   r   out_normr   r   rS   rf   feed_forward)	r*   r  r   r   r   r   ri   r   r-   s	           r   r%   z!Kandinsky3AttentionBlock.__init__  s    5k<Q_``"'< 
 
 
 *L86{LR`aaMIlOOOOGIIIo|OOO
 
r   c                 >   |j         dd          \  }}|                     ||          }|                    |j         d         d||z                                ddd          }||n|}||                    |j                  }|                     |||          }|                    ddd                              d                              |j         d         d||          }||z   }|                     ||          }| 	                    |          }||z   }|S )Nr   rJ   r   rD   r  )
r   r  reshapepermuter   r   r   r   r  r  )	r*   r0   r   r   r   r   heightwidthr   s	            r   r1   z Kandinsky3AttentionBlock.forward  s   ll1j))kk!'!*b&5.99AA!QJJ$0''c#'?????LnnS'<88kk!Q"",,R00881r6SXYYGmmAz**$$Gr   )Nr9   r:   r7   r   r2   r4   s   @r   r   r     sQ        
 
 
 
 
 
&       r   r   )&dataclassesr   typingr   r   r   r   torch.utils.checkpointr   configuration_utilsr	   r
   utilsr   r   attention_processorr   r   r   
embeddingsr   r   modeling_utilsr   
get_loggerr   loggerr   r   r!   r6   rb   r`   r   r   r   rQ   r   r   r   r   <module>r      s   " ! ! ! ! ! % % % % % % % % % %            B B B B B B B B ( ( ( ( ( ( ( ( N N N N N N N N N N 5 5 5 5 5 5 5 5 ' ' ' ' ' ' 
	H	%	%          :      	 	 	 	 	BI 	 	 	J3 J3 J3 J3 J3Z J3 J3 J3ZD D D D Dbi D D DNA A A A A	 A A AH    RY   &    bi   6) ) ) ) )BI ) ) )X& & & & & & & &"# # # # #ry # # # # #r   