
    wi1M                       d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZmZ ddlmZmZ ddlmZmZmZ dd	lmZ d
dlmZmZmZmZmZmZ d
dlm Z  d
dl!m"Z"m#Z# d
dl$m%Z% d
dl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d
dl.m/Z/  ej0        e1          Z2e G d de                      Z3 G d dej4                  Z5 G d dej4                  Z6 G d dej4                  Z7	 	 	 	 	 	 	 d=de8de8de8d e8d!e8d"ee8         d#ee
e8e	e8         f                  d$ee8         d%ee8         d&e9d'ee9         fd(Z:	 	 	 	 	 	 d>d)e8d*e8d!ee8         d"ee8         d#e8d$ee8         d%ee8         d'e9fd+Z;d,e8d-e8d.ee8         fd/Z< G d0 d1e%e          Z= G d2 d3e%e          Z> G d4 d5ej4                  Z? G d6 d7ej4                  Z@ G d8 d9ej4                  ZAd?d:ZBd; ZCd< ZDdS )@    )	dataclass)gcd)AnyDictListOptionalTupleUnionN)Tensornn   )ConfigMixinregister_to_config)
BaseOutputis_torch_versionlogging)apply_freeu   )ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORS	AttentionAttentionProcessorAttnAddedKVProcessorAttnProcessor)ControlNetConditioningEmbedding)TimestepEmbedding	Timesteps)
ModelMixin)CrossAttnDownBlock2DCrossAttnUpBlock2DDownsample2DResnetBlock2DTransformer2DModelUNetMidBlock2DCrossAttn
Upsample2D)UNet2DConditionModelc                   "    e Zd ZU dZdZeed<   dS )ControlNetXSOutputa=  
    The output of [`UNetControlNetXSModel`].

    Args:
        sample (`Tensor` of shape `(batch_size, num_channels, height, width)`):
            The output of the `UNetControlNetXSModel`. Unlike `ControlNetOutput` this is NOT to be added to the base
            model output, but is already the final output.
    Nsample)__name__
__module____qualname____doc__r)   r   __annotations__     n/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/models/controlnet_xs.pyr(   r(   3   s.           FFr0   r(   c                        e Zd ZdZ	 	 d	dej        dej        dej        deej                 deej                 f
 fdZ xZ	S )
DownBlockControlNetXSAdapterz}Components that together with corresponding components from the base model will form a
    `ControlNetXSCrossAttnDownBlock2D`Nresnetsbase_to_ctrlctrl_to_base
attentionsdownsamplerc                     t                                                       || _        || _        || _        || _        || _        d S N)super__init__r4   r5   r6   r7   downsamplers)selfr4   r5   r6   r7   r8   	__class__s         r1   r<   z%DownBlockControlNetXSAdapter.__init__E   sG     	(($'r0   )NN)
r*   r+   r,   r-   r   
ModuleListr   Conv2dr<   __classcell__r?   s   @r1   r3   r3   A   s        * * /3+/( (( m( m	(
 R]+( bi(( ( ( ( ( ( ( ( ( (r0   r3   c                   D     e Zd ZdZdedej        dej        f fdZ xZS )MidBlockControlNetXSAdapterz|Components that together with corresponding components from the base model will form a
    `ControlNetXSCrossAttnMidBlock2D`midblockr5   r6   c                 r    t                                                       || _        || _        || _        d S r:   )r;   r<   rF   r5   r6   )r>   rF   r5   r6   r?   s       r1   r<   z$MidBlockControlNetXSAdapter.__init__Y   s6     ((r0   )	r*   r+   r,   r-   r$   r   r@   r<   rB   rC   s   @r1   rE   rE   U   sc        ) ))!8 ) )eger ) ) ) ) ) ) ) ) ) )r0   rE   c                   2     e Zd ZdZdej        f fdZ xZS )UpBlockControlNetXSAdapterzwComponents that together with corresponding components from the base model will form a `ControlNetXSCrossAttnUpBlock2D`r6   c                 V    t                                                       || _        d S r:   )r;   r<   r6   )r>   r6   r?   s     r1   r<   z#UpBlockControlNetXSAdapter.__init__c   s'    (r0   )r*   r+   r,   r-   r   r@   r<   rB   rC   s   @r1   rI   rI   `   sR         B  B)R] ) ) ) ) ) ) ) ) ) )r0   rI       T   Fbase_in_channelsbase_out_channelsctrl_in_channelsctrl_out_channelstemb_channelsmax_norm_num_groupstransformer_layers_per_blocknum_attention_headscross_attention_dimadd_downsampleupcast_attentionc                    d}g }g }g }g }t          |t                    r|g|z  }t          |          D ]}|dk    r| n|} |dk    r|n|}|                    t	          | |                      |                    t          || z   ||t          || z   |          t          ||          d                     |rB|                    t          |||z  |||         |	d|t          ||                               |                    t	          ||                     |
r]|                    t	          ||                     t          ||z   d|d	          }|                    t	          ||                     nd }t          t          j        |          t          j        |          t          j        |          
          }|rt          j        |          |_        |||_        |S )Nr   r   
max_factorh㈵>in_channelsout_channelsrQ   groups
groups_outepsTr]   
num_layersrU   use_linear_projectionrW   norm_num_groupsopuse_convr^   name)r4   r5   r6   )
isinstanceintrangeappendmake_zero_convr"   find_largest_factorr#   r!   r3   r   r@   r7   r=   )rM   rN   rO   rP   rQ   rR   has_crossattnrS   rT   rU   rV   rW   rc   r4   r7   r6   r5   ir=   down_block_componentss                       r1   get_down_block_adapterrs   h   s{    JGJLL.44 S(D'E
'R$: #R #R/0Avv++;L/0Avv++;L 	N+;=MNNOOO,/??.+*+;>N+N[nooo./@M`aaa  		
 		
 		
  	"'%)<< 1;A>(;*.%5$78IVi$j$j$j	 	 	   	N+<>OPPQQQQ  	N+<>OPPQQQ# 11DO`gk
 
 
 	N+<>OPPQQQQ8g&&]<00]<00    E+-=+D+D(-9*  r0   base_channelsctrl_channelsc                     t          | |           }t          ||| z   ||t          t          ||| z             |          ||d|	  	        }	t          ||           }
t	          ||	|
          S )NT	rS   r]   r^   rQ   resnet_groupsrU   rT   rd   rW   )r5   rF   r6   )rn   r$   ro   r   rE   )rt   ru   rQ   rR   rS   rT   rU   rW   r5   rF   r6   s              r1   get_mid_block_adapterry      s     "-??L&%A!M1"#)#m]]=Z*[*[]pqq//")  H "-??L&L8bnoooor0   r^   prev_output_channelctrl_skip_channelsc                     g }d}t          |          D ]5}|dk    r|n| }|                    t          ||         |                     6t          t	          j        |                    S )N   r   )r6   )rl   rm   rn   rI   r   r@   )r^   rz   r{   r6   rc   rq   resnet_in_channelss          r1   get_up_block_adapterr      s~    
 LJ: W W45FF00N+=a+@BTUUVVVV%2=3N3NOOOOr0   c                       e Zd ZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$dededee         dede	de
eee         f         dee         dee         dedee         dee         de
eee         f         de	def fd            Ze	 	 	 	 	 	 	 	 d%d ed!ee         deee                  deee                  de	dedededee         fd"            Zd# Z xZS )&ControlNetXSAdaptera  
    A `ControlNetXSAdapter` model. To use it, pass it into a `UNetControlNetXSModel` (together with a
    `UNet2DConditionModel` base model).

    This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
    methods implemented for all models (such as downloading or saving).

    Like `UNetControlNetXSModel`, `ControlNetXSAdapter` is compatible with StableDiffusion and StableDiffusion-XL. It's
    default parameters are compatible with StableDiffusion.

    Parameters:
        conditioning_channels (`int`, defaults to 3):
            Number of channels of conditioning input (e.g. an image)
        conditioning_channel_order (`str`, defaults to `"rgb"`):
            The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
        conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`):
            The tuple of output channels for each block in the `controlnet_cond_embedding` layer.
        time_embedding_mix (`float`, defaults to 1.0):
            If 0, then only the control adapters's time embedding is used. If 1, then only the base unet's time
            embedding is used. Otherwise, both are combined.
        learn_time_embedding (`bool`, defaults to `False`):
            Whether a time embedding should be learned. If yes, `UNetControlNetXSModel` will combine the time
            embeddings of the base model and the control adapter. If no, `UNetControlNetXSModel` will use the base
            model's time embedding.
        num_attention_heads (`list[int]`, defaults to `[4]`):
            The number of attention heads.
        block_out_channels (`list[int]`, defaults to `[4, 8, 16, 16]`):
            The tuple of output channels for each block.
        base_block_out_channels (`list[int]`, defaults to `[320, 640, 1280, 1280]`):
            The tuple of output channels for each block in the base unet.
        cross_attention_dim (`int`, defaults to 1024):
            The dimension of the cross attention features.
        down_block_types (`list[str]`, defaults to `["CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"]`):
            The tuple of downsample blocks to use.
        sample_size (`int`, defaults to 96):
            Height and width of input/output sample.
        transformer_layers_per_block (`Union[int, Tuple[int]]`, defaults to 1):
            The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for
            [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`].
        upcast_attention (`bool`, defaults to `True`):
            Whether the attention computation should always be upcasted.
        max_norm_num_groups (`int`, defaults to 32):
            Maximum number of groups in group normal. The actual number will the the largest divisor of the respective
            channels, that is <= max_norm_num_groups.
    r}   rgb   rK   `            ?F   r      r   r   i@  i     r   rL   r   r   r   DownBlock2Dr   r   TrK   conditioning_channelsconditioning_channel_order#conditioning_embedding_out_channelstime_embedding_mixlearn_time_embeddingrT   block_out_channelsbase_block_out_channelsrU   down_block_typessample_sizerS   rW   rR   c                 \   t                                                       |d         }|d         dz  }|dvrt          d|           t          |          t          |
          k    rt          d| d|
 d          t	          |t
          t          f          s|gt          |
          z  }t	          |	t
          t          f          s|	gt          |
          z  }	t	          |t
          t          f          s|gt          |
          z  }t          |          t          |
          k    rt          d| d|
 d          t          |d         ||	          | _        |rt          ||          | _
        nd | _
        t          j        g           | _        t          j        g           | _        t          j        d|d         d
d          | _        t#          |d         |d                   | _        |d         }|d         }t'          |
          D ]y\  }}|}||         }|}||         }d|v }|t          |
          dz
  k    }| j                            t+          |||||||||         ||         |	|         | |                     zt-          |d         |d         ||d         |d         |	d         |          | _        |d         gt'          |          D ]8\  }}|t          |          dz
  k     rd
nd}                    |g|z             9t          t3          |                    }|d         }t5          t          |
                    D ]Q}|}||         }fdt5          d
          D             }| j                            t7          |||                     Rd S )Nr   r   )r   bgrz&unknown `conditioning_channel_order`: zbMust provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: z. `down_block_types`: .zdMust provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: conditioning_embedding_channelsr   r   r}   r   kernel_sizepadding	CrossAttn)rM   rN   rO   rP   rQ   rR   rp   rS   rT   rU   rV   rW   )rt   ru   rQ   rS   rT   rU   rW   r   c                 8    g | ]}                                 S r/   pop.0_r{   s     r1   
<listcomp>z0ControlNetXSAdapter.__init__.<locals>.<listcomp>  &    "N"N"N#5#9#9#;#;"N"N"Nr0   )r^   rz   r{   )r;   r<   
ValueErrorlenrj   listtupler   controlnet_cond_embeddingr   time_embeddingr   r@   down_blocksup_connectionsrA   conv_inrn   control_to_base_for_conv_in	enumeraterm   rs   ry   	mid_blockextendreversedrl   r   ) r>   r   r   r   r   r   rT   r   r   rU   r   r   rS   rW   rR   time_embedding_input_dimtime_embedding_dimrN   rP   rq   down_block_typerM   rO   rp   is_final_blockr^   number_of_subblocks reversed_base_block_out_channelsprev_base_output_channelctrl_skip_channels_r{   r?   s                                  @r1   r<   zControlNetXSAdapter.__init__!  s   . 	#:1#= 4Q7!; &^;;bF`bbccc!""c*:&;&;;; s  vH  s  s  `p  s  s  s   6uFF 	b,H+ICP`LaLa+a(-e}== 	P#6"7#>N:O:O"O-e}== 	P#6"7#>N:O:O"O"##s+;'<'<<< v  xK  v  v  cs  v  v  v  
 *I,>q,AB"7*
 *
 *
&   	'"34LN`"a"aD"&D=,, mB// y$6q$9qRSTTT+9:LQ:OQhijQk+l+l( 4A6.q1"+,<"="= 	 	A0 7 :0 21 5'?:M#&6"7"7!";;N##&%5&7%5&7"4(;"/1Ma1P(;A(>(;A(>'5#5%5     $ /1"5,R0,)Eb)I 3B 7 3B 7-
 
 
 134();<< 	L 	LOA|/001444!   %%|n7J&JKKKK+/9P0Q0Q+R+R(<Q?s+,,-- 	 	A'8$ @ C"N"N"N"NU1XX"N"N"N&&$!2(@':     	 	r0   Nunet
size_ratioc
                    |du}
du}|
|z  st          d          |pfd|j        j        D             }||j        j        } | |||	|||||j        j        |j        j        |j        j        |j        j        |j        j        |j        j        |j        j	                  }|
                    |j                   |S )a8  
        Instantiate a [`ControlNetXSAdapter`] from a [`UNet2DConditionModel`].

        Parameters:
            unet (`UNet2DConditionModel`):
                The UNet model we want to control. The dimensions of the ControlNetXSAdapter will be adapted to it.
            size_ratio (float, *optional*, defaults to `None`):
                When given, block_out_channels is set to a fraction of the base model's block_out_channels. Either this
                or `block_out_channels` must be given.
            block_out_channels (`List[int]`, *optional*, defaults to `None`):
                Down blocks output channels in control model. Either this or `size_ratio` must be given.
            num_attention_heads (`List[int]`, *optional*, defaults to `None`):
                The dimension of the attention heads. The naming seems a bit confusing and it is, see
                https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why.
            learn_time_embedding (`bool`, defaults to `False`):
                Whether the `ControlNetXSAdapter` should learn a time embedding.
            time_embedding_mix (`float`, defaults to 1.0):
                If 0, then only the control adapter's time embedding is used. If 1, then only the base unet's time
                embedding is used. Otherwise, both are combined.
            conditioning_channels (`int`, defaults to 3):
                Number of channels of conditioning input (e.g. an image)
            conditioning_channel_order (`str`, defaults to `"rgb"`):
                The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
            conditioning_embedding_out_channels (`Tuple[int]`, defaults to `(16, 32, 96, 256)`):
                The tuple of output channel for each block in the `controlnet_cond_embedding` layer.
        NzePass exactly one of `block_out_channels` (for absolute sizing) or `size_ratio` (for relative sizing).c                 4    g | ]}t          |z            S r/   )rk   )r   br   s     r1   r   z1ControlNetXSAdapter.from_unet.<locals>.<listcomp>  s%    3p3p3pACJ4G4G3p3p3pr0   )r   r   r   r   r   rT   r   r   rU   r   r   rS   rW   rR   )r   configr   attention_head_dimrU   r   r   rS   rW   re   todtype)clsr   r   r   rT   r   r   r   r   r   
fixed_sizerelative_sizemodels     `          r1   	from_unetzControlNetXSAdapter.from_unet  s    R (t3
"$.]* 	w  
 0p3p3p3p3pQUQ\Qo3p3p3p&"&+"@"7'A0S1!5 31$(K$B $ ?![9/)-)Q![9 $ ;
 
 
$ 	r0   c                      t          d          )NzA ControlNetXSAdapter cannot be run by itself. Use it together with a UNet2DConditionModel to instantiate a UNetControlNetXSModel.)r   )r>   argskwargss      r1   forwardzControlNetXSAdapter.forward  s     Q
 
 	
r0   )r}   r   r   r   Fr   r   r   rL   r   r   r   TrK   )NNNFr   r}   r   r   )r*   r+   r,   r-   r   rk   strr	   floatboolr
   r   r<   classmethodr&   r   r   r   rB   rC   s   @r1   r   r      s'       , ,\  &'*/:K$'%*67)7.D#'(
 &(?@!%#%)C C"C %(C .33Z	C
 "C #C #3c
?3C "#JC "'sC !C  *C" c]#C$ ',CsO&<%C& 'C( !)C C C C C CJ  '+2637%*"%%&*/:KI I"I UOI %T#Y/	I
 &d3i0I #I  I  #I %(I .33ZI I I [IV
 
 
 
 
 
 
r0   r   c            -           e Zd ZdZdZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dJdee         dee	         dee	         dee         dee         de
eee         f         de
eee         f         de
eee         f         dee	         dee         dedee         dee         d ed!ed"ee         d#e	d$ed%ee         d&e
eee         f         d'ef* fd(            Ze	 	 	 	 	 dKd)ed*ee         d+ee         d%eee                  d ee         d,ee         fd-            ZdLd/ZdMd0Zed.ee	ef         fd1            Zd2e
eee	ef         f         fd3Zd4 Zd5ed6ed7ed8efd9Zd: Zd; Zd< Z	 	 	 	 	 	 	 	 	 dNd=ed>e
e j        eef         d?e j        d@ee j                 dAee         dBee j                 dCee j                 dDee j                 dEeee	e!f                  dFeee	e j        f                  dGedHed.e
e"ef         fdIZ# xZ$S )OUNetControlNetXSModela9  
    A UNet fused with a ControlNet-XS adapter model

    This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
    methods implemented for all models (such as downloading or saving).

    `UNetControlNetXSModel` is compatible with StableDiffusion and StableDiffusion-XL. It's default parameters are
    compatible with StableDiffusion.

    It's parameters are either passed to the underlying `UNet2DConditionModel` or used exactly like in
    `ControlNetXSAdapter` . See their documentation for details.
    Tr   r   	UpBlock2Dr    r    r    r   rK   rL   r   r   Nr   r}   r   r   Fr   r   r   r   up_block_typesr   re   rU   rS   rT   addition_embed_typeaddition_time_embed_dimrW   time_cond_proj_dim%projection_class_embeddings_input_dimr   ctrl_conditioning_channels(ctrl_conditioning_embedding_out_channelsctrl_conditioning_channel_orderctrl_learn_time_embeddingctrl_block_out_channelsctrl_num_attention_headsctrl_max_norm_num_groupsc                 
  - t                                                       |dk     s|dk    rt          d          |dk     r|st          d          |	|	dk    rt          d          t          |t          t
          f          s|gt          |          z  }t          |t          t
          f          s|gt          |          z  }t          |t          t
          f          s|gt          |          z  }t          |t          t
          f          s|gt          |          z  }|}d| _        t          j	        d|d         dd	          | _
        t          |d         ||
          | _        t          j	        d|d         dd	          | _        t          |d         |d                   | _        |d         }|d         dz  }t!          |d         dd          | _        t%          |||          | _        t%          ||          | _        |	d | _        d | _        n,t!          |
dd          | _        t%          ||          | _        g }|d         }|d         }t/          |          D ]|\  }}|}||         }|}||         }d|v } |t          |          dz
  k    }!|                    t3          |||||||| ||         ||         ||         ||         |! |                     }t5          |d         |d         ||||d         |d         |d         |d         |
  
        | _        g }"t	          t9          |                    }#t	          t9          |                    }$t	          t9          |                    }%|d         g-t/          |          D ]8\  }}&|t          |          dz
  k     rdnd}'-                    |&g|'z             9t	          t9          |                    }(|(d         }&t/          |          D ]\  }})|&}*|(|         }&|(t=          |dz   t          |          dz
                     }+-fdt?          d          D             },d|)v } |t          |          dz
  k    }!|"                    tA          |+|&|*|,||| |#|         |$|         |%|         |! ||                     t          j!        |          | _"        t          j!        |"          | _#        t          j$        |d         |          | _%        t          j&                    | _'        t          j	        |d         ddd	          | _(        d S )Nr   r   z1`time_embedding_mix` needs to be between 0 and 1.zKTo use `time_embedding_mix` < 1, `ctrl_learn_time_embedding` must be `True`	text_timezAs `UNetControlNetXSModel` currently only supports StableDiffusion and StableDiffusion-XL, `addition_embed_type` must be `None` or `'text_time'`.r   r}   r   r   T)flip_sin_to_cosdownscale_freq_shift)cond_proj_dim)r]   time_embed_dimr   rM   rN   rO   rP   rQ   re   r   rp   rS   base_num_attention_headsr   rU   rV   rW   r   
rt   ru   rQ   re   r   rS   r   r   rU   rW   r   c                 8    g | ]}                                 S r/   r   r   s     r1   r   z2UNetControlNetXSModel.__init__.<locals>.<listcomp>  r   r0   )r]   r^   rz   r{   rQ   resolution_idxrp   rS   rT   rU   add_upsamplerW   re   )num_channels
num_groups))r;   r<   r   rj   r   r   r   r]   r   rA   base_conv_inr   r   ctrl_conv_inrn   r   r   base_time_projr   base_time_embeddingctrl_time_embeddingbase_add_time_projbase_add_embeddingr   rm    ControlNetXSCrossAttnDownBlock2DControlNetXSCrossAttnMidBlock2Dr   r   r   minrl   ControlNetXSCrossAttnUpBlock2Dr@   r   	up_blocks	GroupNormbase_conv_norm_outSiLUbase_conv_actbase_conv_out)/r>   r   r   r   r   re   rU   rS   rT   r   r   rW   r   r   r   r   r   r   r   r   r   r   r   time_embed_input_dimr   r   rN   rP   rq   r   rM   rO   rp   r   r    rev_transformer_layers_per_blockrev_num_attention_headsrev_cross_attention_dimr^   r   reversed_block_out_channelsup_block_typerz   r]   r   r{   r?   s/                                                @r1   r<   zUNetControlNetXSModel.__init__	  s8   @ 	!!%7!%;%;PQQQ!!*C!jkkk*/Bk/Q/Q d   6uFF 	b,H+ICP`LaLa+a(-e}== 	P#6"7#>N:O:O"O-e}== 	P#6"7#>N:O:O"O2T5MBB 	Z(@'ACHXDYDY'Y$#6  Ia);A)>AWXYYY)H,CA,FG"<*
 *
 *
&
 Ia)@)CQR\]^^^+9:QRS:TVhijVk+l+l(  2!4+A.2'(:1(=tjklll#4 ,$
 $
 $
 
 $5AUft#u#u#u &&*D#&*D##&/0GY]tu&v&v&vD#&78]_m&n&nD# .q13A6"+,<"="= 	 	A0 21 50 7 :'?:M#&6"7"7!";;N0%5&7%5&7"0$3-E"/1Ma1P-Ea-H-Ea-H(;A(>'5#5%5     ( 9,R01"5(+%=)Eb)I%=b%A%=b%A 3B 7-
 
 
 	+/9U0V0V+W+W("&x0H'I'I"J"J"&x0C'D'D"E"E 6a89()@AA 	L 	LOA|455999q   %%|n7J&JKKKK&*84F+G+G&H&H#215 ). 9 9 	 	A}".6q9L5c!a%EWAXAX[\A\6]6]^K"N"N"N"NU1XX"N"N"N'=8M#&8"9"9A"==N. +!-(;':"0#$"/1QRS1T(?(B(?(B%3!3%5$3     $ =55y11"$,<Nq<Q^m"n"n"nWYYY'9!'<aQXYZZZr0   r   
controlnetr   ctrl_optional_kwargsc                 (   |t          j        |||fi |}n,t          d ||||fD                       rt          d          g dfd|j                                        D             |j        j        d<   g dfd|j                                        D             |j        j        d	<   |                     i           }g d
}|D ]H}	t          |d|	z             
                    t          ||	                                                     Iddg}
|
D ]h}	t          ||	          rVt          ||	          Ft          |d|	z             
                    t          ||	                                                     i|j        
                    |j                                                   |j        
                    |j                                                   |j        1|j        
                    |j                                                   |j        
                    |j                                                   t'          j        d t+          |j        |j                  D                       |_        t.                              |j        |j                  |_        t'          j        d t+          |j        |j                  D                       |_        |                    |j                   |S )a  
        Instantiate a [`UNetControlNetXSModel`] from a [`UNet2DConditionModel`] and an optional [`ControlNetXSAdapter`]
        .

        Parameters:
            unet (`UNet2DConditionModel`):
                The UNet model we want to control.
            controlnet (`ControlNetXSAdapter`):
                The ConntrolNet-XS adapter with which the UNet will be fused. If none is given, a new ConntrolNet-XS
                adapter will be created.
            size_ratio (float, *optional*, defaults to `None`):
                Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
            ctrl_block_out_channels (`List[int]`, *optional*, defaults to `None`):
                Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details,
                where this parameter is called `block_out_channels`.
            time_embedding_mix (`float`, *optional*, defaults to None):
                Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
            ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`):
                Passed to the `init` of the new controlent if no controlent was given.
        Nc              3      K   | ]}|d uV  	d S r:   r/   )r   os     r1   	<genexpr>z2UNetControlNetXSModel.from_unet.<locals>.<genexpr>  s7        "#     r0   zWhen a controlnet is passed, none of these parameters should be passed: size_ratio, ctrl_block_out_channels, time_embedding_mix, ctrl_optional_kwargs.)r   r   r   r   re   rU   rS   r   r   rW   r   r   c                 $    i | ]\  }}|v 	||S r/   r/   )r   kvparams_for_unets      r1   
<dictcomp>z3UNetControlNetXSModel.from_unet.<locals>.<dictcomp>  s)    XXXDAq1CWCW1aCWCWCWr0   rT   )r   r   r   r   r   rT   rR   c                 *    i | ]\  }}|v 	d |z   |S )ctrl_r/   )r   r  r  params_for_controlnets      r1   r  z3UNetControlNetXSModel.from_unet.<locals>.<dictcomp>  s/     t t tDAqYZ^sYsYs1aYsYsYsr0   r   )r   r   conv_norm_outconv_outbase_add_time_projadd_embeddingc              3   R   K   | ]"\  }}t                               ||          V  #d S r:   )r   from_modulesr   r   cs      r1   r  z2UNetControlNetXSModel.from_unet.<locals>.<genexpr>&  sJ       *
 *
1 -99!Q??*
 *
 *
 *
 *
 *
r0   c              3   R   K   | ]"\  }}t                               ||          V  #d S r:   )r   r  r   s      r1   r  z2UNetControlNetXSModel.from_unet.<locals>.<genexpr>+  sJ       (
 (
1 +771==(
 (
 (
 (
 (
 (
r0   )r   r   anyr   r   itemsr   r   from_configgetattrload_state_dict
state_dicthasattrr   r   r   r   r   r   r   r@   zipr   r   r  r   r   r   r   r   )r   r   r  r   r   r   r  r   modules_from_unetmoptional_modules_from_unetr  r  s              @@r1   r   zUNetControlNetXSModel.from_unet  s   < ,6j"9 =Q JJ   (24KM_au'v      ! m  

 
 
 YXXXDK,=,=,?,?XXX151O-.!
 !
 !
 !u t t tJ<M<S<S<U<U t t t6@6G6Z23  L? L6K LMM
 
 
 # 	W 	WAE7Q;''77a8H8H8S8S8U8UVVVV &
" , 	[ 	[AtQ [GD!$4$4$@w{++;;GD!<L<L<W<W<Y<YZZZ 	'77
8\8g8g8i8ijjj**:+=+H+H+J+JKKK$0%55j6O6Z6Z6\6\]]])99*:`:k:k:m:mnnn M *
 *
D,j.DEE*
 *
 *
 
 
 :FFt~WaWkll- (
 (
DNJ,EFF(
 (
 (
 
 
 	r0   returnc                 X                                      D ]	}d|_        
g d} fd|D             }|D ] }|                                 D ]	}d|_        
! j        D ]}|                                  j                                          j        D ]}|                                 dS )Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine
        tuning.T)r   r   r   r   r   r  r  r  c                 P    g | ]"}t          |          t          |          #S r:   )r&  )r   partr>   s     r1   r   z<UNetControlNetXSModel.freeze_unet_params.<locals>.<listcomp>G  s3    ddddGDRVDWDWDcgdD))DcDcDcr0   FN)
parametersrequires_gradr   freeze_base_paramsr   r   )r>   param
base_partsr2  dus   `     r1   freeze_unet_paramsz(UNetControlNetXSModel.freeze_unet_params5  s     __&& 	' 	'E"&E	
 	
 	

 edddjddd
 	, 	,D** , ,&+##, ! 	# 	#A  """"))+++ 	# 	#A  """"	# 	#r0   c                 8    t          |d          r	||_        d S d S )Ngradient_checkpointing)r)  r<  )r>   modulevalues      r1   _set_gradient_checkpointingz1UNetControlNetXSModel._set_gradient_checkpointingR  s-    6344 	2,1F)))	2 	2r0   c                     i }dt           dt          j        j        dt          t           t
          f         ffd|                                 D ]\  }} |||           |S )z
        Returns:
            `dict` of attention processors: A dictionary containing all attention processors used in the model with
            indexed by its weight name.
        ri   r=  
processorsc                     t          |d          r|                    d          ||  d<   |                                D ]\  }} |  d| ||           |S )Nget_processorT)return_deprecated_lora
.processorr   )r)  rC  named_children)ri   r=  rA  sub_namechildfn_recursive_add_processorss        r1   rI  zJUNetControlNetXSModel.attn_processors.<locals>.fn_recursive_add_processorsa  s    v// d282F2F^b2F2c2c
d.../#)#8#8#:#: U U%++t,@,@h,@,@%TTTTr0   )r   torchr   Moduler   r   rF  )r>   rA  ri   r=  rI  s       @r1   attn_processorsz%UNetControlNetXSModel.attn_processorsV  s     
	c 	58? 	X\]`bt]tXu 	 	 	 	 	 	 !//11 	B 	BLD&''fjAAAAr0   	processorc           	      z   t          | j                                                  }t          |t                    r9t          |          |k    r&t          dt          |           d| d| d          dt          dt          j        j	        ffd| 
                                D ]\  }} |||           dS )	a4  
        Sets the attention processor to use to compute attention.

        Parameters:
            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
                The instantiated processor class or a dictionary of processor classes that will be set as the processor
                for **all** `Attention` layers.

                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
                processor. This is strongly recommended when setting trainable attention processors.

        z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.ri   r=  c                 ,   t          |d          rVt          |t                    s|                    |           n+|                    |                    |  d                     |                                D ]\  }} |  d| ||           d S )Nset_processorrE  r   )r)  rj   dictrP  r   rF  )ri   r=  rM  rG  rH  fn_recursive_attn_processors        r1   rR  zMUNetControlNetXSModel.set_attn_processor.<locals>.fn_recursive_attn_processor  s    v// M!)T22 M((3333(($7J7J7J)K)KLLL#)#8#8#:#: T T%++t,@,@h,@,@%SSSST Tr0   N)r   rL  keysrj   rQ  r   r   rJ  r   rK  rF  )r>   rM  countri   r=  rR  s        @r1   set_attn_processorz(UNetControlNetXSModel.set_attn_processorp  s    D(--//00i&& 	3y>>U+B+BmQTU^Q_Q_ m m05m mRWm m m  
	Tc 	T58? 	T 	T 	T 	T 	T 	T !//11 	A 	ALD&''fi@@@@	A 	Ar0   c           	         t          d | j                                        D                       rt                      }nt          d | j                                        D                       rt	                      }nCt          dt          t          | j                                                                       |                     |           dS )ze
        Disables custom attention processors and sets the default attention implementation.
        c              3   2   K   | ]}|j         t          v V  d S r:   )r?   r   r   procs     r1   r  zCUNetControlNetXSModel.set_default_attn_processor.<locals>.<genexpr>  s*      ii4t~!>>iiiiiir0   c              3   2   K   | ]}|j         t          v V  d S r:   )r?   r   rX  s     r1   r  zCUNetControlNetXSModel.set_default_attn_processor.<locals>.<genexpr>  s*      hh$#==hhhhhhr0   zOCannot call `set_default_attn_processor` when attention processors are of type N)	allrL  valuesr   r   r   nextiterrU  )r>   rM  s     r1   set_default_attn_processorz0UNetControlNetXSModel.set_default_attn_processor  s     ii4K_KfKfKhKhiiiii 	,..IIhh$J^JeJeJgJghhhhh 	%II Nbfgklp  mA  mH  mH  mJ  mJ  hK  hK  cL  cL  N  N   		*****r0   s1s2b1b2c                     t          | j                  D ]I\  }}t          |d|           t          |d|           t          |d|           t          |d|           JdS )a>  Enables the FreeU mechanism from https://arxiv.org/abs/2309.11497.

        The suffixes after the scaling factors represent the stage blocks where they are being applied.

        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of values that
        are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.

        Args:
            s1 (`float`):
                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
                mitigate the "oversmoothing effect" in the enhanced denoising process.
            s2 (`float`):
                Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
                mitigate the "oversmoothing effect" in the enhanced denoising process.
            b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
            b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
        r`  ra  rb  rc  N)r   r   setattr)r>   r`  ra  rb  rc  rq   upsample_blocks          r1   enable_freeuz"UNetControlNetXSModel.enable_freeu  sw    $ "+4>!:!: 	. 	.A~ND"---ND"---ND"---ND"----		. 	.r0   c                     h d}t          | j                  D ]<\  }}|D ]4}t          ||          st          ||d          t	          ||d           5=dS )zDisables the FreeU mechanism.>   rb  rc  r`  ra  N)r   r   r)  r&  re  )r>   
freeu_keysrq   rf  r  s        r1   disable_freeuz#UNetControlNetXSModel.disable_freeu  s    ---
!*4>!:!: 	5 	5A~ 5 5>1-- 5D1Q1Q1]NAt4445	5 	5r0   c                 B   d| _         | j                                        D ]/\  }}dt          |j        j                  v rt          d          0| j        | _         |                                 D ]-}t          |t                    r|
                    d           .dS )u1  
        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
        are fused. For cross-attention modules, key and value projection matrices are fused.

        <Tip warning={true}>

        This API is 🧪 experimental.

        </Tip>
        NAddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.T)fuse)original_attn_processorsrL  r$  r   r?   r*   r   modulesrj   r   fuse_projections)r>   r   attn_processorr=  s       r1   fuse_qkv_projectionsz*UNetControlNetXSModel.fuse_qkv_projections  s     )-%!%!5!;!;!=!= 	v 	vA~#n6?@@@@ !tuuu A )-(<%llnn 	3 	3F&),, 3''T'222	3 	3r0   c                 L    | j         |                     | j                    dS dS )u   Disables the fused QKV projection if enabled.

        <Tip warning={true}>

        This API is 🧪 experimental.

        </Tip>

        N)rn  rU  )r>   s    r1   unfuse_qkv_projectionsz,UNetControlNetXSModel.unfuse_qkv_projections  s2     (4##D$ABBBBB 54r0   r)   timestepencoder_hidden_statescontrolnet_condconditioning_scaleclass_labelstimestep_condattention_maskcross_attention_kwargsadded_cond_kwargsreturn_dictapply_controlc                 	   | j         j        dk    rt          j        |dg          }|5d|                    |j                  z
  dz  }|                    d          }|}t          j        |          sx|j        j	        dk    }t          |t                    r|rt          j        nt          j        }n|rt          j        nt          j        }t          j        |g||j                  }n8t#          |j                  dk    r |d                             |j                  }|                    |j        d                   }|                     |          }|                    |j        	          }| j         j        rL|rJ|                     ||          }|                     ||          }| j         j        d
z  }||z  |d|z
  z  z   }n|                     |          }d}| j         j        n| j         j        dk    rd|
vrt5          | j         d          |
                    d          }d|
vrt5          | j         d          |
                    d          }|                     |                                          }|                    |j        d         df          }t          j         ||gd          }|                    |j                  }| !                    |          }nt5          d| j         j         d          |||z   n|}|}|x}}g g }}| "                    |          }| #                    |          }| $                    |          }|||z  }|r|| %                    |          |z  z   }|&                    |           |&                    |           | j'        D ]D}  | ||||||	||          \  }}}!}"|(                    |!           |(                    |"           E| )                    ||||||	||          \  }}| j*        D ]V}#t#          |#j+                  }$||$ d         }%||$ d         }&|d|$          }|d|$          } |#||%|&||||	||	  	        }W| ,                    |          }| -                    |          }| .                    |          }|s|fS t_          |          S )ay	  
        The [`ControlNetXSModel`] forward method.

        Args:
            sample (`Tensor`):
                The noisy input tensor.
            timestep (`Union[torch.Tensor, float, int]`):
                The number of timesteps to denoise an input.
            encoder_hidden_states (`torch.Tensor`):
                The encoder hidden states.
            controlnet_cond (`Tensor`):
                The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`.
            conditioning_scale (`float`, defaults to `1.0`):
                How much the control model affects the base model outputs.
            class_labels (`torch.Tensor`, *optional*, defaults to `None`):
                Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
            timestep_cond (`torch.Tensor`, *optional*, defaults to `None`):
                Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the
                timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep
                embeddings.
            attention_mask (`torch.Tensor`, *optional*, defaults to `None`):
                An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
                is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
                negative values to the attention scores corresponding to "discard" tokens.
            cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`):
                A kwargs dictionary that if specified is passed along to the `AttnProcessor`.
            added_cond_kwargs (`dict`):
                Additional conditions for the Stable Diffusion XL UNet.
            return_dict (`bool`, defaults to `True`):
                Whether or not to return a [`~models.controlnet.ControlNetOutput`] instead of a plain tuple.
            apply_control (`bool`, defaults to `True`):
                If `False`, the input is run only through the base model.

        Returns:
            [`~models.controlnetxs.ControlNetXSOutput`] **or** `tuple`:
                If `return_dict` is `True`, a [`~models.controlnetxs.ControlNetXSOutput`] is returned, otherwise a
                tuple is returned where the first element is the sample tensor.
        r   r   )dimsNg     mps)r   devicer   )r   g333333?r   text_embedsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`time_idsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`r   dimzgControlNet-XS currently only supports StableDiffusion and StableDiffusion-XL, so addition_embed_type = z is currently not supported.)hidden_states_basehidden_states_ctrltembrv  rx  r|  r{  r  )	hidden_statesres_hidden_states_tuple_baseres_hidden_states_tuple_ctrlr  rv  rx  r|  r{  r  )r)   )0r   r   rJ  flipr   r   	unsqueeze	is_tensorr  typerj   r   float32float64int32int64tensorr   shapeexpandr   r   r   r   r   r   r   r?   getr   flattenreshapeconcatr   r   r   r   r   rm   r   r   r   r   r4   r  r  r  r(   )'r>   r)   ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  	timestepsis_mpsr   t_emb	ctrl_temb	base_tembinterpolation_paramr  aug_embr  r  time_embeds
add_embedscembh_ctrlh_basehs_basehs_ctrlguided_hintdownresidual_hbresidual_hcup	n_resnetsskips_hbskips_hcs'                                          r1   r   zUNetControlNetXSModel.forward  s   n ;6%??#jsCCCO %."3"3FL"A"AAXMN+55a88N 	y)) 
	: ]'50F(E** ?)/BU]'->5;i[fmTTTII!!Q&&!$**6=99I $$V\!_55	##I..
 v|,,;0 	3] 	300FFI00FFI"&+"@#"E22Y!FYBY5ZZDD++E22D ;*2[,;;$555 ~  |  |  |   ,//>>K!222 ~  y  y  y   ),,Z88H11(2B2B2D2DEEK%--{/@/CR.HIIK{K&@bIIIJ#tz22J--j99GG xz~  {F  {Z  x  x  x   ")!4tg~~$ % ! r 44_EE ""6**""6**"k!F 	\d>>vFFI[[[Fvv$ 	( 	(D7;t#)#)&*#5'=-+	8 	8 	84FFK NN;'''NN;'''' %%"&1#9)' ( 	
 	
 . 	 	BBJI	z{{+H	z{{+Hk	zk*Gk	zk*GR$-5-5&*#5'=-+
 
 
FF ((00##F++##F++ 	9!0000r0   )r   r   r   r   rK   rL   r   r   NNTNNr   r}   r   r   Fr   r   rK   )NNNNNr.  N)F)	Nr   NNNNNTT)%r*   r+   r,   r-    _supports_gradient_checkpointingr   r   rk   r	   r   r
   r   r   r<   r   r&   r   r   r   r   r:  r?  propertyr   rL  rU  r_  rg  rj  rr  rt  r   rJ  r   r(   r   rB   rC   s   @r1   r   r     s         (,$ &((
 &u)?)+6:?@67-115!%,0?C$'*+?P/4*/.<;<(*;s[ s[ c]s[  *	s[ c
s[ "#Js[ "#s[ #3c
?3s[ ',CsO&<s[ #3c
?3s[  &c]!s[" "*##s[$ %s[& %SM's[( 08})s[, "-s[. %(/s[0 38*1s[2 *-3s[4 $(5s[6 "'s7s[8 #(U3Z"89s[: #&;s[ s[ s[ s[ s[ s[j  59&*9=.2/3s s"s 01s UO	s
 "*$u+!6s %UOs 'tns s s [sj# # # #:2 2 2 2 c+=&=!>    X0 AE2Dd3PbKbFc2c,d  A  A  A  AF+ + + .u .% .U . . . . .25 5 53 3 30C C C$ 37.1/30415;??C "H1 H1H1 eS01H1  %|	H1
 "%,/H1 %UOH1 u|,H1  -H1 !.H1 !)c3h 8H1 $Del):$;<H1 H1 H1 
!5(	)H1 H1 H1 H1 H1 H1 H1 H1r0   r   c                       e Zd Z	 	 	 	 	 	 	 	 	 d&dededed	ed
edededeeeee         f                  dee         dee         dee         dedee         f fdZe	de
defd            Zd'dZ	 	 	 	 	 	 	 d(dededee         dee         dee         d ee         d!eeeef                  d"ee         d#edeeeeed$f         eed$f         f         fd%Z xZS ))r   rK   Tr   rL   FrM   rN   rO   rP   rQ   re   r   rS   r   r   rU   rV   rW   c                    t                                                       g }g }g }g }g }g }d}t          |	t                    r|	g|z  }	t	          |          D ]F}|dk    r|n|}|dk    r|n|}|                    t          ||                     |                    t          ||||                     |                    t          ||z   ||t          ||z   |          t          ||          d                     |ru|                    t          |
||
z  ||	|         |d||                     |                    t          |||z  ||	|         |d|t          ||                               |                    t          ||                     H|rz|                    t          ||                     t          |d|d	
          | _        t          ||z   d|d	
          | _        |                    t          ||                     nd | _        d | _        t          j        |          | _        t          j        |          | _        |rt          j        |          nd g|z  | _        |rt          j        |          nd g|z  | _        t          j        |          | _        t          j        |          | _        d| _        d S )Nr   r   r]   r^   rQ   r_   rY   r[   r\   Trb   rf   rg   F)r;   r<   rj   rk   rl   rm   rn   r"   ro   r#   r!   base_downsamplersctrl_downsamplersr   r@   base_resnetsctrl_resnetsbase_attentionsctrl_attentionsr5   r6   r<  )r>   rM   rN   rO   rP   rQ   re   r   rp   rS   r   r   rU   rV   rW   r  r  r  r  r6   r5   rc   rq   r?   s                          r1   r<   z)ControlNetXSCrossAttnDownBlock2D.__init__  s   " 	
2C88 	W,H+IJ+V(z"" 9	V 9	VA3466//?P3466//?P /?AQ R RSSS 0!2"/*	      03C C!2"/.(+;;H`    33DQijjj	 	 	    &&&0)-EE$5#?#B,?.2)9(7	 	 	    &&&0)-EE$5#?#B,?.2)9(;<MZr(s(s(s	 	 	   /@BS T TUUUU 	* /@BS T TUUU%1!D?PW[& & &D" &2!$55Sdko& & &D" /@BS T TUUUU%)D"%)D"M,77M,77ANgr}_===UYTZ]gTgANgr}_===UYTZ]gTgM,77M,77&+###r0   base_downblockctrl_downblockc                    d }|j         d         j        }|j         d         j        }|j         d         j        |z
  }|j         d         j        }|j         d         j        j        }|j         d         j        j        }	|j         d         j        j        }
t          |d          rbd}t          |j	        d         j
                  } ||          j        } ||          j        } ||          j        } ||          j        }nd}d }d }d }d }d }|j        d u} | ||||||	|
|||||||          }|j                            |j                                                    |j                            |j                                                    |rb|j                            |j	                                                   |j                            |j	                                                   |rh|j                            |j        d                                                    |j                            |j                                                   |j                            |j                                                   |j                            |j                                                   |S )Nc                 <    | j         d         j        d         j        S Nr   r7   transformer_blocksattn2blocks    r1   get_first_cross_attentionzPControlNetXSCrossAttnDownBlock2D.from_modules.<locals>.get_first_cross_attention.      #A&9!<BBr0   r   r7   TFr   )r4   r]   r^   time_emb_projin_featuresnorm1r   r)  r   r7   r  headsrU   rW   r=   r  r'  r(  r  r  r  r  r  r5   r6   )r   r  r  r  rM   rN   rO   rP   rQ   r   ctrl_num_groupsrp   rS   r   r   rU   rW   rV   r   s                      r1   r  z-ControlNetXSCrossAttnDownBlock2D.from_modules+  s   	C 	C 	C *1!4@*215B"1%14DD 	 +215B&.q1?K#+A.4?
(039D><00 	$ M+.~/H/K/^+_+_('@'@'P'P'V$'@'@'P'P'V$";";N"K"K"_88HHY!M+/('+$'+$"&#'4D@ -/-/'&%4')E%=%= 3)-
 
 
$ 	**>+A+L+L+N+NOOO**>+A+L+L+N+NOOO 	Z!11.2K2V2V2X2XYYY!11.2K2V2V2X2XYYY 	^#33N4OPQ4R4]4]4_4_```#33N4O4Z4Z4\4\]]]**>+F+Q+Q+S+STTT**>+F+Q+Q+S+STTTr0   r.  Nc                 L   |                                  D ]	}d|_        
| j        g}t          | j        t
          j                  r|                    | j                   | j        |                    | j                   |D ] }|                                 D ]	}d|_        
!dS r0  TNF)	r3  r4  r  rj   r  r   r@   rm   r  r>   r6  r7  r2  s       r1   r5  z3ControlNetXSCrossAttnDownBlock2D.freeze_base_paramsj  s     __&& 	' 	'E"&E '(
d*BM:: 	4d2333!-d4555 	, 	,D** , ,&+##,	, 	,r0   r   r  r  rv  r  rx  r{  r|  encoder_attention_maskr  .c
           	         |0|                     dd           t                              d           |}
|}d}d}t          t	          | j        | j                            }t          t	          | j        | j                            }dd}t	          ||| j	        | j
                  D ]4\  \  }}\  }}}}|	r!t          j        | ||
          gd          }| j        rE| j        r>t          dd          rd	d
ini }t          j        j        j         ||          |
|fi |}
n ||
|          }
| ||
||||d
          d         }
|	rq| j        rE| j        r>t          dd          rd	d
ini }t          j        j        j         ||          ||fi |}n |||          }| ||||||d
          d         }|	r|
 ||          |z  z   }
||
fz   }||fz   }6| j        | j	        d         }| j
        d         }|	r!t          j        | ||
          gd          }|                     |
          }
|	r|                     |          }|	r|
 ||          |z  z   }
||
fz   }||fz   }|
|||fS )NscaleSPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r/   c                       fd}|S )Nc                  "     | diS  |  S Nr~  r/   inputsr=  r~  s    r1   custom_forwardz_ControlNetXSCrossAttnDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forward  *    *!66C{CCC!66?*r0   r/   r=  r~  r  s   `` r1   create_custom_forwardzGControlNetXSCrossAttnDownBlock2D.forward.<locals>.create_custom_forward  *    + + + + + + "!r0   r   r  >=1.11.0use_reentrantFrv  r|  r{  r  r~  r   r   r:   )r  loggerwarningr   r*  r  r  r  r  r5   r6   rJ  cattrainingr<  r   utils
checkpointr  r  )r>   r  r  rv  r  rx  r{  r|  r  r  r  r  base_output_statesctrl_output_statesbase_blocksctrl_blocksr  b_resb_attnc_resc_attnb2cc2bckpt_kwargss                           r1   r   z(ControlNetXSCrossAttnDownBlock2D.forward{  s    "-%))'488Dtuuu##3t0$2FGGHH3t0$2FGGHH	" 	" 	" 	" ;>d&79J;
 ;
 8	@ 8	@6OUF_eVc3  AFCCKK#8a@@@ } 	-!< 	-JZ[_aiJjJj.r.F.Fpr/:))%00  "	  vt,,!*?+A#1+A %     = 	1T%@ 	1N^_cemNnNn2v?E2J2JtvK"[3>--e44  &	 FF #U6400F%#V.C/E'5/E$)   F  C##f++0B"BB!3vi!?!3vi!?!-#B'C#B'C  AFCCKK#8a@@@++F33F 8//77 C##f++0B"BB!3vi!?!3vi!?v13EEEr0   )	rK   rK   Tr   r   r   rL   TFr  )NNr   NNNT)r*   r+   r,   rk   r   r
   r	   r   r<   r   r   r3   r  r5  r   r   r   r   r   r   rB   rC   s   @r1   r   r     sX         "(*IJ2323-1#+0s, s,s, s, 	s,
 s, s, s, #&s, '/uS%*_/E&Fs, #+3-s, #+3-s, &c]s, s, #4.s, s, s, s, s, s,j <*> <Pl < < < [<|, , , ,* 37/3.1+/;?37"oF oF"oF oF  (/	oF
 %V,oF %UOoF !(oF !)c3h 8oF !) 0oF oF 
vvuVS[153EE	FoF oF oF oF oF oF oF oFr0   r   c                   L    e Zd Z	 	 	 	 	 	 	 	 d"dededee         d	ed
ededee         dee         dee         def fdZedede	fd            Z
d#dZ	 	 	 	 	 	 d$dedededee         dee         deeeef                  dee         dee         d edeeef         fd!Z xZS )%r   NrK   r   rL   Frt   ru   rQ   re   r   rS   r   r   rU   rW   c                 `   t                                                       t          ||          | _        t	          |||||	|d|
          | _        t	          |||z   ||t          t          |||z             |          |	|d|
	  	        | _        t          ||          | _	        d| _
        d S )NT)rS   r]   rQ   rx   rU   rT   rd   rW   rw   F)r;   r<   rn   r5   r$   base_midblockro   r   ctrl_midblockr6   r<  )r>   rt   ru   rQ   re   r   rS   r   r   rU   rW   r?   s              r1   r<   z(ControlNetXSCrossAttnMidBlock2D.__init__  s     	 +=-HH4)E%') 3 8"&-	
 	
 	
 5)E%5&'-M==#@AAC[  !4 8"&-
 
 
" +=-HH&+###r0   r  r  c                 $   |j         }|j        }|j        }d }|j        }|j        }t          |j        d         j                  }|j        d         j	        j
        }	|j        d         j        j        }
|j        d         j        j        } ||          j        } ||          j        } ||          j        } ||          j        } | |||	|
||||||
  
        }|j                             |                                           |j                            |                                           |j                            |                                           |j                            |                                           |S )Nc                 <    | j         d         j        d         j        S r  r  )rF   s    r1   r  zOControlNetXSCrossAttnMidBlock2D.from_modules.<locals>.get_first_cross_attention,  s    &q)<Q?EEr0   r   r   )r5   r6   rF   r^   r]   r   r7   r  r4   r  r  r  r   r  rU   rW   r'  r(  r  r  )r   r  r  r5   r6   r  rt   ru   rS   rQ   r   r  r   r   rU   rW   r   s                    r1   r  z,ControlNetXSCrossAttnMidBlock2D.from_modules!  s    %1$1%.	F 	F 	F %1$0'*=+CA+F+Y'Z'Z$%-a0>J"*1-3>
'/28C#<#<]#K#K#Q #<#<]#K#K#Q 77FFZ44]CCT '''&%4)E%=%= 3-
 
 
 	**<+B+B+D+DEEE++M,D,D,F,FGGG++M,D,D,F,FGGG**<+B+B+D+DEEEr0   r.  c                     |                                  D ]	}d|_        
| j                                         D ]	}d|_        
dS )r0  TFN)r3  r4  r  )r>   r6  s     r1   r5  z2ControlNetXSCrossAttnMidBlock2D.freeze_base_paramsP  s]     __&& 	' 	'E"&E '2244 	( 	(E"'E	( 	(r0   r   Tr  r  rv  r  rx  r|  r{  r  r  c
                 R   |0|                     dd           t                              d           |}
|}|||||d}|	r+t          j        ||                     |
          gd          } | j        |
fi |}
|	r) | j        |fi |}|
|                     |          |z  z   }
|
|fS )Nr  r  )r  rv  r{  r|  r  r   r  )	r  r  r  rJ  r  r5   r  r  r6   )r>   r  r  rv  r  rx  r|  r{  r  r  r  r  
joint_argss                r1   r   z'ControlNetXSCrossAttnMidBlock2D.forward[  s     "-%))'488Dtuuu## %:,&<&<
 

  	KY(9(9&(A(ABJJJF##F99j99 	M'T'==*==Fd//77:LLLFv~r0   )NrK   rK   r   r   r   rL   Fr  )Nr   NNNT)r*   r+   r,   rk   r   r   r<   r   r$   rE   r  r5  r   r   r   r   r   r	   r   rB   rC   s   @r1   r   r     s       
 (,!(*,-2323-1!&1, 1,1, 1,  }	1,
 1, #&1, '*1, #+3-1, #+3-1, &c]1, 1, 1, 1, 1, 1, 1,f ,., 3, , , [,\	( 	( 	( 	(  04.1;?+/37"" """ "  &	"
 %V," %UO" !)c3h 8" !(" !) 0" " 
vv~	" " " " " " " "r0   r   c                   b    e Zd Z	 	 	 	 	 	 	 	 d'deded	ed
ee         dededee         dededededef fdZede	de
fd            Zd(dZ	 	 	 	 	 	 	 d)dedeedf         deedf         dedee         d ee         d!eeeef                  d"ee         d#ee         d$ee         d%edefd&Z xZS )*r   rK   NTr   rL   Fr]   r^   rz   r{   rQ   re   r   rS   rT   rU   r   rW   c                    t                                                       g }g }g }d}|| _        |
| _        t	          |	t
                    r|	g|z  }	t          |          D ]}||dz
  k    r|n|}|dk    r|n|}|                    t          ||         |                     |                    t          ||z   |||                     |r3|                    t          |
||
z  ||	|         |d||                     t          j        |          | _        |rt          j        |          nd g|z  | _        t          j        |          | _        |rt!          |d|          | _        nd | _        d| _        || _        d S )	Nr}   r   r   r  Trb   )rh   r^   F)r;   r<   has_cross_attentionrT   rj   rk   rl   rm   rn   r"   r#   r   r@   r4   r7   r6   r%   
upsamplersr<  r   )r>   r]   r^   rz   r{   rQ   re   r   rp   rS   rT   rU   r   rW   r4   r7   r6   rc   rq   res_skip_channelsr~   r?   s                        r1   r<   z'ControlNetXSCrossAttnUpBlock2D.__init__  s     	

#0 #6 2C88 	W,H+IJ+V(z"" 	 	A01Z!^0C0C,89Q!4!4L/A!/DFX Y YZZZNN 25F F!-"/*	      !!&+$(;;$0#?#B,?.2)9(7	 	 	   }W--7D]"-
3334&S]J]M,77 	#(S_```DOO"DO&+#,r0   base_upblockctrl_upblockc                    |j         }d }|j        d         j        }|j        d         j        |z
  }|j        d         j        |z
  }d |D             }|j        d         j        j        }	|j        d         j        j        }
|j        }t          |d          rRd}t          |j        d         j                  } ||          j        } ||          j        } ||          j        }n
d}d }d }d }d }|j        d u} | |||||	|
|||||||          }|j                            |j                                                   |r1|j                            |j                                                   |r7|j                            |j        d                                                    |j                             |                                           |S )	Nc                 <    | j         d         j        d         j        S r  r  r  s    r1   r  zNControlNetXSCrossAttnUpBlock2D.from_modules.<locals>.get_first_cross_attention  r  r0   r   r   c                     g | ]	}|j         
S r/   )r]   )r   r!  s     r1   r   z?ControlNetXSCrossAttnUpBlock2D.from_modules.<locals>.<listcomp>  s    TTTq}TTTr0   r7   TF)r]   r^   rz   r{   rQ   re   r   rp   rS   rT   rU   r   rW   )r6   r4   r^   r]   r  r  r  r   r   r)  r   r7   r  r  rU   rW   r  r'  r(  )r   r  r  ctrl_to_base_skip_connectionsr  r^   r]   prev_output_channelsctrl_skip_channelssrQ   r   r   rp   rS   rT   rU   rW   r   r   s                      r1   r  z+ControlNetXSCrossAttnUpBlock2D.from_modules  s(   (4(A%	C 	C 	C $+A.;"*2.:\I+3A6B\QTT6STTT$,Q/=I!)!,2=
%4<.. 	$ M+.|/Fq/I/\+]+](";";L"I"I"O";";L"I"I"]88FFW!M+/("&"&##.d: #% 42'&)')E 3 3%-
 
 
" 	%%l&:&E&E&G&GHHH 	S,,\-D-O-O-Q-QRRR 	V,,\-DQ-G-R-R-T-TUUU**+H+S+S+U+UVVVr0   r.  c                 L   |                                  D ]	}d|_        
| j        g}t          | j        t
          j                  r|                    | j                   | j        |                    | j                   |D ] }|                                 D ]	}d|_        
!dS r  )	r3  r4  r4   rj   r7   r   r@   rm   r  r  s       r1   r5  z1ControlNetXSCrossAttnUpBlock2D.freeze_base_params  s     __&& 	' 	'E"&E l^
dor}55 	/do...?&do... 	, 	,D** , ,&+##,	, 	,r0   r   r  r  .r  r  rv  rx  r|  r{  upsample_sizer  r  c           	      $    |0|                     dd           t                              d           t           dd           o2t           dd           o!t           dd           ot           dd           dd} fd}t	           j         j         j        t          |          t          |                    D ]\  }}}}}|r| ||          |z  z  } |||          \  }}t          j
        ||gd	
          } j        rE j        r>t          dd          rddini }t          j        j        j         ||          ||fi |}n |||          }| ||||||
d          d         } j                             ||	          }|S )Nr  r  r`  ra  rb  rc  c                       fd}|S )Nc                  "     | diS  |  S r  r/   r  s    r1   r  z]ControlNetXSCrossAttnUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forward*  r  r0   r/   r  s   `` r1   r  zEControlNetXSCrossAttnUpBlock2D.forward.<locals>.create_custom_forward)  r  r0   c           	      n    r/t          j        | |j        j        j        j                  S | |fS )N)r`  ra  rb  rc  )r   r   r`  ra  rb  rc  )r  
res_h_baseis_freeu_enabledr>   s     r1   maybe_apply_freeu_to_subblockzMControlNetXSCrossAttnUpBlock2D.forward.<locals>.maybe_apply_freeu_to_subblock2  sO     1"'!wwww    %j00r0   r   r  r  r  r  Fr  r   r:   )r  r  r  r&  r*  r4   r7   r6   r   rJ  r  r  r<  r   r  r  r  )r>   r  r  r  r  rv  rx  r|  r{  r  r  r  r  r  resnetattnr  r  
res_h_ctrlr  r  s   `                   @r1   r   z&ControlNetXSCrossAttnUpBlock2D.forward  s=    "-%))'488Dtuuu D$%% *dD))*dD))* dD))	 		" 	" 	" 	"	1 	1 	1 	1 	1 	1 :=LO122122:
 :
  	  	5FD#z:  FZ3E!EE(E(EmU_(`(`%M:!I}j&AqIIIM} 	<!< 	<JZ[_aiJjJj.r.F.Fpr % 6 A))&11!! ! "	! ! !'}d ; ; $!*?+A#1+A %! ! ! ! ?& OOM=IIMr0   )rK   NTr   r   rL   TFr  )Nr   NNNNT)r*   r+   r,   rk   r   r   r   r<   r   r    rI   r  r5  r   r	   r   r   r   r   r   rB   rC   s   @r1   r   r     s$         "(,,-#$#'!!&D- D-D- D- !	D-
 !ID- D- D- !D- '*D- !D- !D- D- D- D- D- D- D- D-L 5(: 5Jd 5 5 5 [5n, , , ,. 37.1;?+/'+37"V VV ',FCK&8V ',FCK&8	V
 V  (/V %UOV !)c3h 8V !(V  }V !) 0V V 
V V V V V V V Vr0   r   c                 L    t          t          j        | |dd                    S )Nr   r   )r   )zero_moduler   rA   )r]   r^   s     r1   rn   rn   i  s#    rylAqIIIJJJr0   c                 r    |                                  D ]!}t          j                            |           "| S r:   )r3  r   initzeros_)r=  ps     r1   r  r  m  s9        
qMr0   c                 Z    |}|| k    r| S |dk    r| |z  }|dk    r|S |dz  }|dk    d S d S )Nr   r   r/   )numberrZ   factorresiduals       r1   ro   ro   s  sS    F
A++F?q==M!	 A++++++r0   )rK   Tr   r   rL   TF)NrK   r   r   rL   Fr:   )Edataclassesr   mathr   typingr   r   r   r   r	   r
   rJ  torch.utils.checkpointr   r   configuration_utilsr   r   r  r   r   r   utils.torch_utilsr   attention_processorr   r   r   r   r   r   r  r   
embeddingsr   r   modeling_utilsr   unets.unet_2d_blocksr   r    r!   r"   r#   r$   r%   unets.unet_2d_conditionr&   
get_loggerr*   r  r(   rK  r3   rE   rI   rk   r   rs   ry   r   r   r   r   r   r   rn   r  ro   r/   r0   r1   <module>r.     s   " ! ! ! ! !       : : : : : : : : : : : : : : : :              A A A A A A A A 9 9 9 9 9 9 9 9 9 9 + + + + + +                8 7 7 7 7 7 4 4 4 4 4 4 4 4 & & & & & &                  : 9 9 9 9 9 
	H	%	% 
 
 
 
 
 
 
 
( ( ( ( (29 ( ( (() ) ) ) )") ) ) )) ) ) ) ) ) ) ) *,EF)*)-',W! W!W!W! W! 	W!
 W! "#W! #+5eCj+A"BW! "#W! "#W! W! tnW! W! W! W!z $()+())*)-"p ppp C=p "#	p
 #&p "#p "#p p p p pDPPP S	P P P PD
 D
 D
 D
 D
*k D
 D
 D
Ny
1 y
1 y
1 y
1 y
1J y
1 y
1 y
1xuF uF uF uF uFry uF uF uFp	P P P P Pbi P P Pff f f f fRY f f fRK K K K      r0   