
    wiFA                     :   d dl mZmZmZ d dlZd dlmZ d dlZd dl	m
Z d dlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ej        j         G d de                      Z G d dej                  Z e G d dej        ee                      Z!dS )    )OptionalTupleUnionN)
FrozenDict   )ConfigMixinflax_register_to_config)
BaseOutput   )FlaxTimestepEmbeddingFlaxTimesteps)FlaxModelMixin)FlaxCrossAttnDownBlock2DFlaxDownBlock2DFlaxUNetMidBlock2DCrossAttnc                   <    e Zd ZU dZej        ed<   ej        ed<   dS )FlaxControlNetOutputz
    The output of [`FlaxControlNetModel`].

    Args:
        down_block_res_samples (`jnp.ndarray`):
        mid_block_res_sample (`jnp.ndarray`):
    down_block_res_samplesmid_block_res_sampleN)__name__
__module____qualname____doc__jnpndarray__annotations__     p/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/models/controlnet_flax.pyr   r   !   s:            K'''+%%%%%r   r   c                       e Zd ZU eed<   dZeedf         ed<   ej        Z	ej	        ed<   ddZ
d	ej        dej        fd
ZdS )#FlaxControlNetConditioningEmbeddingconditioning_embedding_channels       `      .block_out_channelsdtypereturnNc                    t          j        | j        d         dd| j                  | _        g }t          t          | j                  dz
            D ]}| j        |         }| j        |dz            }t          j        |dd| j                  }|                    |           t          j        |ddd| j                  }|                    |           || _        t          j        | j	        ddt           j
                                        t           j
                                        | j                  | _        d S )	Nr      r-   r   r   r/   )kernel_sizepaddingr)   r   )r   r   r0   stridesr1   r)   r0   r1   kernel_init	bias_initr)   )nnConvr(   r)   conv_inrangelenappendblocksr"   initializers
zeros_initconv_out)selfr=   i
channel_inchannel_outconv1conv2s          r   setupz)FlaxControlNetConditioningEmbedding.setup4   sH   w#A&$*	
 
 
 s4233a788 	! 	!A03J1!a%8KG"(j	  E MM%   G"(j  E MM%    0$2244o0022*
 
 
r   conditioningc                     |                      |          }t          j        |          }| j        D ]!} ||          }t          j        |          }"|                     |          }|S )N)r9   r7   silur=   r@   )rA   rH   	embeddingblocks       r   __call__z,FlaxControlNetConditioningEmbedding.__call__Z   sj    LL..	GI&&	[ 	+ 	+Ei((I	**IIMM),,	r   r*   N)r   r   r   intr   r(   r   r   float32r)   rG   r   rM   r   r   r   r!   r!   /   s         %((((*;c3h;;;{E39"""$
 $
 $
 $
L
S[ 
S[ 
 
 
 
 
 
r   r!   c                   t   e Zd ZU dZdZeed<   dZeed<   dZe	e
df         ed<   d	Zeee	edf         f         ed
<   dZe	edf         ed<   dZeed<   dZeee	edf         f         ed<   dZeeee	edf         f                  ed<   dZeed<   dZeed<   d	Zeed<   ej        Zej        ed<   dZeed<   dZeed<   dZe
ed<   dZe	edf         ed <   d!ej        d"e fd#Z!d.d$Z"	 	 	 d/d&ej#        d'eej#        eef         d(ej#        d)ej#        d*ed+ed,ed"ee$e	e	ej#        df         ej#        f         f         fd-Z%dS )0FlaxControlNetModelu
  
    A ControlNet model.

    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it’s generic methods
    implemented for all models (such as downloading or saving).

    This model is also a Flax Linen [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matters related to its
    general usage and behavior.

    Inherent JAX features such as the following are supported:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        sample_size (`int`, *optional*):
            The size of the input sample.
        in_channels (`int`, *optional*, defaults to 4):
            The number of channels in the input sample.
        down_block_types (`Tuple[str]`, *optional*, defaults to `("FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxDownBlock2D")`):
            The tuple of downsample blocks to use.
        block_out_channels (`Tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
            The tuple of output channels for each block.
        layers_per_block (`int`, *optional*, defaults to 2):
            The number of layers per block.
        attention_head_dim (`int` or `Tuple[int]`, *optional*, defaults to 8):
            The dimension of the attention heads.
        num_attention_heads (`int` or `Tuple[int]`, *optional*):
            The number of attention heads.
        cross_attention_dim (`int`, *optional*, defaults to 768):
            The dimension of the cross attention features.
        dropout (`float`, *optional*, defaults to 0):
            Dropout probability for down, up and bottleneck blocks.
        flip_sin_to_cos (`bool`, *optional*, defaults to `True`):
            Whether to flip the sin to cos in the time embedding.
        freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding.
        controlnet_conditioning_channel_order (`str`, *optional*, defaults to `rgb`):
            The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
        conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`):
            The tuple of output channel for each block in the `conditioning_embedding` layer.
    r%   sample_size   in_channels)CrossAttnDownBlock2DrV   rV   DownBlock2D.down_block_typesFonly_cross_attention)i@  i     rZ   r(   r   layers_per_block   attention_head_dimNnum_attention_headsrZ   cross_attention_dimg        dropoutuse_linear_projectionr)   Tflip_sin_to_cosr   
freq_shiftrgb%controlnet_conditioning_channel_orderr#   #conditioning_embedding_out_channelsrngr*   c                    d| j         | j        | j        f}t          j        |t          j                  }t          j        dt          j                  }t          j        dd| j        ft          j                  }dd| j        dz  | j        dz  f}t          j        |t          j                  }t          j	        
                    |          \  }}	||	d}
|                     |
||||          d         S )Nr   r)   )r   r-   r\   )paramsr`   rj   )rU   rS   r   zerosrP   onesint32r_   jaxrandomsplitinit)rA   rg   sample_shapesample	timestepsencoder_hidden_statescontrolnet_cond_shapecontrolnet_cond
params_rngdropout_rngrngss              r   init_weightsz FlaxControlNetModel.init_weights   s    4+T-=t?OP<s{;;;HT333	 #	1a1I*JRUR] ^ ^ ^!"At'7!';T=MPQ=Q R)$9MMM"%*"2"23"7"7
K$==yyvy2GYYZbccr   c                    | j         }|d         dz  }| j        p| j        }t          j        |d         ddd| j                  | _        t          |d         | j        | j	        j
                  | _        t          || j                  | _        t          |d         | j        	          | _        | j        }t%          |t&                    r|ft)          | j                  z  }t%          |t,                    r|ft)          | j                  z  }g }g }|d         }t          j        |dd
t          j                                        t          j                                        | j                  }|                    |           t5          | j                  D ]\  }	}
|}||	         }|	t)          |          dz
  k    }|
dk    r:t7          ||| j        | j        ||	         | | j        ||	         | j        	  	        }n%t?          ||| j        | j        | | j                  }|                    |           tA          | j                  D ]n}t          j        |dd
t          j                                        t          j                                        | j                  }|                    |           o|slt          j        |dd
t          j                                        t          j                                        | j                  }|                    |           || _!        || _"        |d         }tG          || j        |d         | j        | j                  | _$        t          j        |dd
t          j                                        t          j                                        | j                  | _%        d S )Nr   rT   r,   r/   r.   r2   )rb   rc   ri   )r"   r(   VALIDr4   r   rV   )	rU   out_channelsr`   
num_layersr^   add_downsamplera   rY   r)   )rU   r~   r`   r   r   r)   )rU   r`   r^   ra   r)   )&r(   r^   r]   r7   r8   r)   r9   r   rb   configrc   	time_projr   time_embeddingr!   rf   controlnet_cond_embeddingrY   
isinstanceboolr;   rX   rO   r>   r?   r<   	enumerater   r`   r[   ra   r   r:   down_blockscontrolnet_down_blocksr   	mid_blockcontrolnet_mid_block)rA   r(   time_embed_dimr^   rY   r   r   output_channelcontrolnet_blockrB   down_block_typeinput_channelis_final_block
down_block_mid_block_channels                   r   rG   zFlaxControlNetModel.setup   s   !4+A.2 #6Q$:Q wq!$*
 
 
 'q!43GTXT_Tj
 
 
 4N$*UUU)L,>q,A#G*
 *
 *
&
  $8*D11 	X$8#:SAV=W=W#W )3// 	V#6"83t?T;U;U"U !#+A.72244o0022*
 
 
 	%%&6777"+D,A"B"B 1	@ 1	@A*M/2N#&8"9"9A"==N"8885 -!/ L#4(;A(>'5#5*.*D)=a)@*
 
 


 - -!/ L#4'5#5*  
 z***4011 	@ 	@#%7" &# " : : < < o88::*$ $ $  '--.>????! 	@#%7" &# " : : < < o88::*$ $ $  '--.>???&&<# /r24)L 3B 7"&"<*
 
 
 %'G2244o0022*%
 %
 %
!!!r         ?rs   rt   ru   rw   conditioning_scalereturn_dicttrainc                 B   | j         }|dk    rt          j        |d          }t          |t          j                  s"t          j        |gt          j                  }ngt          |t          j                  rMt          |j                  dk    r5|	                    t          j
                  }t          j        |d          }|                     |          }	|                     |	          }	t          j        |d          }|                     |          }t          j        |d          }|                     |          }||z  }|f}
| j        D ]B}t          |t$                    r |||	||           \  }}n |||	|           \  }}|
|z  }
C|                     ||	||           }d}t)          |
| j                  D ]\  }} ||          }||fz  }|}
|                     |          }fd	|
D             }
|z  }|s|
|fS t/          |
|
          S )a  
        Args:
            sample (`jnp.ndarray`): (batch, channel, height, width) noisy inputs tensor
            timestep (`jnp.ndarray` or `float` or `int`): timesteps
            encoder_hidden_states (`jnp.ndarray`): (batch_size, sequence_length, hidden_size) encoder hidden states
            controlnet_cond (`jnp.ndarray`): (batch, channel, height, width) the conditional input tensor
            conditioning_scale (`float`, *optional*, defaults to `1.0`): the scale factor for controlnet outputs
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] instead of
                a plain tuple.
            train (`bool`, *optional*, defaults to `False`):
                Use deterministic functions and disable dropout when not training.

        Returns:
            [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] or `tuple`:
                [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] if `return_dict` is True, otherwise
                a `tuple`. When returning a tuple, the first element is the sample tensor.
        bgrr   )axisri   r   )r   r   r-   r   )deterministicr   c                     g | ]}|z  S r   r   ).0rs   r   s     r   
<listcomp>z0FlaxControlNetModel.__call__.<locals>.<listcomp>  s    !c!c!c&&+="=!c!c!cr   )r   r   )re   r   flipr   r   arrayrm   r;   shapeastyperP   expand_dimsr   r   	transposer9   r   r   r   r   zipr   r   r   )rA   rs   rt   ru   rw   r   r   r   channel_ordert_embr   r   res_samples!controlnet_down_block_res_samplesdown_block_res_sampler   r   s        `           r   rM   zFlaxControlNetModel.__call__:  s   8 BE!!!hQ???O )S[11 	6	9+SY???II	3;// 	6C	4H4HA4M4M!((s{(;;I	155Iy))##E** v|44f%%-FF88II/! #)* 	2 	2J*&>?? Y&0j@Uinen&o&o&o#&0jRWi&X&X&X#"k1"" /DX]T]^^ -/)7:;QSWSn7o7o 	J 	J3!#3$4$45J$K$K!-2G1II--!B#88@@ "d!c!c!cLb!c!c!c 22 	B*,@AA##9Pd
 
 
 	
r   rN   )r   TF)&r   r   r   r   rS   rO   r   rU   rX   r   strrY   r   r   r(   r[   r]   r^   r   r_   r`   floatra   r   rP   r)   rb   rc   re   rf   rn   Arrayr   r{   rG   r   r   rM   r   r   r   rR   rR   g   s        + +Z KK)eCHo    ;@%eD#I&6 67???*@c3h@@@c67c5c?23777AE%U38_(<"=>EEE####GU"'4'''{E39""" OT   J16)3666;L'sCxLLLd	 dj d d d d~
 ~
 ~
 ~
L %( Q
 Q
Q
 eS01Q
  #{	Q

 Q
 "Q
 Q
 Q
 
#U5c1A+BCK+O%PP	QQ
 Q
 Q
 Q
 Q
 Q
r   rR   )"typingr   r   r   flax
flax.linenlinenr7   rn   	jax.numpynumpyr   flax.core.frozen_dictr   configuration_utilsr   r	   utilsr
   embeddings_flaxr   r   modeling_flax_utilsr   unets.unet_2d_blocks_flaxr   r   r   struct	dataclassr   Moduler!   rR   r   r   r   <module>r      s   * ) ) ) ) ) ) ) ) )        



       , , , , , , F F F F F F F F       A A A A A A A A / / / / / /          
& 
& 
& 
& 
&: 
& 
& 
&5 5 5 5 5") 5 5 5p c
 c
 c
 c
 c
")^[ c
 c
 c
 c
 c
r   