§
    òwŠiªY  ã                   ó<  — d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
c mZ ddlmZmZ ddlmZ ddlmZ e G d	„ d
e¦  «        ¦   «         Zdej        dedej        fd„Zdej        deej                 dej        fd„Zddefd„Zddefd„Z G d„ dee¦  «        ZdS )é    )Ú	dataclass)ÚOptionalÚTupleÚUnionNé   )ÚConfigMixinÚregister_to_config)Ú
BaseOutputé   )ÚSchedulerMixinc                   ó(   — e Zd ZU dZej        ed<   dS )ÚVQDiffusionSchedulerOutputa.  
    Output class for the scheduler's step function output.

    Args:
        prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
            Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    Úprev_sampleN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚtorchÚ
LongTensorÚ__annotations__© ó    ú|/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/schedulers/scheduling_vq_diffusion.pyr   r      s-   € € € € € € ðð ð Ô!Ð!Ð!Ñ!Ð!Ð!r   r   ÚxÚnum_classesÚreturnc                 óÒ   — t          j        | |¦  «        }|                     ddd¦  «        }t          j        |                     ¦   «                              d¬¦  «        ¦  «        }|S )aŸ  
    Convert batch of vector of class indices into batch of log onehot vectors

    Args:
        x (`torch.LongTensor` of shape `(batch size, vector length)`):
            Batch of class indices

        num_classes (`int`):
            number of classes to be used for the onehot vectors

    Returns:
        `torch.Tensor` of shape `(batch size, num classes, vector length)`:
            Log onehot vectors
    r   r   r   ç ÂëþKH´9)Úmin)ÚFÚone_hotÚpermuter   ÚlogÚfloatÚclamp)r   r   Úx_onehotÚlog_xs       r   Úindex_to_log_onehotr(   )   s[   € õ Œy˜˜KÑ(Ô(€HØ×Ò  1 aÑ(Ô(€HÝŒIh—n’nÑ&Ô&×,Ò,°Ð,Ñ7Ô7Ñ8Ô8€EØ€Lr   ÚlogitsÚ	generatorc                 ó®   — t          j        | j        | j        |¬¦  «        }t          j        t          j        |dz   ¦  «         dz   ¦  «         }|| z   }|S )z(
    Apply gumbel noise to `logits`
    )Údevicer*   r   )r   ÚrandÚshaper,   r#   )r)   r*   ÚuniformÚgumbel_noiseÚnoiseds        r   Úgumbel_noisedr2   >   sU   € õ Œj˜œ¨f¬mÀyÐQÑQÔQ€GÝ”Iuœy¨°5©Ñ9Ô9Ð9¸EÑAÑBÔBÐB€LØ˜FÑ"€FØ€Mr   çwJëÿï?ç?«Ì”Ößâ>Únum_diffusion_timestepsc                 óè   — t          j        d| ¦  «        | dz
  z  ||z
  z  |z   }t          j        dg|f¦  «        }|dd…         |dd…         z  }t          j        |dd…         dgf¦  «        }||fS )zN
    Cumulative and non-cumulative alpha schedules.

    See section 4.1.
    r   r   Néÿÿÿÿ©ÚnpÚarangeÚconcatenate)r5   Úalpha_cum_startÚalpha_cum_endÚattÚats        r   Úalpha_schedulesr@   H   s   € õ 	Œ	!Ð,Ñ-Ô-Ð1HÈ1Ñ1LÑMÐQ^ÐapÑQpÑqØ
ñ	ð õ Œ.˜1˜#˜s˜Ñ
$Ô
$€CØ	ˆQˆRˆRŒ3s˜s”8Ñ	€BÝ
Œ.˜#˜a˜b˜bœ' A 3˜Ñ
(Ô
(€CØˆsˆ7€Nr   c                 óü   — t          j        d| ¦  «        | dz
  z  ||z
  z  |z   }t          j        dg|f¦  «        }d|z
  }|dd…         |dd…         z  }d|z
  }t          j        |dd…         dgf¦  «        }||fS )zN
    Cumulative and non-cumulative gamma schedules.

    See section 4.1.
    r   r   Nr7   r8   )r5   Úgamma_cum_startÚgamma_cum_endÚcttÚone_minus_cttÚone_minus_ctÚcts          r   Úgamma_schedulesrH   X   s¤   € õ 	Œ	!Ð,Ñ-Ô-Ð1HÈ1Ñ1LÑMÐQ^ÐapÑQpÑqØ
ñ	ð õ Œ.˜1˜#˜s˜Ñ
$Ô
$€CØ˜‘G€MØ    Ô$ }°S°b°SÔ'9Ñ9€LØ	
ˆ\Ñ	€BÝ
Œ.˜#˜a˜b˜bœ' A 3˜Ñ
(Ô
(€CØˆsˆ7€Nr   c                   ó*  — e Zd ZdZdZe	 	 	 	 	 d dededed	ed
edefd„¦   «         Zd!dede	e
ej        f         fd„Z	 	 d"dej        dej        dej        deej                 dede	eef         fd„Zd„ Zdej        dej        dej        defd„Zd„ ZdS )#ÚVQDiffusionScheduleraÉ  
    A scheduler for vector quantized diffusion.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.

    Args:
        num_vec_classes (`int`):
            The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
            latent pixel.
        num_train_timesteps (`int`, defaults to 100):
            The number of diffusion steps to train the model.
        alpha_cum_start (`float`, defaults to 0.99999):
            The starting cumulative alpha value.
        alpha_cum_end (`float`, defaults to 0.00009):
            The ending cumulative alpha value.
        gamma_cum_start (`float`, defaults to 0.00009):
            The starting cumulative gamma value.
        gamma_cum_end (`float`, defaults to 0.99999):
            The ending cumulative gamma value.
    r   éd   r3   r4   Únum_vec_classesÚnum_train_timestepsr<   r=   rB   rC   c                 óR  — || _         | j         dz
  | _        t          |||¬¦  «        \  }}t          |||¬¦  «        \  }	}
| j         dz
  }d|z
  |	z
  |z  }d|z
  |
z
  |z  }t	          j        |                     d¦  «        ¦  «        }t	          j        |                     d¦  «        ¦  «        }t	          j        |	                     d¦  «        ¦  «        }	t	          j        |¦  «        }t	          j        |¦  «        }t	          j        |	¦  «        }t	          j        |                     d¦  «        ¦  «        }t	          j        |                     d¦  «        ¦  «        }t	          j        |
                     d¦  «        ¦  «        }
t	          j        |¦  «        }t	          j        |¦  «        }t	          j        |
¦  «        }|                     ¦   «         | _	        |                     ¦   «         | _
        |                     ¦   «         | _        |                     ¦   «         | _        |                     ¦   «         | _        |                     ¦   «         | _        d | _        t	          j        t#          j        d|¦  «        d d d…                              ¦   «         ¦  «        | _        d S )Nr   )r<   r=   )rB   rC   Úfloat64r   r7   )Ú	num_embedÚ
mask_classr@   rH   r   ÚtensorÚastyper#   r$   Úlog_atÚlog_btÚlog_ctÚlog_cumprod_atÚlog_cumprod_btÚlog_cumprod_ctÚnum_inference_stepsÚ
from_numpyr9   r:   ÚcopyÚ	timesteps)ÚselfrL   rM   r<   r=   rB   rC   r?   r>   rG   rD   Únum_non_mask_classesÚbtÚbttrT   rU   rV   rW   rX   rY   s                       r   Ú__init__zVQDiffusionScheduler.__init__ƒ   s  € ð )ˆŒð œ.¨1Ñ,ˆŒå!Ð"5ÀÐfsÐtÑtÔt‰ˆˆCÝ!Ð"5ÀÐfsÐtÑtÔt‰ˆˆCà#œ~°Ñ1ÐØ"‰fr‰kÐ1Ñ1ˆØ3‰w˜‰}Ð 4Ñ4ˆåŒ\˜"Ÿ)š) IÑ.Ô.Ñ/Ô/ˆÝŒ\˜"Ÿ)š) IÑ.Ô.Ñ/Ô/ˆÝŒ\˜"Ÿ)š) IÑ.Ô.Ñ/Ô/ˆÝ”˜2‘”ˆÝ”˜2‘”ˆÝ”˜2‘”ˆåŒl˜3Ÿ:š: iÑ0Ô0Ñ1Ô1ˆÝŒl˜3Ÿ:š: iÑ0Ô0Ñ1Ô1ˆÝŒl˜3Ÿ:š: iÑ0Ô0Ñ1Ô1ˆÝœ 3™œˆÝœ 3™œˆÝœ 3™œˆà—l’l‘n”nˆŒØ—l’l‘n”nˆŒØ—l’l‘n”nˆŒØ,×2Ò2Ñ4Ô4ˆÔØ,×2Ò2Ñ4Ô4ˆÔØ,×2Ò2Ñ4Ô4ˆÔð $(ˆÔ ÝÔ)­"¬)°AÐ7JÑ*KÔ*KÈDÈDÈbÈDÔ*Q×*VÒ*VÑ*XÔ*XÑYÔYˆŒˆˆr   NrZ   r,   c                 óJ  — || _         t          j        d| j         ¦  «        ddd…                              ¦   «         }t	          j        |¦  «                             |¦  «        | _        | j                             |¦  «        | _        | j	                             |¦  «        | _	        | j
                             |¦  «        | _
        | j                             |¦  «        | _        | j                             |¦  «        | _        | j                             |¦  «        | _        dS )aË  
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

        Args:
            num_inference_steps (`int`):
                The number of diffusion steps used when generating samples with a pre-trained model.
            device (`str` or `torch.device`, *optional*):
                The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved
                to.
        r   Nr7   )rZ   r9   r:   r\   r   r[   Útor]   rT   rU   rV   rW   rX   rY   )r^   rZ   r,   r]   s       r   Úset_timestepsz"VQDiffusionScheduler.set_timesteps²   sç   € ð $7ˆÔ Ý”I˜a Ô!9Ñ:Ô:¸4¸4¸R¸4Ô@×EÒEÑGÔGˆ	ÝÔ)¨)Ñ4Ô4×7Ò7¸Ñ?Ô?ˆŒà”k—n’n VÑ,Ô,ˆŒØ”k—n’n VÑ,Ô,ˆŒØ”k—n’n VÑ,Ô,ˆŒØ"Ô1×4Ò4°VÑ<Ô<ˆÔØ"Ô1×4Ò4°VÑ<Ô<ˆÔØ"Ô1×4Ò4°VÑ<Ô<ˆÔÐÐr   TÚmodel_outputÚtimestepÚsampler*   Úreturn_dictr   c                 ó¸   — |dk    r|}n|                       |||¦  «        }t          ||¦  «        }|                     d¬¦  «        }|s|fS t          |¬¦  «        S )aË  
        Predict the sample from the previous timestep by the reverse transition distribution. See
        [`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer.

        Args:
            log_p_x_0: (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            t (`torch.long`):
                The timestep that determines which transition matrices are used.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            generator (`torch.Generator`, or `None`):
                A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
                `tuple`.

        Returns:
            [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
                If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
                returned, otherwise a tuple is returned where the first element is the sample tensor.
        r   r   ©Údim)r   )Úq_posteriorr2   Úargmaxr   )r^   rf   rg   rh   r*   ri   Úlog_p_x_t_min_1Ú	x_t_min_1s           r   ÚstepzVQDiffusionScheduler.stepÈ   sq   € ð> qŠ=ˆ=Ø*ˆOˆOà"×.Ò.¨|¸VÀXÑNÔNˆOå'¨¸ÑCÔCˆà#×*Ò*¨qÐ*Ñ1Ô1ˆ	àð 	 Ø<Ðå)°iÐ@Ñ@Ô@Ð@r   c                 ó  — t          || j        ¦  «        }|                      |||d¬¦  «        }|                      |||d¬¦  «        }||z
  }t          j        |dd¬¦  «        }||z
  }|                      ||dz
  ¦  «        }||z   |z   }	|	S )aè  
        Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

        ```
        p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
        ```

        Args:
            log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`:
                The log probabilities for the predicted classes of the image at timestep `t-1`.
        T)ÚtÚx_tÚlog_onehot_x_tÚ
cumulativeFr   )rl   Úkeepdim)r(   rP   Ú$log_Q_t_transitioning_to_known_classr   Ú	logsumexpÚapply_cumulative_transitions)
r^   Ú	log_p_x_0rt   rs   ru   Úlog_q_x_t_given_x_0Úlog_q_t_given_x_t_min_1ÚqÚq_log_sum_expro   s
             r   rm   z VQDiffusionScheduler.q_posteriorõ   sÈ   € õ* -¨S°$´.ÑAÔAˆà"×GÒGØS¨ÀDð Hñ 
ô 
Ðð #'×"KÒ"KØS¨ÀEð #Lñ #
ô #
Ðð Ð+Ñ+ˆõ œ¨¨q¸$Ð?Ñ?Ô?ˆð Ñˆð ×-Ò-¨a°°Q±Ñ7Ô7ˆð Ð5Ñ5¸ÑEˆð\ Ðr   rs   rt   ru   rv   c                óú  — |r(| j         |         }| j        |         }| j        |         }n'| j        |         }| j        |         }| j        |         }|s"|dd…ddd…f                              d¦  «        }|dd…dd…dd…f         }||z                        |¦  «        }	|| j        k    }
|
                     d¦  «         	                    d| j
        dz
  d¦  «        }
||	|
<   |st          j        |	|fd¬¦  «        }	|	S )a/	  
        Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
        latent pixel in `x_t`.

        Args:
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`):
                The log one-hot vectors of `x_t`.
            cumulative (`bool`):
                If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
                `True`, the cumulative transition matrix `0`->`t` is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`:
                Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability
                transition matrix.

                When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be
                masked.

                Where:
                - `q_n` is the probability distribution for the forward process of the `n`th latent pixel.
                - C_0 is a class of a latent pixel embedding
                - C_k is the class of the masked latent pixel

                non-cumulative result (omitting logarithms):
                ```
                q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0)
                          .      .                     .
                          .               .            .
                          .                      .     .
                q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k)
                ```

                cumulative result (omitting logarithms):
                ```
                q_0_cumulative(x_t | x_0 = C_0)    ...  q_n_cumulative(x_t | x_0 = C_0)
                          .               .                          .
                          .                        .                 .
                          .                               .          .
                q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1})
                ```
        Nr7   r   rk   )rW   rX   rY   rT   rU   rV   Ú	unsqueezeÚ	logaddexprQ   ÚexpandrP   r   Úcat)r^   rs   rt   ru   rv   ÚaÚbÚcÚ(log_onehot_x_t_transitioning_from_maskedÚlog_Q_tÚmask_class_masks              r   rx   z9VQDiffusionScheduler.log_Q_t_transitioning_to_known_classd  s2  € ðb ð 	ØÔ# AÔ&ˆAØÔ# AÔ&ˆAØÔ# AÔ&ˆAˆAà”˜A”ˆAØ”˜A”ˆAØ”˜A”ˆAàð 	]ð 8FÀaÀaÀaÈÈQÈQÈQÀhÔ7O×7YÒ7YÐZ[Ñ7\Ô7\Ð4ð
 (¨¨¨¨3¨B¨3°°°¨	Ô2ˆð " AÑ%×0Ò0°Ñ3Ô3ˆð  ¤Ò0ˆØ)×3Ò3°AÑ6Ô6×=Ò=¸bÀ$Ä.ÐSTÑBTÐVXÑYÔYˆØ#$ˆÑ àð 	\Ý”i Ð*RÐ SÐYZÐ[Ñ[Ô[ˆGàˆr   c                 ó  — |j         d         }| j        |         }| j        |         }| j        |         }|j         d         }|                     |d|¦  «        }||z                        |¦  «        }t          j        ||fd¬¦  «        }|S )Nr   r   r   rk   )r.   rW   rX   rY   rƒ   r‚   r   r„   )r^   r~   rs   Úbszr…   r†   r‡   Únum_latent_pixelss           r   rz   z1VQDiffusionScheduler.apply_cumulative_transitionsÇ  sŠ   € ØŒgaŒjˆØÔ Ô"ˆØÔ Ô"ˆØÔ Ô"ˆàœG AœJÐØHŠHS˜!Ð.Ñ/Ô/ˆà‰U×Ò˜aÑ Ô ˆÝŒIq˜!f !Ð$Ñ$Ô$ˆàˆr   )rK   r3   r4   r4   r3   )N)NT)r   r   r   r   Úorderr	   Úintr$   rb   r   Ústrr   r,   re   ÚTensorÚlongr   r   Ú	GeneratorÚboolr   r   rq   rm   rx   rz   r   r   r   rJ   rJ   j   s¹  € € € € € ðð ð, €Eàð $'Ø!(Ø'Ø!)Ø&ð,Zð ,Zàð,Zð !ð,Zð ð	,Zð
 ð,Zð ð,Zð ð,Zð ,Zð ,Zñ Ôð,Zð\=ð =°ð =¸eÀCÈÌÐDUÔ>Vð =ð =ð =ð =ð6 04Ø ð+Að +Aà”lð+Að ”*ð+Að Ô ð	+Að
 ˜EœOÔ,ð+Að ð+Að 
Ð)¨5Ð0Ô	1ð+Að +Að +Að +AðZmð mð mð^aØ”IðaØ$)Ô$4ðaØFKÄlðaØ`dðað að að aðFð ð ð ð r   rJ   )r3   r4   )r4   r3   )Údataclassesr   Útypingr   r   r   Únumpyr9   r   Útorch.nn.functionalÚnnÚ
functionalr    Úconfiguration_utilsr   r	   Úutilsr
   Úscheduling_utilsr   r   r   r   r‘   r(   r“   r2   r@   rH   rJ   r   r   r   ú<module>rž      s½  ðð "Ð !Ð !Ð !Ð !Ð !Ø )Ð )Ð )Ð )Ð )Ð )Ð )Ð )Ð )Ð )à Ð Ð Ð Ø €€€Ø Ð Ð Ð Ð Ð Ð Ð Ð à AÐ AÐ AÐ AÐ AÐ AÐ AÐ AØ Ð Ð Ð Ð Ð Ø ,Ð ,Ð ,Ð ,Ð ,Ð ,ð ð
"ð 
"ð 
"ð 
"ð 
" ñ 
"ô 
"ñ „ð
"ð˜5Ô+ð ¸#ð À%Ä,ð ð ð ð ð*˜%œ,ð °8¸E¼OÔ3Lð ÐQVÔQ]ð ð ð ð ðð ¨Sð ð ð ð ð ð ¨Sð ð ð ð ð$ið ið ið ið i˜>¨;ñ iô ið ið ið ir   