
    wiY                     <   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
c mZ ddlmZmZ ddlmZ ddlmZ e G d	 d
e                      Zdej        dedej        fdZdej        deej                 dej        fdZddefdZddefdZ G d dee          ZdS )    )	dataclass)OptionalTupleUnionN   )ConfigMixinregister_to_config)
BaseOutput   )SchedulerMixinc                   (    e Zd ZU dZej        ed<   dS )VQDiffusionSchedulerOutputa.  
    Output class for the scheduler's step function output.

    Args:
        prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
            Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
    prev_sampleN)__name__
__module____qualname____doc__torch
LongTensor__annotations__     |/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/schedulers/scheduling_vq_diffusion.pyr   r      s-           !!!!!!r   r   xnum_classesreturnc                     t          j        | |          }|                    ddd          }t          j        |                                                    d                    }|S )a  
    Convert batch of vector of class indices into batch of log onehot vectors

    Args:
        x (`torch.LongTensor` of shape `(batch size, vector length)`):
            Batch of class indices

        num_classes (`int`):
            number of classes to be used for the onehot vectors

    Returns:
        `torch.Tensor` of shape `(batch size, num classes, vector length)`:
            Log onehot vectors
    r   r   r   KH9)min)Fone_hotpermuter   logfloatclamp)r   r   x_onehotlog_xs       r   index_to_log_onehotr(   )   s[     yK((H1a((HIhnn&&,,,7788ELr   logits	generatorc                     t          j        | j        | j        |          }t          j        t          j        |dz              dz              }|| z   }|S )z(
    Apply gumbel noise to `logits`
    )devicer*   r   )r   randshaper,   r#   )r)   r*   uniformgumbel_noisenoiseds        r   gumbel_noisedr2   >   sU     jfmyQQQGIuy5999EABBBLF"FMr   wJ??̔>num_diffusion_timestepsc                     t          j        d|           | dz
  z  ||z
  z  |z   }t          j        dg|f          }|dd         |dd         z  }t          j        |dd         dgf          }||fS )zN
    Cumulative and non-cumulative alpha schedules.

    See section 4.1.
    r   r   Nnparangeconcatenate)r5   alpha_cum_startalpha_cum_endattats        r   alpha_schedulesr@   H   s     		!,--1H11LMQ^apQpq
	  .1#s
$
$C	QRR3ss8	B
.#abb'A3
(
(Cs7Nr   c                     t          j        d|           | dz
  z  ||z
  z  |z   }t          j        dg|f          }d|z
  }|dd         |dd         z  }d|z
  }t          j        |dd         dgf          }||fS )zN
    Cumulative and non-cumulative gamma schedules.

    See section 4.1.
    r   r   Nr7   r8   )r5   gamma_cum_startgamma_cum_endcttone_minus_cttone_minus_ctcts          r   gamma_schedulesrH   X   s     		!,--1H11LMQ^apQpq
	  .1#s
$
$CGM $}SbS'99L	
\	B
.#abb'A3
(
(Cs7Nr   c                   *   e Zd ZdZdZe	 	 	 	 	 d dededed	ed
edefd            Zd!dede	e
ej        f         fdZ	 	 d"dej        dej        dej        deej                 dede	eef         fdZd Zdej        dej        dej        defdZd ZdS )#VQDiffusionSchedulera  
    A scheduler for vector quantized diffusion.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.

    Args:
        num_vec_classes (`int`):
            The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
            latent pixel.
        num_train_timesteps (`int`, defaults to 100):
            The number of diffusion steps to train the model.
        alpha_cum_start (`float`, defaults to 0.99999):
            The starting cumulative alpha value.
        alpha_cum_end (`float`, defaults to 0.00009):
            The ending cumulative alpha value.
        gamma_cum_start (`float`, defaults to 0.00009):
            The starting cumulative gamma value.
        gamma_cum_end (`float`, defaults to 0.99999):
            The ending cumulative gamma value.
    r   d   r3   r4   num_vec_classesnum_train_timestepsr<   r=   rB   rC   c                 R   || _         | j         dz
  | _        t          |||          \  }}t          |||          \  }	}
| j         dz
  }d|z
  |	z
  |z  }d|z
  |
z
  |z  }t	          j        |                    d                    }t	          j        |                    d                    }t	          j        |	                    d                    }	t	          j        |          }t	          j        |          }t	          j        |	          }t	          j        |                    d                    }t	          j        |                    d                    }t	          j        |
                    d                    }
t	          j        |          }t	          j        |          }t	          j        |
          }|                                | _	        |                                | _
        |                                | _        |                                | _        |                                | _        |                                | _        d | _        t	          j        t#          j        d|          d d d                                                   | _        d S )Nr   )r<   r=   )rB   rC   float64r   r7   )	num_embed
mask_classr@   rH   r   tensorastyper#   r$   log_atlog_btlog_ctlog_cumprod_atlog_cumprod_btlog_cumprod_ctnum_inference_steps
from_numpyr9   r:   copy	timesteps)selfrL   rM   r<   r=   rB   rC   r?   r>   rG   rD   num_non_mask_classesbtbttrT   rU   rV   rW   rX   rY   s                       r   __init__zVQDiffusionScheduler.__init__   s    ) .1,!"5fstttC!"5fstttC#~1"frk113w} 44\"))I..//\"))I..//\"))I..//222l3::i0011l3::i0011l3::i0011333llnnllnnllnn,2244,2244,2244 $( )")A7J*K*KDDbD*Q*V*V*X*XYYr   NrZ   r,   c                 J   || _         t          j        d| j                   ddd                                         }t	          j        |                              |          | _        | j                            |          | _        | j	                            |          | _	        | j
                            |          | _
        | j                            |          | _        | j                            |          | _        | j                            |          | _        dS )a  
        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

        Args:
            num_inference_steps (`int`):
                The number of diffusion steps used when generating samples with a pre-trained model.
            device (`str` or `torch.device`, *optional*):
                The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved
                to.
        r   Nr7   )rZ   r9   r:   r\   r   r[   tor]   rT   rU   rV   rW   rX   rY   )r^   rZ   r,   r]   s       r   set_timestepsz"VQDiffusionScheduler.set_timesteps   s     $7 Ia!9::44R4@EEGG	))4477??knnV,,knnV,,knnV,,"144V<<"144V<<"144V<<r   Tmodel_outputtimestepsampler*   return_dictr   c                     |dk    r|}n|                      |||          }t          ||          }|                    d          }|s|fS t          |          S )a  
        Predict the sample from the previous timestep by the reverse transition distribution. See
        [`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer.

        Args:
            log_p_x_0: (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            t (`torch.long`):
                The timestep that determines which transition matrices are used.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            generator (`torch.Generator`, or `None`):
                A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
                `tuple`.

        Returns:
            [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
                If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
                returned, otherwise a tuple is returned where the first element is the sample tensor.
        r   r   dim)r   )q_posteriorr2   argmaxr   )r^   rf   rg   rh   r*   ri   log_p_x_t_min_1	x_t_min_1s           r   stepzVQDiffusionScheduler.step   sq    > q==*OO"..|VXNNO'CC#**q*11	 	 <)i@@@@r   c                    t          || j                  }|                     |||d          }|                     |||d          }||z
  }t          j        |dd          }||z
  }|                     ||dz
            }||z   |z   }	|	S )a  
        Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

        ```
        p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
        ```

        Args:
            log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                prediction for the masked class as the initial unnoised image cannot be masked.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`:
                The log probabilities for the predicted classes of the image at timestep `t-1`.
        T)tx_tlog_onehot_x_t
cumulativeFr   )rl   keepdim)r(   rP   $log_Q_t_transitioning_to_known_classr   	logsumexpapply_cumulative_transitions)
r^   	log_p_x_0rt   rs   ru   log_q_x_t_given_x_0log_q_t_given_x_t_min_1qq_log_sum_expro   s
             r   rm   z VQDiffusionScheduler.q_posterior   s    * -S$.AA"GGSD H 
 
 #'"K"KSE #L #
 #
 ++ q$???  --aQ77 55E\ r   rs   rt   ru   rv   c                   |r(| j         |         }| j        |         }| j        |         }n'| j        |         }| j        |         }| j        |         }|s"|dddddf                             d          }|ddddddf         }||z                       |          }	|| j        k    }
|
                    d          	                    d| j
        dz
  d          }
||	|
<   |st          j        |	|fd          }	|	S )a/	  
        Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
        latent pixel in `x_t`.

        Args:
            t (`torch.Long`):
                The timestep that determines which transition matrix is used.
            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                The classes of each latent pixel at time `t`.
            log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`):
                The log one-hot vectors of `x_t`.
            cumulative (`bool`):
                If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
                `True`, the cumulative transition matrix `0`->`t` is used.

        Returns:
            `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`:
                Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability
                transition matrix.

                When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be
                masked.

                Where:
                - `q_n` is the probability distribution for the forward process of the `n`th latent pixel.
                - C_0 is a class of a latent pixel embedding
                - C_k is the class of the masked latent pixel

                non-cumulative result (omitting logarithms):
                ```
                q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0)
                          .      .                     .
                          .               .            .
                          .                      .     .
                q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k)
                ```

                cumulative result (omitting logarithms):
                ```
                q_0_cumulative(x_t | x_0 = C_0)    ...  q_n_cumulative(x_t | x_0 = C_0)
                          .               .                          .
                          .                        .                 .
                          .                               .          .
                q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1})
                ```
        Nr7   r   rk   )rW   rX   rY   rT   rU   rV   	unsqueeze	logaddexprQ   expandrP   r   cat)r^   rs   rt   ru   rv   abc(log_onehot_x_t_transitioning_from_maskedlog_Q_tmask_class_masks              r   rx   z9VQDiffusionScheduler.log_Q_t_transitioning_to_known_classd  s2   b  	#A&A#A&A#A&AAAAAAAA 	] 8FaaaQQQh7O7Y7YZ[7\7\4
 (3B3	2 "A%0033 0)33A66==b$.STBTVXYY#$  	\i*R SYZ[[[Gr   c                    |j         d         }| j        |         }| j        |         }| j        |         }|j         d         }|                    |d|          }||z                       |          }t          j        ||fd          }|S )Nr   r   r   rk   )r.   rW   rX   rY   r   r   r   r   )r^   r~   rs   bszr   r   r   num_latent_pixelss           r   rz   z1VQDiffusionScheduler.apply_cumulative_transitions  s    gaj"""GAJHHS!.//Ua  Iq!f!$$$r   )rK   r3   r4   r4   r3   )N)NT)r   r   r   r   orderr	   intr$   rb   r   strr   r,   re   Tensorlongr   r   	Generatorboolr   r   rq   rm   rx   rz   r   r   r   rJ   rJ   j   s        , E $'!('!)&,Z ,Z,Z !,Z 	,Z
 ,Z ,Z ,Z ,Z ,Z ,Z\= = =eCDU>V = = = =6 04 +A +Al+A *+A  	+A
 EO,+A +A 
)50	1+A +A +A +AZm m m^aIa$)$4aFKla`da a a aF    r   rJ   )r3   r4   )r4   r3   )dataclassesr   typingr   r   r   numpyr9   r   torch.nn.functionalnn
functionalr    configuration_utilsr   r	   utilsr
   scheduling_utilsr   r   r   r   r   r(   r   r2   r@   rH   rJ   r   r   r   <module>r      s   " ! ! ! ! ! ) ) ) ) ) ) ) ) ) )               A A A A A A A A       , , , , , , 
" 
" 
" 
" 
" 
" 
" 
"5+ # %,    *%, 8EO3L QVQ]     S      S    $i i i i i>; i i i i ir   