
    wi&                     ~    d dl mZmZmZmZmZ d dlZddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZ  G d	 d
e          ZdS )    )DictListOptionalTupleUnionN   )AutoencoderKLDiTTransformer2DModel)KarrasDiffusionSchedulers)randn_tensor   )DiffusionPipelineImagePipelineOutputc                   N    e Zd ZdZdZ	 ddedededee	e
ef                  f fdZd	eeee         f         d
ee
         fdZ ej                    	 	 	 	 	 ddee
         dedeeej        eej                 f                  de
dee         ded
eeef         fd            Z xZS )DiTPipelinea  
    Pipeline for image generation based on a Transformer backbone instead of a UNet.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

    Parameters:
        transformer ([`DiTTransformer2DModel`]):
            A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
        scheduler ([`DDIMScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    ztransformer->vaeNtransformervae	schedulerid2labelc                    t                                                       |                     |||           i | _        ||                                D ]X\  }}|                    d          D ]=}t          |          | j        |                                                                <   >Yt          t          | j                                                            | _        d S d S )N)r   r   r   ,)super__init__register_moduleslabelsitemssplitintlstriprstripdictsorted)	selfr   r   r   r   keyvaluelabel	__class__s	           t/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/diffusers/pipelines/dit/pipeline_dit.pyr   zDiTPipeline.__init__1   s     	+3)TTT &nn.. D D
U"[[-- D DE;>s88DK 5 5 7 788Dvdk&7&7&9&9::;;DKKK	      r&   returnc                      t          |t                    st          |          }|D ]%}| j        vrt          | d j         d          & fd|D             S )a0  

        Map label strings from ImageNet to corresponding class ids.

        Parameters:
            label (`str` or `dict` of `str`):
                Label strings to be mapped to class ids.

        Returns:
            `list` of `int`:
                Class ids to be processed by pipeline.
        zK does not exist. Please make sure to select one of the following labels: 
 .c                 *    g | ]}j         |         S  )r   ).0lr#   s     r(   
<listcomp>z-DiTPipeline.get_label_ids.<locals>.<listcomp>Z   s    ...1A...r)   )
isinstancelistr   
ValueError)r#   r&   r0   s   `  r(   get_label_idszDiTPipeline.get_label_idsC   s     %&& 	 KKE 	 	A## ttfjfqttt   $
 /.......r)         @2   pilTclass_labelsguidance_scale	generatornum_inference_stepsoutput_typereturn_dictc                    t          |          }| j        j        j        }| j        j        j        }	t          ||	||f|| j        | j        j                  }
|dk    rt          j	        |
gdz            n|
}t          j
        || j                                      d          }t          j
        dg|z  | j                  }|dk    rt          j	        ||gd          n|}| j                            |           |                     | j        j                  D ]6}|dk    r2|dt          |          dz           }t          j	        ||gd	          }| j                            ||          }|}t          j        |          sx|j        j        d
k    }t)          |t*                    r|rt          j        nt          j        }n|rt          j        nt          j        }t          j
        |g||j                  }n8t          |j                  dk    r |d                             |j                  }|                    |j        d                   }|                     |||          j        }|dk    r|ddd|	f         |dd|	df         }}t          j        |t          |          dz  d	          \  }}||||z
  z  z   }t          j	        ||gd	          }t          j	        ||gd	          }| j        j        j        dz  |	k    rt          j        ||	d	          \  }}n|}| j                             |||          j!        }8|dk    r|"                    dd	          \  }
}n|}
d| j#        j        j$        z  |
z  }
| j#        %                    |
          j        }|dz  dz   &                    dd          }|'                                (                    dddd                                          )                                }|dk    r| *                    |          }| +                                 |s|fS tY          |          S )a>	  
        The call function to the pipeline for generation.

        Args:
            class_labels (List[int]):
                List of ImageNet class labels for the images to be generated.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                A higher guidance scale value encourages the model to generate images closely linked to the text
                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
            generator (`torch.Generator`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            num_inference_steps (`int`, *optional*, defaults to 250):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`ImagePipelineOutput`] instead of a plain tuple.

        Examples:

        ```py
        >>> from diffusers import DiTPipeline, DPMSolverMultistepScheduler
        >>> import torch

        >>> pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16)
        >>> pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        >>> pipe = pipe.to("cuda")

        >>> # pick words from Imagenet class labels
        >>> pipe.labels  # to print all available words

        >>> # pick words that exist in ImageNet
        >>> words = ["white shark", "umbrella"]

        >>> class_ids = pipe.get_label_ids(words)

        >>> generator = torch.manual_seed(33)
        >>> output = pipe(class_labels=class_ids, num_inference_steps=25, generator=generator)

        >>> image = output.images[0]  # label 'white shark'
        ```

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                returned where the first element is a list with the generated images
        )shaper;   devicedtype   r   )rA   i  r   N)dimmps)rB   rA   )timestepr9   g      ?r   r8   )images)-lenr   configsample_sizein_channelsr   _execution_devicerB   torchcattensorreshaper   set_timestepsprogress_bar	timestepsscale_model_input	is_tensorrA   typer2   floatfloat32float64int32int64r@   toexpandsampler   out_channelsstepprev_samplechunkr   scaling_factordecodeclampcpupermutenumpynumpy_to_pilmaybe_free_model_hooksr   )r#   r9   r:   r;   r<   r=   r>   
batch_sizelatent_sizelatent_channelslatentslatent_model_input
class_nullclass_labels_inputthalfrT   is_mpsrB   
noise_predepsrestcond_eps
uncond_epshalf_epsmodel_output_sampless                               r(   __call__zDiTPipeline.__call__\   s   x &&
&-9*1=[I)"(	
 
 
 :H!9K9KUYy1}555QX|L9OPPPXXY[\\\4&:"5d>TUUU
IWZ[I[I[UYj'A1EEEam 	$$%8999""4>#;<< *	f *	fA!!)*HC0B,C,Cq,H*HI%*Yd|%C%C%C"!%!A!ABTVW!X!XI?9-- 
J ,275@i// C-3FEMMEE+1BEKKu{E!L)EJ\Jcddd		Y_%%**%dO../A/HII	!(();)A!)DEEI))"YEW *   
 !!&qqq*:?*:':;Z?K[K[H[=\T',{3CA1'M'M'M$*%(Z:O(PPi8 4!<<<"YT{:::
 &3q8OKK"'+j/q"Q"Q"Qaa) "&!4!4\1FX!Y!Y!eA+11!1;;JGQQ(Gdho44w>(//'**1Q;$++Aq11 ++--''1a3399;;AACC%''00G 	##%%% 	:"'2222r)   )N)r6   Nr7   r8   T)__name__
__module____qualname____doc__model_cpu_offload_seqr
   r	   r   r   r   r   strr   r   r   r5   rN   no_gradrX   	Generatorboolr   r   r   __classcell__)r'   s   @r(   r   r      s|         / .2< <*< < -	<
 4S>*< < < < < <$/5d3i#8 /T#Y / / / /2 U]__ !$MQ#%%* O3 O33iO3 O3 E%/43H"HIJ	O3
 !O3 c]O3 O3 
"E)	*O3 O3 O3 _O3 O3 O3 O3 O3r)   r   )typingr   r   r   r   r   rN   modelsr	   r
   
schedulersr   utils.torch_utilsr   pipeline_utilsr   r   r   r.   r)   r(   <module>r      s   * 6 5 5 5 5 5 5 5 5 5 5 5 5 5  : : : : : : : : 3 3 3 3 3 3 - - - - - - C C C C C C C CM3 M3 M3 M3 M3# M3 M3 M3 M3 M3r)   