
    wi              
       v   U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ ddgZ e            s:d dlZ G d	 d
          Zd Zeej        d         _        eej        d         _        dS d dlmZ d dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)  e j*        e+          Z,er(	 d dl-m.Z. n # e/$ r e,0                    d           Y nw xY w G d dej1                  Z2 e2            Z3e2e4d<   dde5fdZ6 G d d          Zddde5dee7df         de
ee5df                  defdZdS )    Nreduce)chain)DictListOptionalTupleTYPE_CHECKINGUnion)is_available)not_noneinit_device_mesh
DeviceMeshc                       e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__     m/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/torch/distributed/device_mesh.pyr   r      s        r   r   c                      d S Nr   r   r   r   _init_device_mesh_stubr      s    r   ztorch.distributed.device_mesh)Backend)_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_backendget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroupsplit_group)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc            	       x   e Zd ZddZddZdddeedf         d	eeedf                  ddfd
Z		 dddde
e         ddfdZddZddde
e         fdZededefd            Zededefd            ZdddedefdZ	 ddedede
ej                 ddfdZdeeedf                  fdZdddeded         fdZdS )_MeshEnvreturnNc                 L    g | _         i | _        i | _        i | _        i | _        d S r   )
mesh_stackchild_to_root_mappingmesh_dim_group_optionsroot_to_flatten_mappingflatten_name_to_root_dimsselfs    r   __init__z_MeshEnv.__init__C   s=    02DOGID&  ' UWD(  ***r   r   c                 j    t          | j                  dk    rt          d          | j        d         S )Nr   z#No device mesh is currently active!)lenr-   RuntimeErrorr2   s    r   get_current_meshz_MeshEnv.get_current_meshO   s3    4?##q(("#HIII?2&&r   device_meshsubmesh_dim_names.submesh_dimsc                 p   fd|D             }j         }g }g }d}t          ||          D ]\  }	}
t          |	          dk    r|                    |	d         |z
  |	d         |z
            }|                    |	d         |z
             |t          |	          dz
  z  }|                    | j                 |
         j        d                    |                    |	d         |z
             |                    j        |	d                             t          t          |j	                            }|D ]}|
                    |             |j        g ||R  j        dg|R  }                                }|D ] }t          j        ||d          }||v r|}!||_        | j        |<   |S )Nc                 8    g | ]}t          fd |d          S )c                 >    | j                             |          z  S r   )meshsize)xyr:   s     r   <lambda>z5_MeshEnv.create_sub_mesh.<locals>.<listcomp>.<lambda>`   s    [%5%:%:1%=%=!= r      r   ).0mesh_dimr:   s     r   
<listcomp>z,_MeshEnv.create_sub_mesh.<locals>.<listcomp>^   sG         ====   r   r   rE   r6   )	start_dimend_dimFmesh_dim_names_init_backend)r@   zipr7   flattenappendr0   _dim_group_infoslistrangendimremovepermutereshaper!   r   device_typer.   )r3   r:   r;   r<   slice_dim_sizemesh_tensorslice_dim_idxslice_dim_group_infonum_dims_flattenmesh_dim_indicesmesh_dim_namemesh_dims_remained_idxidxpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshs    `                r   create_sub_meshz_MeshEnv.create_sub_meshT   sy       !-  N &*KM#%   !36|EV3W3W  / - '((1,,"-"5"5"21"58H"H 0 47G G #6 # #K "(()9!)<?O)OPPP$,<(=(=(AA$(//4[A)*1.    "(()9!)<?O)OPPP(//#45Ea5HI   
 &*%0@*A*A%B%B"$ 3 3&--c2222
1k1 '*7  b+)+ + +O #++--H* * *$+#4"'	   w&&")K+?K(6AD&{3r   r_   c                 <   t                               |          fdt          |j                  D             }|s!d                    fd|D                       }| j                            i            t          g t          t          j                            | j                 	                                R  }||v rt          | d dd| d          | j        v r"|| j                 v r| j                 |         S t          j        |j                                                  }t          t!          j        j                            }|D ]}|                    |            j        j        g ||R                      d|          }                                }	|D ] }
t-          j        |
|f	          }|	|
v r|}!| j        |<   || j                            i           |<   t3          |          | j                 |<   |S )
Nc                 ^    g | ])}t          j                                      |          *S r   )r   rL   index)rF   flattened_mesh_dim_name	root_meshs     r   rH   z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>   sC     $ $ $+ 122889PQQ$ $ $r   _c                 D    g | ]}t          j                  |         S r   )r   rL   )rF   dimrl   s     r   rH   z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>   s9        !!9::3?  r   z# already exists for submesh of the . z5The mesh_dim_names of submesh and flattened mesh are z-. Please specify another valid mesh_dim_name.r6   rL   )_mesh_resourcesget_root_meshr   rL   joinr1   
setdefaultr   rR   keysr8   r0   mathprodr@   rA   rS   rT   rU   rV   rW   r!   r   rX   r.   tuple)r3   r:   r_   flatten_dims_in_rootinvalid_dim_namesflattened_mesh_dim_sizeremained_dims_in_rootflatten_dim_in_rootrb   rc   rd   flattened_meshres_flattened_meshrl   s                @r   create_flatten_meshz_MeshEnv.create_flatten_mesh   s    (55kBBI$ $ $ $/78R/S/S$ $ $ 
 !  #   #7  ! ! *55iDDD % !hy78899!/	:??AA! ! !  111"$VVVVVCL] C C C   T999!T%A)%LLL3I>}MM&*i0@0E0E0G0G&H&H#$(y~/B)C)C$D$D!'; B B#%,,-@AAAA4in4 &)=  gb122  !))++H* 8 8!+)$1#3" " "
 w&&)7&=FD&'9:TfD(33IrBB=QGLMaGbGbD*95mD%%r   c                 D    | j                             |d           }|s|n|S r   )r.   get)r3   r:   rl   s      r   rs   z_MeshEnv.get_root_mesh   s+     266{DIII&/>;;Y>r   c                     |                      |          }|j        }|r=|r;t          |          dk    s
J d            |d         }|                     ||          S dS )z
            Returns the index of the mesh dim in the root mesh.
            The device_mesh passed in needs to be sliced out from the root mesh
            or submesh of the root mesh.
            rE   z"The submesh can only be a 1D mesh.r   N)rs   rL   r7   get_mesh_dim_by_name)r3   r:   rl   child_mesh_dim_nameschild_mesh_dim_names        r   get_root_mesh_dimz_MeshEnv.get_root_mesh_dim   s~     **;77I#.#=  Q1 Q,--2227 322&:1&=#00<OPPP4r   rX   c                 D    t          |                                           S r   )_get_device_handledevice_countrX   s    r   num_devices_per_hostz_MeshEnv.num_devices_per_host   s    %k22??AAAr   c                 T    t                      t                              |           z  S r   )r"   r*   r   r   s    r   	num_hostsz_MeshEnv.num_hosts   s#     "##x'D'D['Q'QQQr   c                     |j         t          |j                   dk    rt          d          ||j         vrt          d| dd|j                    t          |j                             |                    S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)rL   r7   KeyErrorr   rj   )r3   r:   r_   s      r   r   z_MeshEnv.get_mesh_dim_by_name  s     *2{122a770   K$>>>G}GGGa[E_aa   K6<<]KKLLLr   ro   backend
pg_optionsc                     ||f| j         |<   d S r   )r/   )r3   ro   r   r   s       r   _set_mesh_dim_group_optionsz$_MeshEnv._set_mesh_dim_group_options  s     18/DD',,,r   c                 (  	 ||                      |          k    rt          d          | j                            |i            | j        |         }g |j        |	t          	fd|D                       st          d| d	 d          d}g }|D ]}}||v r&||         }|d         }|                    |           n0|j                            |          }|                    |f           ||k    rt          d| dd| d	d
          |}~|S )z
            Validate whether the mesh_dim_names is valid for slicing the given device_mesh.
            If valid, return dim indexes of the slice mesh in the device mesh.
            z'Cannot create a submesh from a submesh.c              3       K   | ]}|v V  	d S r   r   )rF   r_   valid_mesh_dim_namess     r   	<genexpr>z0_MeshEnv._get_slice_mesh_dims.<locals>.<genexpr>2  s=        ! !55     r   zInvalid mesh_dim_names z% specified. Valid mesh_dim_names are .r6   z specified. z!Found mesh dim indices to slice: rp   z.Mesh dim indices should be in ascending order.)	rs   r8   r1   ru   rL   allr   rP   rj   )
r3   r:   rL   r1   curr_idxslice_mesh_dimsr_   mesh_indicesnext_idxr   s
            @r   _get_slice_mesh_dimsz_MeshEnv._get_slice_mesh_dims  s    d00===="#LMMM *55k2FFF(,(F{(S%$+$*$ 
     %3      Hn H H0DH H H   H O!/ $ $ $===#<]#KL  ,B/H#**<8888*9??NNH#**H;777x''"N.NNNOOOOOH  
 $""r   c                    |                      ||          }|j                            d|                              d|j                            |                    }|                                }g }|D ]I}t          |j        ||fd          }||v r|j        |         gng |_        |	                    |           J|S )z`
            Return all the submeshes of a given mesh dimension of the device mesh.
            r6   FrK   )
r   r@   swapdimsrW   rA   r!   r   rX   rQ   rP   )	r3   r:   r_   rG   rb   rc   res_submeshesmesh_1dre   s	            r   _get_all_submeshesz_MeshEnv._get_all_submeshesS  s     00mLLH).77HEEMMK$))(33 O #++--HM* . .$+$1#3"'	    7** !1(;<< (
 $$W----  r   r+   Nr+   r   r   )r:   r   r+   r   )r   r   r   r4   r9   r	   strr   intrg   r   r   rs   r   staticmethodr   r   r   C10dBackendOptionsr   r   r   r   r   r   r*   r*   B   s^       
	 
	 
	 
		' 	' 	' 	'
M	%M	  %S#XM	 uS#X/	M	
 M	 M	 M	 M	` MQ@	& @	&+@	&<DSM@	&@	& @	& @	& @	&D	? 	? 	? 	?	 	(3- 	 	 	 	  
	Bc 	Bc 	B 	B 	B 
	B 
	R3 	R3 	R 	R 	R 
	R
	M+	M<?	M	M 	M 	M 	M* 9=		E 	E	E 	E !!45		E
 	E 	E 	E 	E2	#%S/"2	# 2	# 2	# 2	#h	!+	!<?	!,	! 	! 	! 	! 	! 	!r   r*   rr   cudarX   c                 .    t          t          | d          S )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchr   s    r   r   r   r  s     uk4000r   c                      e Zd ZU dZeed<   ej        ed<   ee	edf                  ed<   ddddede
ej        d	f         dee	edf                  d
eddf
dZd Zd Zd$dZd%dZdefdZd ZdedefdZde
ee	edf         f         dd fdZd&dee
eef                  defdZdee         fdZe	 d&ddde
eee         f         dedee
ej        d	f                  dee	edf                  dd f
d            Zd&dee         defdZedefd            Zede	edf         fd            Z defdZ!d&dee
eef                  defd Z"deee                  fd!Z#d&d"ee         dd fd#Z$dS )'r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rX   r@   .rL   NTrK   r(   rM   r+   c                   || _         t          |t          j                  r"|j        j        dk    rt          d|           t          |t          j                  r2|                                                    t          j	                  n t          j
        |dt          j	                  | _        |rt          |          nd | _        t          | j                                                                                  | _        d | _        |dk    r|r(|                                  |                                  t)                      r*t+                      dk    rt-          j                    | _        | j        t1                      k                                    }|                    d          dv sJ |                    d          dk    r|d                                         nd | _        d S d S )	Ncpuz!`mesh` must be a CPU tensor, got dtypedevicer   xlathreadedr   )r   rE   )rX   
isinstancer   Tensorr   type
ValueErrordetachtor   tensorr@   ry   rL   rO   tolist_flatten_mesh_list
_thread_id_get_or_create_default_group_init_process_groupsr$   r   	threading	get_identr!   nonzerorA   _coordinate_on_dim)r3   rX   r@   rL   rM   rank_coordss         r   r4   zDeviceMesh.__init__  s     +D$-- M$+2Be2K2K !KT!K!KLLL dEL11G  uy 111\$uEIFFF I
 <J"S%"7"7"7tD ',DI,=,=,?,?,F,F,H,H&I&ID#"DO e## ! 055777--///!## <(C(C&/&9&;&;DO  $yHJJ6??AA"''**f4444/:/?/?/B/BQ/F/FKN))+++D ''' $#r   c           	         t                      }|st                       t                      }| j                                        |k    r-t          d| d| j                                         d          t          | j                  }|sg|re|                                }||k    r'||z  dk    rt          d| d| d| j         d          |	                    t                      |z             t                      S )	Nz2Mesh should not be bigger than default world size z, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r$   r#   r"   r@   numelr8   r   rX   r   
set_devicer!   r   )r3   default_initialized
world_sizedevice_handler   s        r   r   z'DeviceMesh._get_or_create_default_group  sJ   "0"2"2& %"$$$'))Jy  :--"{{{aeajapaparar{{{   /t/?@@M& L= L (5'A'A'C'C$!555"%99Q>>&e%e e2Fe eIMIYe e e   ((6J)JKKK%'''r   c           	         g }t                      }| j        j        dk    r| j                                        t	                      k    rt          t          t	                                          }t          j        	                                r%t          |          dk    rt          d|d          n|}|                    t          |          ||j        f           nt          | j        j                  D ]}| j                            d|                              d| j                            |                    }|t$          j        v rt$          j        |         \  }}nd\  }}| j        rd| j        |          nd	| }	d }t+          |d
d           x}
	 %t-          |||                                |	          }|D ]}|                                }|
t          ||||	          }|                                 |v rrt3          |          |k    r(t5          d|                                  d| d          |                    t          t7          |                    ||j        f           || _        d S )NrE   gloozcpu:gloo,cuda:ncclmesh_default)r   ranks
group_descr6   )NNmesh_	mesh_dim_bound_device_id)	parent_pgr   split_ranksr   )r   r   r   r   zFEach device mesh dimension should get only one process group, but got z in !)r   r@   rT   r   r"   rR   rS   r   r   r   r   r%   rP   r   
group_namer   rW   rA   rr   r/   rL   r   r'   r   r!   r7   r8   r   rQ   )r3   dim_group_infosdefault_groupr   	dim_groupro   rb   r   r   r   r   dim_meshsubgroup_rankss                r   r   zDeviceMesh._init_process_groups  s    ACO.00My~""ty'8'8N<L<L'L'L U>#3#34455 z..00' $M22f<<  4##1    '   &&&y11!,    !00 P PC '+i&8&8S&A&A&I&IDINN3//' 'O oDDD ,B3G#&J /9+  ./: 3C 8:::...  !%I+2)+<d, ,  "	"
 %0&3'1(7(>(>(@(@'1	% % %	 %4  )1):):
 +2(1&4(/+5+5	) ) )I  ==??n<<"?33c99&2%<mqmzmzm|m| %< %<*8%< %< %<'" '" !" ,22$28I3F3F$G$G$2$-$8!"  +8 %4D!!!r   c                 D    t           j                            |            | S r   )rr   r-   rP   r2   s    r   	__enter__zDeviceMesh.__enter__b  s    &--d333Kr   c                 B    t           j                                         d S r   )rr   r-   pop)r3   exc_type	exc_valueexc_tracebacks       r   __exit__zDeviceMesh.__exit__h  s    &**,,,,,r   c                     | j         s%d| j         d| j                                         dn,d| j         d| j                                         d| j          d}|S )NzDeviceMesh('z', )z, mesh_dim_names=)rL   rX   r@   r   )r3   device_mesh_reprs     r   __repr__zDeviceMesh.__repr__l  sz     *uIt/IIDI4D4D4F4FIIIItD$4tt9I9I9K9Ktt^b^qttt 
 $#r   c                     t          | dd           | _        | j        s7t          | j        | j        j        | j        | j        | j        f          | _        | j        S )N_hash)	r   r   hashr   r@   shaperX   rL   r   r2   s    r   __hash__zDeviceMesh.__hash__t  s]     w55DJ: 	!/	(+ 
 :r   otherc                 (   t          |t                    sdS t          |           t          |          k    rdS | j        |j        k    oI| j        j        |j        j        k    o/| j        |j        k    o| j        |j        k    o| j        |j        k    S )NFT)	r   r   idr   r@   r   rX   rL   r   )r3   r   s     r   __eq__zDeviceMesh.__eq__  s    eZ00 u$xx2e99$$t +u/GG <	5:+;;<(E,==< +u/CC< 5+;;r   c                 d   | j         st          d          t          |t                    r|fn|}|| j         k    r| S t                              | |          }t          j        j        	                                5  t          
                    | ||          }ddd           n# 1 swxY w Y   |S )aU
  
            Slice the current DeviceMesh based on the mesh_dim_names given to create a submesh.
            The submesh created consists of the dimensions and the communicators indicated by
            ``mesh_dim_names``

            Args:
                mesh_dim_names (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the DeviceMesh to create the submesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner in a world size of 8.
            In the first example:
                Calling mesh_2d["tp"] on rank 0, 1, 2, 3 returns a 1D submesh of DeviceMesh:([0, 1, 2, 3]).
                Calling mesh_2d["tp"] on rank 4, 5, 6, 7 returns a 1D submesh of  DeviceMesh:([4, 5, 6, 7]).
                Calling mesh_2d["dp"] on rank 0, 4 returns a 1D submesh of  DeviceMesh:([0, 4]).
                Calling mesh_2d["dp"] on rank 1, 5 returns a 1D submesh of  DeviceMesh:([1, 5]).
                Calling mesh_2d["dp"] on rank 2, 6 returns a 1D submesh of  DeviceMesh:([2, 6]).
                Calling mesh_2d["dp"] on rank 3, 7 returns a 1D submesh of  DeviceMesh:([3, 7]).

            In the second example:
                Calling mesh_3d["dp", "cp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 1], [4, 5]]).
                Calling mesh_3d["dp", "cp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 3], [6, 7]]).
                Calling mesh_3d["cp", "dp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 4], [1, 5]]).
                Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize a 2D device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh_2d = init_device_mesh(device_type="cuda", (2,4), mesh_dim_names=("dp", "tp"))
                >>> tp_mesh = mesh_2d["tp"]
                >>> dp_mesh = mesh_2d["dp"]
                >>>
                >>> # Initialize a 3D mesh.
                >>> mesh_3d = init_device_mesh(device_type="cuda", (2,2,2), mesh_dim_names=("dp", "pp", "cp"))
                >>> # The order of the mesh_dim_names provided deteremines the order of dimensions in the submesh.
                >>> dp_cp_mesh = mesh_3d["dp", "cp"]
                >>> cp_dp_mesh = mesh_3d["cp", "dp"]
            z1Cannot slice a DeviceMesh without mesh_dim_names!N)rL   r8   r   r   rr   r   r   _subclassesfake_tensorunset_fake_temporarilyrg   )r3   rL   r   re   s       r   __getitem__zDeviceMesh.__getitem__  s   Z & X"#VWWW &0%D%DX!!.  !444"1"F"F.# # &2IIKK  -==no G               s   <B%%B),B)rG   c                    t          | d          st          d          | j        j        dk    r!|t          d| j        j         ddd          | j        j        dk    r+|)t	          t          | j        d	         dd
                    S t                              |           }t          j	        
                    |d          }|rG||                                v r1||         j        d	         dd
         }t	          t          |           S t          |t                    rt                              | |          n|}t	          t          | j        |         dd
                    S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            rQ   z*DeviceMesh process groups not initialized!rE   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r      )hasattrr8   r@   rT   r   r   rQ   rr   rs   r0   r   rv   r   r   r   )r3   rG   rl   r0   r   s        r   	get_groupzDeviceMesh.get_group  s    4!344 Q"#OPPPy~!!h&6"LLLL`=   y~""x'7-t/DQ/G/KL   (55d;;I&5&M&Q&Q4' '# ' 87N7S7S7U7U+U+U"9("C"TUV"WXZYZXZ"[ 9? KLLL "(C00"O88xHHH! 
  -t/DX/NrPQr/RS  r   c                 N      fdt           j        j                  D             S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                 :    g | ]}                     |          S r   )r  )rF   ir3   s     r   rH   z-DeviceMesh.get_all_groups.<locals>.<listcomp>  s%    EEE!DNN1%%EEEr   )rS   r@   rT   r2   s   `r   get_all_groupszDeviceMesh.get_all_groups  s,     FEEEuTY^/D/DEEEEr   rq   groupc                "   t          | t                    rt          |           }t          |t          j                  r|                                |k    s"|Bt          |t          j                  s(||k    r"t          dt          |           d|           t          j        |dt          j	                  }t          |||d          }t          |           || j        fg|_        |S t          |           }t          |          dk    rt          d	          |t          d
          t          |t          j                  r3|                                                    t          j	        d          n t          j        |dt          j	                  }|j        t          |          k    r5t          d|                                 dt          |           d          t          |||d          }d |D             |_        |S )aM  
            Constructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            NzInvalid mesh z for ProcessGroup with ranks r   r   FrK   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)r   r   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 V    g | ]&}t          |          t          |          |j        f'S r   )r   r    r   )rF   r  s     r   rH   z)DeviceMesh.from_group.<locals>.<listcomp>J  sH     , , , 	 #5))+E22$, , ,r   )r   r&   r    r   r   r   r   r   r   r   r   r   r   rQ   rR   r7   r   r   rT   )r  rX   r@   rL   group_ranksr:   groupss          r   
from_groupzDeviceMesh.from_group  s;     %.. #5e<<tU\22	7;{{}}7S7S$&tU\:: %++$]D		]]P[]]   |KUYOOO(#1"'	   $E**K9IJ0, #"%[[F6{{a !QRRR| !STTT dEL11G  uy ???\$uEIFFF 
 yCKK'' L KKMML L03FL L L   %T.PU  K, , $, , ,K( r   c                 l    || j                                         n| j                             |          S r   )r@   r   rA   )r3   rG   s     r   rA   zDeviceMesh.sizeT  s,    (0(849??$$$dinnX>V>VVr   c                     | j         j        S r   )r@   rT   r2   s    r   rT   zDeviceMesh.ndimW  s    9>!r   c                 4    t          | j        j                  S r   )ry   r@   r   r2   s    r   r   zDeviceMesh.shape[  s    )))r   c                     t                      S )z:
            Returns the current global rank.
            )r!   r2   s    r   r!   zDeviceMesh.get_rank_  s     ::r   c                    | j         dk    r |t          d| j        j          dd          |d}t          |                     |                    }t          |t                    s
J d            t          t          |                    S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            rE   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)rT   r8   r@   r   r  r   r&   r!   )r3   rG   mesh_dim_groups      r   get_local_rankzDeviceMesh.get_local_ranke  s    8 y1}}!1"LLLL`   !%dnnX&>&>??N  C CBC C  H^44555r   c                 "    | j         r| j         ndS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r2   s    r   get_coordinatezDeviceMesh.get_coordinate  s    
 /3.EO4**4Or   r_   c                 d    | j         st          d          t                              | |          S )a\  
            Returns a 1D DeviceMesh by flattening the current DeviceMesh.

            If no mesh_dim_name is provided, the default is a string concatentaing the mesh_dim_names of the
            given submesh with each mesh_dim_name separated by "_". For example, if we have a 3D mesh
            DeviceMesh([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], mesh_dim_names=("dp", "cp", "tp")), calling
            mesh_3d["dp", "cp"]._flatten() will create a 1D submesh DeviceMesh([0, 1, 2, 3], mesh_dim_names=("dp_cp",))
            on rank 0, 1, 2, 3 and a 1D submesh DeviceMesh([4, 5, 6, 7], mesh_dim_names=("dp_cp",)) on rank 4, 5, 6, 7.

            After the flattened dimension is created, to access the flattened dimesnion in mesh_3d, one can use the
            existing slicing method to obtain the flattened mesh through calling mesh_3d["dp_cp"].
            z3Cannot flatten a DeviceMesh without mesh_dim_names!)rL   r8   rr   r   )r3   r_   s     r   _flattenzDeviceMesh._flatten  s<     & "I   #66t]KKKr   r   r   r   )%r   r   r   __doc__r   __annotations__r   r   r   r	   r   boolr4   r   r   r   r   r   r   objectr   r   r   r&   r  r   r  r   r  rA   propertyrT   r   r!   r  r  r  r   r   r   r   r   {  s        %	 %	N l sCx1111 9="&(	 (	 (	(	 k12(	
 %U38_5(	  (	 (	 (	 (	 (	T	( 	( 	(:r	4 r	4 r	4h	 	 	 		- 	- 	- 	-	$c 	$ 	$ 	$ 	$	 	 		 	4 	 	 	 	G	"'U38_(<"=G	G	 G	 G	 G	R,	 ,	huS#X&? ,	< ,	 ,	 ,	 ,	\	FD$6 	F 	F 	F 	F 
 @D@	
 9=@	 @	 @	tL'99:@	@	 5{!:;<@	
 %U38_5@	 @	 @	 @	 
@	D	W 	W# 	W# 	W 	W 	W 	W 
	"# 	" 	" 	" 
	" 
	*5c? 	* 	* 	* 
	*	c 	 	 	 	(	6 (	68E#s(O+D (	6PS (	6 (	6 (	6 (	6T	PHT#Y$7 	P 	P 	P 	P	L 	L(3- 	L< 	L 	L 	L 	L 	L 	Lr   rq   
mesh_shape.rL   r+   c          	         |t          t          |                    t          |          k    rt          dd|           t          |          t          |          k    r1t          ddt          |           dt          |           d          | r(|                                 st          d|  d	d
          t	          j        d          5  t	          j        t          j        |          t          j	                  
                    |          }ddd           n# 1 swxY w Y   t          | ||          }|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z0Device type with index is not supported but got rp   zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r   r   )rX   r@   rL   )r7   setr8   isalphar   r   arangerw   rx   r   viewr   )rX   r  rL   r@   r:   s        r   r   r     s   R %3~&&''3~+>+>>>"8VnVV  
 :#n"5"555"Ln#n2E2Enn\_`j\k\knnn    	{2244 	R;RRRg   \%   	Y 	Y<	* 5 5UYGGGLLZXXD	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y #)
 
 
 s   AD$$D(+D()r   )8loggingrw   r   	functoolsr   	itertoolsr   typingr   r   r   r	   r
   r   r   torch.distributedr   torch.utils._typing_utilsr   __all__sysr   r   modulesr   r   torch._C._distributed_c10dr   r   "torch.distributed.distributed_c10dr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   	getLoggerr   loggernumpy.typingr(   ImportErrorwarninglocalr*   rr   r  r   r   r   r   r   r   <module>r5     sd                     D D D D D D D D D D D D D D D D  * * * * * * . . . . . . |
, |~~ _JJJ          ?NCK/0; 0 K' BAAAAA                            Wx((F  	....... 	 	 	NNU    	
l! l! l! l! l!9? l! l! l!\	 !)

OX***1 1 1 1 1 1mL mL mL mL mL mL mL mLf 59	G G GG#s(OG !sCx1	G
 
G G G G G Gs   7B> >CC