
    wi5                    b   d dl mZ ddlmZ ddlmZ ddlmZ dBd	ZdBd
Zej	        ed                         Z
ej	        e ej        d          d                                     Zej	        e ej        d          dCd                                    Zej	        ed                         Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zej	        e ej        ddd          dDd                                    Zej	        e ej        d d!"          dEd#                                    Zed$             Zed%             Zed&             Zed'             Zej	        e ej        d(dd          dDd)                                    Zej	        e ej        d*d!"          dEd+                                    Zed,             Zej	        e ej        d-          dFd.                                    Z ed/             Z!ej	        ej"         ej        d0          dGd1                                    Z#ej	        e ej$        d2          dHd3                                    Z%ed4             Z&ej	        e ej$        d5          dHd6                                    Z'edId8            Z(edJd;            Z)ej	        edej*        fdKd>                        Z+d? Z,ej	        edLd@                        Z-edA             Z.dS )M    )annotations   )jit   )core)mathicore.constexprc                h    d}| j         }|dk    r|dz  }|dz  }|dk    t          j        |          S )Nr   r   valuer   	constexpr)r	   log2ns      h/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/triton/language/standard.py_log2r   
   sF    D	A
a%%	a	 a%% >$    c                X    | j         }t          j        ||dz
  z  dk    o|dk              S )Nr   r   r   )r	   r   s     r   _is_power_of_twor      s/    	A>1A;1,7a888r   c                    | |z   dz
  |z  S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r    )xdivs     r   cdivr      s     GaKCr   sigmoidc                8    ddt          j        |            z   z  S )Nr   )r   expr   s    r   r   r   +   s     DHaRLL !!r   softmaxFc                    | t          | d          z
  }t          j        |          }t          |d          }t          j        |||          S )Nr   )maxr   r   sumfdiv)r   ieee_roundingznumdens        r   r   r   2   sC     	
C1IIA
(1++C
c1++C9S#}---r   c                <    t          j        | | j        gd          S )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    T)can_reorder)r   reshapenumelr   s    r   ravelr,   <   s     <AG9$7777r   c                    | |z  |z   }||z  }||z  }||z  }t          j        ||z
  |          }||z  }|||z  z   }	||z  }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r	   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_js              r   	swizzle2dr:   H   sn    , 
VaB voGW}HvE\&5.&11F	gBBKE&LE%<r   c                .    t          j        | d|          S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtypes     r   zerosr?   p   s     9UAu%%%r   c                6    t          | j        | j                  S )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r?   r=   r>   )inputs    r   
zeros_likerB   }   s     ek***r   c                    |r| |k    o||k     }nd}| |k    p|}t          j        || |          }t          j        |||          }||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_rets	            r   _argmax_combinerP      sb     26F?	&	CBJr66**EJr66**E%<r   c                (    t          | |||d          S NTrP   rG   rH   rI   rJ   s       r   _argmax_combine_tie_break_leftrU          66664@@@r   c                (    t          | |||d          S rD   rS   rT   s       r   _argmax_combine_tie_break_fastrX          66665AAAr   c                ,    t          j        | |          S N)r   maximumabs     r   _elementwise_maxr`          <1r   r\   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argNTc                b   t          j        |           } |r<|rt          j        | |t          |          S t          j        | |t          |          S t          j        | j        j                  t          j        d          k     rt          j        | j                                                  r | 	                    t           j
                  } nB| j                                        s
J d            | 	                    t           j                  } t          j        | |t          |          S N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrU   rX   r   r>   primitive_bitwidthis_floatingtofloat32is_intint32reducer`   rA   axisrb   rc   ri   s        r   r!   r!      s   
 -e44E O( 	o,UD:Xdmnnnn,UD:Xdmnnnn>%+899DN2<N<NNN~ek557788 -..{))++QQ-QQQ+,,{5$(8INNNNr   zmaximum indexrK   )re   c                4    t          | |d||          \  }}|S NT)rb   rc   ri   )r!   rA   ru   rK   ri   _rets         r   argmaxr{      s'     5$tSamvwwwHQJr   c                    |r| |k    o||k     }nd}| |k     p|}t          j        || |          }t          j        |||          }||fS rD   rE   )	rG   rH   rI   rJ   rK   rL   lt	value_ret	index_rets	            r   _argmin_combiner      sc     26F?	&	CB
2vv..I
2vv..Iir   c                (    t          | |||d          S rR   r   rT   s       r   _argmin_combine_tie_break_leftr      rV   r   c                (    t          | |||d          S rD   r   rT   s       r   _argmin_combine_tie_break_fastr      rY   r   c                ,    t          j        | |          S r[   r.   r]   s     r   _elementwise_minr      ra   r   r/   c                >   t          j        |           } |r<|rt          j        | |t          |          S t          j        | |t          |          S t          j        | j        j                  dk     rt          j        | j                                                  r | 	                    t           j
                  } nB| j                                        s
J d            | 	                    t           j                  } t          j        | |t          |          S rg   )r   rk   rl   r   r   r   r>   rm   rn   ro   rp   rq   rr   rs   r   rt   s        r   minr      s    
 -e44E O( 	o,UD:Xdmnnnn,UD:Xdmnnnn>%+899B>>~ek557788 -..{))++QQ-QQQ+,,{5$(8INNNNr   zminimum indexc                4    t          | |d||          \  }}|S rw   )r   rx   s         r   argminr      s'     TQ_ktuuuFAsJr   c                    | |z   S r[   r   r]   s     r   _sum_combiner          q5Lr   r"   c                d    t          j        |           } t          j        | |t          |          S )Nrh   )r   rk   rs   r   )rA   ru   ri   s      r   r"   r"     s-     -e44E;udLIFFFFr   c                    | |z  S r[   r   r]   s     r   _xor_combiner     r   r   zxor sumc                    | j         j        }|                                st          d          t	          j        | |          } t	          j        | |t          |||          S )Nz#xor_sum only supported for integers)_builder)ri   r   
_generator)typescalarrq   
ValueErrorr   rk   rs   r   )rA   ru   ri   r   r   	scalar_tys         r   xor_sumr     sf     
!I @>???-ehGGGE;udLIPXeoppppr   cumsumc                b    t          j        |           } t          j        | |t          |          S r[   )r   rk   associative_scanr   rA   ru   reverses      r   r   r   %  s+    
 -e44E lGDDDr   c                    | |z  S r[   r   r]   s     r   _prod_combiner   1  r   r   cumprodc                b    t          j        |           } t          j        | |t          |          S r[   )r   rk   r   r   r   s      r   r   r   6  s+    
 -e44E mWEEEr   n_dimsc                   | j         |z	  }|d|z  z  dd||z
  dz
  z  g}t          j        | |          }t          j        dd          d d d d f         }t          j        t          |d|z
  z  d          d d d d d f         |                              |j                  }t          j        t          ||z  d          d d d d d f         |                              |j                  }	t          j        || j                  }t          j        |	| j                  }	t          j	        | j        j
        d          }
|                    |
d          }|	                    |
d          }|                     |
d          }|t          j        ||	k    |k    ||z  t          |                    z  }|                    | j        d          S )Nr   r   r   T)bitwidthsigned)bitcast)r+   r   r*   arangebroadcast_tor"   ro   r>   r=   get_int_dtyperm   rF   rB   )r   flipr	   r   n_outerr=   ymaskleftrightidtypeileftirightixrz   s                  r   _compare_and_swapr   B  s   g/G$q!t^QFQJN0CDEQA;q!T111d]+DSa$h33AAAtQQQJ?GGJJ17SSDc!d(A..qqq$z:EBBEEagNNE<ag&&DL((E)CDQQQFGGFDG))EXXfdX++F	
fd	#	#B
tz4%<D0%&.*R..QQ
QC66!'46(((r   stageorderc                x   | j         |z	  }t          j        ||k               |dk    r`|d|dz
  |z
  z  z  dd|z  g}t          j        t          j        t          j        dd          ddddf         |          | j                  }n|}t          j        |          D ]}t          | ||||z
  z   |          } | S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r+   r   static_assertr*   r   r   r=   static_ranger   )r   r   r   r   r   r=   r   r	   s           r   _bitonic_merger   V  s      g/Gu''' zz!(1vzE/A+B!BAq%x P|D-dk!Q.?.?aaa.NPUVVXYX_``u%% E EaqFUN';VDDHr   dim
descendingc                6   |t          | j                  dz
  n|}t          j        |t          | j                  dz
  k    d           t	          | j        |                   }t          j        d|dz             D ]}t          | |||k     rdn||          } | S )a  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   z+only minor dimension is currently supportedr   )lenr=   r   r   r   r   r   )r   r   r   _dimr   r	   s         r   sortr   o  s     03{3qw<<!++Dts17||a//1^___"174=11Fq&1*-- J J1aa&jjj&IIHr   c                    t          j        |           } t          j        |          }| t          |          dz
  } | t          |          dz
  k    s
J d            t          j        |           S )Nr   z2Currently only support flipping the last dimension)r   _unwrap_if_constexprr   r   )r   r=   s     r   _get_flip_dimr     sj    

#C
(
(C%e,,E
{%jj1n#e**q.   "V   >#r   c           	     `   t          j        t          | j        t	          || j                                                t          j        t          | j                             t          | j                  }t          | j                  t          | j        t	          || j                                     z
  }t          j        | dg|z            }t          j        ||          }t          j	        dd          dddf         dt          j	        dd          z
  k    }t          j
        ||          D ][}|}t          j
        d|dz             D ]&}||k    r||dz   k    rt          j        ||          }'t          ||z  |dz   d          }\t          j        || j                  } | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    r   r   Nr   Trh   )r   r   r   r=   r   r+   r   r*   expand_dimsr   r   r"   )	r   r   stepsstartr   r   r	   flip2r0   s	            r   r   r     s    	'c170K0K(LMMNNN'00111 "!'NNE!!'NNU17=ag;V;V3W-X-XXEQe$$AE""AK1aaag&!dk!Q.?.?*??Due,, 2 2"1eai00 	3 	3AAvv!q1u**(22E	1q5D111Q  AHr   c                    t          j        | |          }t          |j                  dk    r|S t          j        ||j        dd         d|j        d         z  gz             S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r=   r*   )r^   r_   cs      r   
interleaver     s[     		!QA
17||q
 |Aqwss|q172;.??@@@r   )r	   r
   )F)NFTF)TFrD   )NFNN)r   F)r	   r
   r   r
   )r   r
   r   r
   r   r
   )r   r
   r   r
   r[   )/
__future__r   runtime.jitr    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r,   r:   r?   rB   rP   rU   rX   r`   _add_reduction_docstrr!   r{   r   r   r   r   r   r   r   r"   r   builtinr   _add_scan_docstrr   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   <module>r      s;   " " " " " "                  
       9 9 9 9 	  	   	  I&&" " '&  " I&&. . . '&  . 8 8  8 $ $ $N 	& 	& 	& + + +    A A A B B B    I:J*IK K KO O OK K  O" O;KLLL   ML         A A A B B B    I:J*IK K KO O OK K  O" O;KLLL   ML  
    E""G G G #"  G
    I&&q q q '&  q x  E E E !   E    y!!F F F "!  F ) ) ) )&    0 "&TEU      0       8 A A A A Ar   