
    wiK?                     0   d dl mZmZmZmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ defd	Z ed
           G d d                      Ze G d de                      Z G d de          ZdS )    )BaseBackend	GPUTargetAttrsDescriptorregister_descriptor)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                 2    | j         }d|v rd S d|v rd S d S )Ngfx94c                 Z    |                                  s|                                 rdndS )N   r   r   r   r      )is_int8lhsTyperhsTypes     l/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/triton/backends/amd/compiler.py<lambda>zmin_dot_size.<locals>.<lambda>   s)    9J9J(qgooN_N_(qfq     gfx9c                     dS )Nr    r   s     r   r   zmin_dot_size.<locals>.<lambda>   s     r   c                     dS )Nr   r"   r   s     r   r   zmin_dot_size.<locals>.<lambda>   s    L r   )arch)r   arch_strs     r   min_dot_sizer&      s<    {H (qqq333000r   T)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   dZ
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   dZeed<   dZeed<   dZeed <   dZeed!<   dZeed"<   d#Z eed$<   d%Z!eed&<   d' Z"d( Z#dS ))
HIPOptions   	num_warps   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libs)r,   r,   r,   cluster_dimsFdebugTsanitize_overflowr$   )fp8e5supported_fp8_dtypesr"   deprecated_fp8_dtypesieeedefault_dot_input_precision)r<   allowed_dot_input_precisionsenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namedefaultinstruction_sched_variantc                    t          t                    j        dz  }| j        i nt	          | j                  }d| j        v sd| j        v s	d| j        v rdnd}t                              | d|           dd	g}|D ]}t          || d
z            ||<   t                              | dt          |
                                                     | j        dk    r| j        | j        dz
  z  dk    s
J d            d S )Nlibgfx10gfx11gfx12    @   	warp_sizeocmlocklz.bcr5   r   r,   znum_warps must be a power of 2)r   __file__parentr5   dictr$   object__setattr__strtupleitemsr+   )selfdefault_libdirr5   rO   libsrI   s         r   __post_init__zHIPOptions.__post_init__=   s   h.6 ,4bb$t?O:P:P!TY..'TY2F2F'UYU^J^J^BBdf	4i888 	A 	AC">sKKK#?@@K4k6G6G6I6I0J0JKKK~!!t~!9K'LQR&R&R&R/ 'S&RR&R&Rr   c                     d                     d | j                                        D                       }t          j        |                    d                                                    S )N_c                 "    g | ]\  }}| d | S )-r"   ).0namevals      r   
<listcomp>z#HIPOptions.hash.<locals>.<listcomp>K   s&    OOOID#4#OOOr   utf-8)join__dict__rY   hashlibsha256encode	hexdigest)rZ   keys     r   hashzHIPOptions.hashJ   sX    hhOO9L9L9N9NOOOPP~cjj1122<<>>>r   )$__name__
__module____qualname__r+   int__annotations__r-   r/   r0   r1   r2   r3   r4   r5   rT   r6   rX   r7   boolr8   r$   rW   r:   r   r;   r=   r>   r?   r@   rA   rB   rC   rE   rG   r]   rn   r"   r   r   r)   r)      s        IsL#JHc!"3"""    ccK#L%###E4"t"""D#'2%*222(*5:***'----/9 %*999!d!!! !#!!!E3NNN$$$$)*!3***L# &/s...0 0 0? ? ? ? ?r   r)   c                   F    e Zd ZdZddZed             Zed             ZdS )HIPAttrsDescriptorpointer_range_32Nc                 j    d| j         d<   ||d S d t          ||          D             | j        d<   d S )NrM   ztt.pointer_rangec                 r    g | ]4\  }}t                               |          |j        &|j        -|j        5S r"   )rv   is_within2gbdo_not_specializedo_not_specialize_on_alignmentnum)rb   paramargs      r   re   z>HIPAttrsDescriptor._add_backend_properties.<locals>.<listcomp>_   sY     3
 3
 3
$%?Q?^?^_b?c?c3
+3
494X3
I3
 3
 3
r   )property_valuesziparg_properties)rZ   paramsvaluess      r   _add_backend_propertiesz*HIPAttrsDescriptor._add_backend_propertiesZ   sU    35/0>V^F3
 3
(+FF(;(;3
 3
 3
.///r   c                    t          | d          r|                                 dk    S dt          t          |                     v r:t          | d          r*|                                                                 dk    S dS )N	ptr_rangeiztorch.Tensoruntyped_storageF)hasattrr   rW   typer   size)r   s    r   rz   zHIPAttrsDescriptor.is_within2gbd   sv    3$$ 	0==??i//Sc^^++=N0O0O+&&((--//9<<ur   c                     t          j        | |          }t                              |           rdnd}||z                       dd          }|r|ndS )NSN )r   get_property_keyrv   rz   replace)rd   aligngeneric_keyhip_keyrm   s        r   r   z#HIPAttrsDescriptor.get_property_keym   sY    %6sEBB+88==F##3W$--c266"sss"r   )NN)ro   rp   rq   	__slots__r   staticmethodrz   r   r"   r   r   rv   rv   O   se         $I
 
 
 
   \ # # \# # #r   rv   c                   T    e Zd Zedefd            Zdeddf fdZdefdZd Z	d Z
deeef         fd	Zd
 Zd Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zd Z ej                    d             Z xZS )
HIPBackendr   c                     | j         dk    S )NrD   )backend)r   s    r   supports_targetzHIPBackend.supports_targetw   s    ~&&r   returnNc                     t                                          |           t          |j        t                    sJ d| _        d S )Nhsaco)super__init__
isinstancer$   rW   
binary_ext)rZ   r   	__class__s     r   r   zHIPBackend.__init__{   s>       &+s+++++!r   c                    d| j         j        i}dvr]t          t          j                  }| j         j        dv r|                    ddh           t          t          |                    |d<   dvrt          j	        dd          dk    |d<   |                    fd	t          j
                                        D                        t          d
i |S )Nr$   r:   )gfx940gfx941gfx942fp8e4b8fp8e5b16r?   TRITON_DEFAULT_FP_FUSION1c                 *    i | ]}|v ||         S r"   r"   )rb   koptss     r   
<dictcomp>z,HIPBackend.parse_options.<locals>.<dictcomp>   s+    ]]]ASTX\S\S\QQS\S\S\r   r"   )r   r$   setr)   r:   updaterX   sortedosgetenv__dataclass_fields__keys)rZ   r   argsr:   s    `  r   parse_optionszHIPBackend.parse_options   s    ()!--#&z'F#G#G {#AAA$++Y
,CDDD+08L1M1M+N+ND'(T))')y1KS'Q'QUX'XD#$]]]])H)M)M)O)O]]]^^^!!D!!!r   c                 r    |j         |j        |j        |j        d         |j        d         |j        d         fS )Nr   r,   r.   )r+   r0   sharedr6   )rZ   metadatas     r   pack_metadatazHIPBackend.pack_metadata   s>    O!!$!!$!!$
 	
r   c                 2    dt          | j                  i}|S )Nr&   )r&   r   )rZ   codegen_fnss     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s    %|DK'@'@Ar   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rZ   r   s     r   get_module_mapzHIPBackend.get_module_map   s    77777719==r   c                 .    t          j        |           d S N)r
   load_dialects)rZ   ctxs     r   r   zHIPBackend.load_dialects   s    #r   c                 "    t          ||          S r   )rv   )rZ   r   r   s      r   get_attrs_descriptorzHIPBackend.get_attrs_descriptor   s    !&$///r   c                 8    t                               | |          S r   )rv   r   )r   r   s     r   compute_spec_keyzHIPBackend.compute_spec_key   s    !223>>>r   c                     t          j        d          } | %t          |           }|                                r|S t          t                    j        dz  }|                                r|S t          d          }|                                r|S t          d          }|                                r|S t          d          )NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r   r   r   is_filerR   rS   	Exception)lld_env_pathllds     r   path_to_rocm_lldzHIPBackend.path_to_rocm_lld   s     y!677#|$$C{{}} 
8nn#&77;;== 	J.//;;== 	J$%%;;== 	Jqrrrr   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    |            | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrunmodr   optionspms       r   	make_ttirzHIPBackend.make_ttir   s    _S[))
!!"%%%..r222###''+++))"---b!!!r"""$$R(((##B'''
s
r   c                    t          j        | j                  }|                                 t          j                            |d|j         |j        |j	        |j
                   |                    |            t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j                            |           t           j        j                            ||j        |j        |j                   t          j                            |           t           j        j                            |           t          j                            |d           t!          j        |j                  r^|j        dk    s
J d            t           j        j                            ||j                   t          j                            |           t           j        j                            |           t          j                            |d           t          j                            |           t          j                            |           t!          j        |j                  r$t           j        j                            |           t<          j                             dd          dk    rgt           j        j        !                    |           t          j                            |           t           j        j        "                    |           t          j                            |           t          j        #                    |           t          j        $                    |           |                    |            | S )Nzhip:Tr   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.AMDGCN_USE_BUFFER_OPS0r   )%r   r   r   r   r   r   add_convert_to_ttgpuirr$   r+   rO   r0   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr
   add_accelerate_matmulr@   rA   add_optimize_epilogueadd_optimize_dot_operandshas_matrix_core_featurer/   add_stream_pipelinev2r   r   insert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr   environgetadd_canonicalize_pointersadd_convert_to_buffer_opsr   r   r   s       r   
make_ttgirzHIPBackend.make_ttgir   s   _S[))
**2/Dgl/D/DgFWY`Yj+2+;	= 	= 	=
s_S[))
##B'''44R88833B777
00W\7C_ahanooo44R888
0044400T:::&w|44 	0%*** .P***
 J44R9KLLLM++B///
99"===00T:::44R888222666&w|44 	<J77;;;:>>13773>>J88<<<M++B///J88<<<''+++b!!!$$R(((
s
r   c                  
   | }t          j        |j                  }|                                 t          j        j                            ||j                   d}t          j        j        	                    ||j        |           t
          j
                            |           t
          j
                            |           t
          j                            |           d}t          j        j                            ||j        |           t
          j                            |           t
          j                            |           t
          j
                            |           t
          j
                            |           t
          j                            |           t
          j                            |           t
          j                            |           t          j        j                            ||j                   t.          j                            dd          dk    rt
          j                            |           t          j        j                            ||           |                    |           t=          j                     t=          j                    }t=          j         ||          t	          j!                   t=          j"        t          j#        |j        d           t	          j$        |j                   t	          j%        d           t	          j&        dd           t	          j&        d	d           t	          j&        d
d           t	          j&        d|j'        dk               d (                                D             }|d         )                    t          j*                   |d         +                    dd|j,        |j'        z              |d         +                    d|j-                    |j.        rdnd}	|d         +                    d|	           t	          j/        |d                    |j0        r(fd|j0        D             }
t=          j1        |
           t=          j2        t<          j3        |j        dg |j4                   | 5                    d          |d<   t	          j6                   to                    S )Nr   TTRITON_DISABLE_LINE_INFOr   r   i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rN   c                 :    g | ]}|                                 |S r"   )is_declaration)rb   fns     r   re   z(HIPBackend.make_llir.<locals>.<listcomp>0  s)    PPPbB<M<M<O<OPrPPPr   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr<   zdenormal-fp-math-f32c                 D    g | ]\  }}t          j        |          |S r"   )r
   need_extern_lib)rb   rc   pathllvm_mods      r   re   z(HIPBackend.make_llir.<locals>.<listcomp>>  s1    iiiltTSEXYacgEhEhiTiiir   ztriton_gpu.sharedr   )8r   r   r   r   r
   r   r   %add_decompose_unsupported_conversionsr$   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   lower_instruction_sched_hintsrG   r   r   r   llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r	   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrO   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr+   r-   rB   set_all_fn_arg_inregr5   link_extern_libsoptimize_moduleOPTIMIZE_O3r?   get_int_attrcleanup_bitcode_metadatarW   )srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   fnsdenormal_modepathsr	  s              @r   	make_llirzHIPBackend.make_llir   s4   _S[))

@@W\RRR 
11"glOTTT$$R(((**2...11"555 	
((W\9EEE''+++b!!!''+++**2...''+++b!!!$$R(((
88W=^___:>>4c::cAAM&&r***
55b)DDD
s 	,..>#w// ***x):GL"MMM 	Hgl333Hc***%h0H%PPP%h0QSWXXX%h0H%PPP%h0H'J[_aJabbb QPH2244PPPA >???A8:dw?PQXQb?b:d:deeeA0W5I2KLLL+2+EQ6A1=AAA
 	 Q((( 	3iiiig.AiiiE!(E222Xt'7r2wOghhh !--.ABB$X...8}}r   c           	      P   t          j        d|           }t          |          dk    sJ |d         |d<   t          j        | t
          j        |j        dg |j        d          }t          j
                            dd          d	k    rt          d
           t          |           |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r,   r   rc   r   FAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)refindalllenr	   translate_to_asmr
   r  r$   r?   r   r   r   print)r)  r   r   namesamdgcns        r   make_amdgcnzHIPBackend.make_amdgcnI  s    
 
QSVWW5zzQ 8&sC,=w|RQSU\Umotuu:>>.44;;5666&MMMr   c                 j   t          j        | |j        d          }t                                          }t          j                    5 }t          j                    5 }t          |j        d          5 }|	                    |           d d d            n# 1 swxY w Y   t          j        |ddd|j        d|j        g           d d d            n# 1 swxY w Y   t          |j        d          5 }|                                }	d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |	S )Nr   wbz-flavorgnuz-sharedz-orb)r
   assemble_amdgcnr$   r   r   tempfileNamedTemporaryFileopenrc   write
subprocess
check_callread)
r)  r   r   r   	rocm_pathtmp_outtmp_infd_infd_outrets
             r   
make_hsacozHIPBackend.make_hsacoX  s   #Cr:://11	(** 	$g,.. q&&+t,, 'KK&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' '%y)UIv{\`bibn&opppq q q q q q q q q q q q q q q glD)) $Vkkmm$ $ $ $ $ $ $ $ $ $ $ $ $ $ $	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 
s~   D(C2BCBCB(CD(C	D(C	D(0DD(D	D(D	D((D,/D,c                 d      fd|d<    fd|d<    fd|d<    fd|d<    fd	|d
<   d S )Nc                 2                         | |          S r   )r   r)  r   r   rZ   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>g      t~~c8W/U/U r   r   c                 2                         | |          S r   )r   rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>h      Xw0W0W r   ttgirc                 2                         | |          S r   )r/  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>i  rP  r   llirc                 2                         | |          S r   )r9  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>j  s    1A1A#xQX1Y1Y r   r8  c                 2                         | |          S r   )rL  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>k  rR  r   r   r"   )rZ   stagesr   s   ` `r   
add_stageszHIPBackend.add_stagesf  sr    UUUUUvWWWWWwUUUUUvYYYYYxWWWWWwr   c                 x    t          j        t                                          dgd          }| d| j         S )Nz	--versionrf   )encodingra   )rC  check_outputr   r   r   )rZ   versions     r   rn   zHIPBackend.hashm  s?    ):+F+F+H+H+*Vahiii))DK)))r   )ro   rp   rq   r   r   r   r   r   r   r   r   r   rW   r   r   r   r   r   r   r   r   r/  r9  rL  rY  	functools	lru_cachern   __classcell__)r   s   @r   r   r   u   s       '	 ' ' ' \'"y "T " " " " " "
"S " " " "
 
 
  >S*_ 5 > > > >  0 0 0 ? ? \? s s \s&   \ % % \%N P P \Pd   \   \X X X Y* * * * * * *r   r   )triton.backends.compilerr   r   r   r   triton._C.libtritonr   r   r	   r
   dataclassesr   typingr   r   r   typesr   ri   r?  r   r2  rC  r^  pathlibr   r&   r)   rv   r   r"   r   r   <module>rg     s   a a a a a a a a a a a a 5 5 5 5 5 5 5 5 5 5 5 5 ! ! ! ! ! ! # # # # # # # # # #         				 				              
1 
1 
1 
1 
1 $/? /? /? /? /? /? /? /?d "# "# "# "# "# "# "# "#J{* {* {* {* {* {* {* {* {* {*r   