
    wiD                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZmZ d dl m!Z! d dl"m#Z#m$Z$ g dZ%d	e&d
ee&e&f         fdZ'dee         dej(        dee&e	f         fdZ)dej(        dee&e	f         dej        j*        fdZ+d+dej        j*        d
ej        j*        fdZ,dej*        d
ej*        fdZ-dej*        deej(                 deej(                 deej(                 fdZ.ej/        ej0        ej1        ej2        ej3        ej4        ej5        ej6        ej7        ej8        ej6        ej9        ej:        gZ;ej<        ej=        gZ>ej/        ej?        ej0        ej@        ej1        d iZAdeej(                 dee&ej*        f         fdZBdeej(                 dee&ej*        f         deej*        ej*        f         fdZC G d d           ZDd,d#ZEd$eDd
eFfd%ZG G d& d'          ZHdejI        fdej        j*        d(eee&e	f                  d)eejI                 d
ej        j*        fd*ZJdS )-    N)defaultdict)Enum)AnycastDictIterableListOptionalTupleType)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 P    |                      dd          ^ }}|r|d         nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      r/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/torch/fx/experimental/optimization.py_parent_namer(   $   s3    
 MM#q))MVT&6!99B,,    patternnodemodulesc                 j   t          |j                  dk    rdS |j        d         |f}t          | |          D ]x\  }}t          |t          j                  s dS |j        dk    r dS t          |j        t                    s dS |j        |vr dS t          ||j                           |ur dS ydS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r*   r+   r,   nodesexpected_typecurrent_nodes         r'   r   r   .   s     49~~u"&)A,!5E'*7E':': 
 
#|,00 	55?m++55,-s33 	55g--55+,--]BB55 C4r)   
new_modulec                     t          | j        t                    sJ t          | j                  \  }}||| j        <   t	          ||         ||           d S N)r2   r   r6   r(   setattr)r+   r,   r;   parent_namer&   s        r'   r   r   B   sY     dk3'''''$T[11K%GDKGK $
33333r)   Fmodelc                    t           j        t           j        ft           j        t           j        ft           j        t           j        ft           j        t           j        fg}|st          j	        |           } |rt          | t          j        j                  st          j        |           }n| }t          |                                          }t          j	        |j                  }|D ]}|j        D ]}t'          |||          rt)          |j        d         j                  dk    r8||j        d         j                 }	||j                 }
|
j        se|d         t           j        t           j        t           j        fv rt3          |	|
          }nt5          |	|
          }t7          |j        d         ||           |                    |j        d                    |                    |           t          j        ||          S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r"   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr2   torchr3   GraphModulesymbolic_tracedictnamed_modulesgraphr8   r   r/   r0   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r@   inplaceno_tracepatternsfx_modelr,   	new_graphr*   r+   first_layerbnfused_layers               r'   r   r   K   s    
BN#	BN#	BN#	BN#	H  %e$$ :eUX-ABB $U++8))++,,Ghn--I + +O 	+ 	+D%gtW== +ty|)**Q..%dil&9:T[)- 1:")RY	!BBB"3K"D"DKK"5k2"F"FK#DIaL';GGG**49Q<888$$T***	+  >(I...r)   c                     t          j        |           } G d dt          j         j                  } ||                                          S )z5
    Removes all dropout layers from the module.
    c                   P     e Zd Zdedeedf         deeef         def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r0   .kwargsr   c                     t          | j        |         t          j                  rt	          |          dk    sJ |d         S t                                          |||          S )Nr"   r   )r2   
submodulesrB   Dropoutr/   superr.   )selfr   r0   ra   	__class__s       r'   r.   z2remove_dropout.<locals>.DropoutRemover.call_modulez   sY     $/&12:>> A4yyA~~~~Awww**64@@@r)   )__name__
__module____qualname__r   r   r   r   r6   r   r.   __classcell__)rg   s   @r'   DropoutRemoverr`   y   s}        	A 	A(-hm(<	AFJ3PS8n	A	A 	A 	A 	A 	A 	A 	A 	A 	A 	Ar)   rl   )r3   rN   rL   Transformer	transform)r@   rY   rl   s      r'   r   r   s   si      ''HA A A A A- A A A >(##--///r)   orig_moduler8   inputsoutputsc                 P   t          j                    }i |D ]!}|                    |j                  }||<   "|D ] }|                    |fd          }||<   !|                    fd|D                        |                                 t          j        | |          S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |          S r=    )xenvs    r'   <lambda>z"extract_subgraph.<locals>.<lambda>   s    s1v r)   c                      g | ]
}|         S rt   rt   ).0outputrv   s     r'   
<listcomp>z$extract_subgraph.<locals>.<listcomp>   s    888fc&k888r)   )r3   Graphplaceholderr&   	node_copyrz   lintrM   )	ro   r8   rp   rq   rZ   inputnew_noder+   rv   s	           @r'   r   r      s     

I"$C  ((44E

  &&t-=-=-=-=>>D		8888888999NN>+y111r)   c                 *    t          j        |           S r=   )	th_mkldnnMkldnnBatchNorm)a_s     r'   rw   rw      s    !:1!=!= r)   c                    i }| D ]}|j         dk    rt          |j        t                    sJ ||j                 }t	          |          t
          v rqt          t	          |                   |t          j                  }t          |t          j	                  sJ t          j        |          ||<   t          |||           |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r.   )r5   r2   r   r6   r7   
mkldnn_maprL   floatrB   ModulerJ   rK   r   )r8   r,   old_modulesr+   
cur_moduler;   s         r'   r   r      s     /1K ? ?7m##dk3///// -JJ:--'Z(8(89*ekRR
!*bi88888*.-
*C*CJ'#D':>>>r)   r   c                     | D ]Q}|j         dk    rDt          |j        t                    sJ ||j                 }||v rt	          ||||                    RdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r.   N)r5   r2   r   r6   r   )r8   r,   r   r+   r   s        r'   r   r      su      L L7m##dk3///// -J[((#D';z3JKKKL Lr)   c                   $    e Zd Zdej        fdZdS )r   fx_graphc                 >    || _         g | _        g | _        g | _        d S r=   )r   r8   start_nodes	end_nodes)rf   r   s     r'   __init__zMklSubgraph.__init__   s#     $&
*,(*r)   N)rh   ri   rj   r3   r|   r   rt   r)   r'   r   r      s1        + + + + + + +r)   r   
   r"   c                 H     dddt           dt          f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrQ   r   c                    | j         }	:| j        j        	| j        j        t	          	                                         d |D             t          t          t          j	                 d | j
        D                       }t          	| j        ||          
fd} |fd          }t          j        j        t                                                                |fd          }||k     S )Nc                 @    g | ]}t          j        |j                  S rt   )rL   randnshapery   r+   s     r'   r{   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s$    IIITTZ00IIIr)   c                 (    g | ]}|j         d          S )r   )r0   r   s     r'   r{   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s    *T*T*TD49Q<*T*T*Tr)   c                     t                    D ]} |              t          j                    }t                    D ]} |              t          j                    |z
  S r=   )rangetime)fr   beginiterswarmups      r'   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark   sc    6]]  IKKE5\\  9;;&&r)   c                  6    d  d  D              D             S )Nc                 6    g | ]}|                                 S rt   )to_densery   is     r'   r{   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s-       !"

  r)   c                 6    g | ]}|                                 S rt   )	to_mkldnnr   s     r'   r{   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s     1W1W1WA!++--1W1W1Wr)   rt   sample_inputs	submodules   r'   rw   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s7      &/i1W1W1W1W1W&X   r)   c                         S r=   rt   r   s   r'   rw   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s    		=(A r)   )r   r   owning_moduler   r   	propagater   r	   r3   r4   r   r   r8   r   rQ   rO   rP   )rQ   input_nodesoutput_argsr   mkl_timeno_mkl_timer   r   example_inputsrY   r   r   r   s         @@r'   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s8   '~3H.4Kh)).999II[III4=*T*TEO*T*T*TUU$Xu{KUU		' 	' 	' 	' 	' 	' 9    
 
 	O!4	(?(?(A(A#B#BK	
 	
 	
  i A A A A ABB+%%r)   )r   bool)r   r   r   r   rY   r   s   ``` @@r'   r   r      s[     HK& & & & & & & & & & & &> r)   rQ   c                 2    t          | j                  dk    S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r/   r8   )rQ   s    r'   r   r     s     u{ar)   c                   @    e Zd Zd ZdefdZdedefdZdedefdZd	S )
r   c                 2    d g|z  | _         dg|z  | _        d S )Nr   r%   size)rf   ns     r'   r   zUnionFind.__init__  s     ,06A: !sQw			r)   vc                 .    || j         |<   d| j        |<   d S )Nr"   r   )rf   r   s     r'   make_setzUnionFind.make_set  s    A	!r)   r   c                     | j         |         }||k    r|S |J |                     |          | j         |<   t          t          | j         |                   S r=   )r%   findr   int)rf   r   pars      r'   r   zUnionFind.find#  sO    k!n88H3ACQ(((r)   r   bc                     |                      |          |                      |          }}||k    r|S | j        |         | j        |         k     r||}}|| j        |<   | j        |xx         | j        |         z  cc<   d S r=   )r   r   r%   )rf   r   r   s      r'   joinzUnionFind.join+  sz    yy||TYYq\\166H9Q<$)A,&&aqAA	!	!$r)   N)rh   ri   rj   r   r   r   r   r   rt   r)   r'   r   r     s        ' ' '#    )c )c ) ) ) )%c %c % % % % % %r)   r   pass_configtracerc                    dddt           id}|i }|                    |           |d         rt          |           } |d         rt          |           } |d         du r| S t	          |d         t
                    st          d	          d|d         vrt          d
          |d         d         } |            }|                    t          j	        |                     t          j        |j                   t          |                                           } G d dt                    }t          j                  D ]}|j        }	|j        dk    r||j                 }
t)          |
          t*          v rq|j        }	t/          |
                                d          }|F|j        t4          j        k    s
J d            |j        t5          j        d          k    s
J d            n6|j        dk    r+|j        t*          v r|j        }	n|j        t:          v r|j        }	|	|j        k    r|	|j        k    r t?          d |j         D                       s!                    |          5  t          j"        |j         fd          }ddd           n# 1 swxY w Y   tG          tH          t          j%        j&                 |          |_         '                    |          5  (                    dd|f          }|)                    |           |f|_         ddd           n# 1 swxY w Y   tU          t          j                  |          }|_+        j        D ]}|j        dk    r|j        dk    r|j         d         }t          |j,                  }|D ]B}|j        dk    r5|j        dk    r*|)                    |           -                    |           Ct]          |j,                  dk    r-                    |           t]          j                  }t_          |          fdta          j                  D ]
\  }}|j        dk    r(|j        dk    r||_1        2                    |           9|j        dk    r?|j        dk    r4 |j         d                   J  |j         d                   |_3        fd|j4        D             }t]          |          dk    rt?          d |D                       rJ tk          |          }|d         |_6        |dd         D ]}7                    |d         |           tq          fd          }j        D ]}ts          |d          r8|:                    |j6                           j        ;                    |           ts          |d          r8|:                    |j1                           j<        ;                    |           ts          |d          r8|:                    |j3                           j=        ;                    |           |>                                D ]l} ||          s_|j<        |j=        z   D ]9}|j         d         }|)                    |           -                    |           :t          |j        ||           md}j        D ]}|j        dk    s|j        dk    r|dz  }t          jA        t                    C                    d |           D                                 t          j        |           }|S )!a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZdS )*optimize_for_inference.<locals>.MklSupportr"   r      N)rh   ri   rj   NOYESUNKNOWNrt   r)   r'   
MklSupportr   a  s        r)   r   r.   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   ,   K   | ]}|j         d k    V  dS )r   N)r   )ry   args     r'   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>  s)      II3:3IIIIIIr)   c                 2                         d| f          S )Nr   )call_method)r   r   s    r'   rw   z(optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)P)P r)   r   r   r   r   c                     t          | d          r                    | j                  S t          | d          r                    | j                  S d S )Ncolorstart_color)hasattrr   r   r   )r   ufs    r'   	get_colorz)optimize_for_inference.<locals>.get_color  sT    1g 	$7717###1m$$ 	*771=)))tr)   c                 p    g | ]2}t          |t          j                  r |          ' |          3S r=   )r2   r3   r4   )ry   r   r   s     r'   r{   z*optimize_for_inference.<locals>.<listcomp>  sQ       a)) 9Q<<+ 	! ,++r)   c              3      K   | ]}|d u V  	d S r=   rt   r   s     r'   r   z)optimize_for_inference.<locals>.<genexpr>  s&      9919999999r)   r"   c                  "    t                     S r=   )r   )r   s   r'   rw   z(optimize_for_inference.<locals>.<lambda>  s    H@U@U r)   r   r   	end_colorzmkldnn conversions: %s)Er   updater   r   r2   rO   RuntimeErrortracerJ   rK   r3   rM   rootrP   r   listr8   r   r5   r   r7   mkldnn_supportedr   next
parametersdtyperL   r   devicemkldnn_supported_unknownr   anyr0   inserting_beforemap_argr   r   r+   r   inserting_aftercreate_noderT   r   r   rR   rU   r/   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerrh   infor   )r@   r   r   default_pass_configr   
cur_tracerr,   r   r+   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderR   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrQ   prvmkldnn_conversionsresultr   r   r   s                              @@@r'   r   r   5  sc   & #."? 
 {+++>* U+, &u%%34==)*BCTJJ JHIII-.FGGGQRRR+,DEkRJe 4 455HN:?H---$()<)<)>)>$?$?G    T    X^$$ "' "'$-7m## -JJ#333",.#'
(=(=(?(?#F#F #/(.%+===B >==+2el7 7   :   W''{...",. 888","4jm++*"444IItyIIIII **400   jIPPPP               
 U27#34kBBDI))$// ' '"..}j4'RR**7333 $w' ' ' ' ' ' ' ' ' ' ' ' ' ' ' $D$8$8'BBK&H  	* 	*7m##z(A(Ay|H$$E . .7m++{0J0J..x888''---4:!####D)))HN##I	9		B    $ #8>22 4 47m##{(B(B&DKK    W%%$+*C*C9TYq\**666&Yty|44DNN   -  J :!##99j99999999
++J#ADJ)!""~ 4 4
1{33334 -88U8U8U8U,V,VM J J4!! 	B"''$*--.4;;DAAA4'' 	N"''$"2334@GGMMM4%% 	J"''$.112<CCDIII %%'' = =  '' 	=)EO; * *il**3///##D))))%+w<<< $ $;+%%
)B)B!#h$$%=?QRRRMMOOO^E8,,FMs$   4JJ"	%J"	-6L//L3	6L3	)FF)r   r"   )KrJ   r   operatorr   collectionsr   enumr   typingr   r   r   r   r	   r
   r   r   rL   torch.fxr3   torch.nnrB   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder   r   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r6   r(   r4   r   r   r   r   r   r   rE   rI   rF   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rt   r)   r'   <module>r-     s       # # # # # #       I I I I I I I I I I I I I I I I I I I I                       & & & & & & & & & * * * * * * * * 0 0 0 0 0 0 H H H H H H H H   - -sCx - - - -d^#%759#s(^   (4
'4 cN48=4 4 4 4%/ %/ %/58? %/ %/ %/ %/P0") 0	 0 0 0 0&22=2 M2 "']	2 2 2 2. IINGLL	J	O	MFL & %L(,7 Iy%Iy%N==
T"'] T#ry.5I    &L=L#ry.!L bi*+L L L L"+ + + + + + + ++ + + +\ +  $        % % % % % % % %: -1 ip p8?p$sCx.)p Op X_	p p p p p pr)   