
    wi>              "          d dl mZmZmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZ ddgZ G d de          Zd	e d
e de de de	 de_        dee         dee         dee         dee         dee         dee         dededededededededefdZdee         dee         dee         dee         dee         dee         dededededededededefdZ ee          	 	 	 	 	 d"dee         dee         dee         dee         dee         dee         d ee         dededededededededef d!            ZdS )#    )castListOptionalTupleUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype
_get_value_maximize_doc_params_doc_use_grad_for_differentiable_view_as_real	OptimizerParamsTASGDasgdc                        e Zd Z	 	 	 	 	 	 	 	 	 dded	eeef         d
ededededee         dededef fdZ	 fdZ
d Zedd            Z xZS )r   {Gz?-C6?      ?    .Ar   NFparamslrlambdalphat0weight_decayforeachmaximizedifferentiable
capturablec                 T   t          |t                    r'|                                dk    rt          d          d|k    st          d|           d|k    st          d|           t	          ||||||||	|
	  	        }t                                          ||           d S )Nr	   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid weight_decay value: )	r    r!   r"   r#   r$   r%   r&   r'   r(   )
isinstancer   numel
ValueErrordictsuper__init__)selfr   r    r!   r"   r#   r$   r%   r&   r'   r(   defaults	__class__s               `/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/torch/optim/asgd.pyr/   zASGD.__init__   s     b&!! 	<bhhjjAoo:;;;byy;r;;<<<l""JLJJKKK%)!

 

 

 	*****    c                 h   t                                          |           | j        D ]}|                    dd            |                    dd           |                    dd           |                    dd           |d         D ]!}| j                            |g           }t          |          dk    rt          j        |d                   s@t          |d                   }t          j
        |t                      |j        	          |d<   t          j        |d
                   s1t          j
        |d
         t                      |j        	          |d
<   t          j        |d                   s1t          j
        |d         t                      |j        	          |d<   #d S )Nr%   r&   Fr'   r(   r   r   step)dtypedeviceetamu)r.   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r8   )r0   r>   grouppp_statestep_valr2   s         r3   r;   zASGD.__setstate__?   s   U###& 	 	EY---Z///-u555\51118_  *..B//w<<1$$ ?76?;; #(#9#9*/,$,=,?,?+ + + !?75>:: ).#EN2C2E2Eah* * * !?74=99 (-#DM1B1D1DQX) ) )	 	r4   c                    d}|d         D ]}	|	j         |t          j        |	          z  }|                    |	           |	j         j        rt          d          |                    |	j                    | j        |	         }
t          |
          dk    rt          j        d|	j	        t                                |
d<   t          j        |d         |	j	        t                                                                                                |
d	<   t          j        d|	j	        t                                |
d
<   t          j        |	t          j                  |
d<   |                    |
d
                    |                    |
d                    |                    |
d	                    |                    |
d                    |S )NFr   z&ASGD does not support sparse gradientsr    )r8   r7   r6   r    r9   r:   )memory_formatax)gradrA   
is_complexappend	is_sparseRuntimeErrorr>   r@   zerosr8   r   	as_tensorclonedetachones
zeros_likepreserve_format)r0   rE   params_with_gradgradsmusaxsetasstate_stepshas_complexrF   r>   s              r3   _init_groupzASGD._init_groupW   s   x 	2 	2Av!u/222 ''***6# Q&'OPPPQV$$$
1u::??$)K183D3F3F% % %E&M !$K@Q@S@S    %L #(*183D3F3F# # #E$K #("2)># # #E$K 

5;'''

5;'''E%L)))""5=111r4   c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]~}g }g }g }g }g }g }	|                     |||||||	          }
t          ||||||	|d         |d         |d         |d         |d         |d         |d         |d	         |d
         |
           |S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr!   r    r#   r"   r$   r%   r&   r'   r(   )
r!   r    r#   r"   r$   r%   r&   r'   r(   r_   ) _cuda_graph_capture_health_checkrA   enable_gradr<   r`   r   )r0   closurelossrE   rY   rZ   r[   r\   r]   r^   r_   s              r3   r6   z	ASGD.step{   sg    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & 	 	E-/"$E "C "C!#D(*K**'S$ K  Gn;;Gn">2i(z*$%56 .'!    & s   AA
A)	r   r   r   r   r   NFFFN)__name__
__module____qualname__r   r   rC   r   r   boolr/   r;   r`   r   r6   __classcell__)r2   s   @r3   r   r      s        $("&$ + ++ %- + 	+
 + + + $+ + + + + + + + +B    0" " "H "- - - "!- - - - -r4   zImplements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.

    Args:
        am  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lambd (float, optional): decay term (default: 1e-4)
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        zx

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

    r   rZ   r\   r[   r]   r^   r!   r    r#   r"   r$   r&   r'   r(   r_   c       	   
      &   t          |           D ]\  }}||         }|s|n| }||         }||         }||         }||         }t          j                                        sk|rit	                      }|j        j        |j        j        cxk    r!|j        j        cxk    r|j        j        k    rn n|j        j        |v sJ d| d            t          j        |          r<t          j        |          }t          j        |          }t          j        |          }|dz  }|
dk    r|	                    ||
          }|r4|
                    d||z  z
             |                    ||d           nBt          |          }|
                    d||z  z
             |                    ||            |s|                                dk    r<|                    |                    |          
                    |                     n|                    |           |rf|                    |d||z  |z  z   |	z  z             |                    dt          j        ||z
  t          j        |                    z             {t          |          }t          j        |d||z  |z  z   |	z  z            }|                    |           t          j        dt)          d||z
            z            }|                    |           d S )NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .r	   r   r"   value)	enumeraterA   compileris_compilingr   r8   typerN   view_as_realaddmul_addcmul_r   add_itemsubcopy_maximum	ones_likerS   max)r   rZ   r\   r[   r]   r^   r!   r    r#   r"   r$   r&   r'   r(   r_   iparamrM   r:   rL   r9   step_tcapturable_supported_devices	eta_valuer6   new_etanew_mus                              r3   _single_tensor_asgdr      sX   $ f%% 7 75Qx#.tt$VV1gQ ~**,, 	 	+L+N+N(!9>& & & &:?& & & & =%& & & & & L%)EEEEI)EI I I FE	F E"" 	(%d++D&u--E#B''B 	!188E866D 	/JJq53;'''NN4BN////"3IJJq59,,---JJtI:J...  	aGGEIIbMM&&r**++++HHUOOO 	IIbQf!44>?@@@HHQv{EOF4K4KLLLMMMMf%%DobQd1B-Bu,L&MNNGIIg_QQr	):):%:;;FHHVo7 7r4   c       	   	        	 ! t          |           dk    rd S |r
J d            t          j                                        sK|rIt	          d           t           fdt          | |||          D                       sJ d  d            t          j        | |||||g          }|	                                D ]I\  \  !}\  \  }}}}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }|rt          |||           |rt          j        |          }t          j                                        s9|d         j        r,t          j        |t          j        dd	
          d           nt          j        |d           |
dk    rK|rt          j        |||
           |}nt          j        |||
          }t          j        ||           nt          j        ||          }t          j        |||d           ~t          j        ||          }t          j        |||           ~|rt          j        |          }t          j        |d           t          j        |           t          j        ||           ~t          j        |          }t          j        |           t          j        |d           t          j        |	           t          j        |           t          j        |           t          j        ||           	!fd|D             }!fd|D             }t          j        ||           t          j        ||           Kd S )Nr   z#_foreach ops don't support autogradF)supports_xlac              3      K   | ]V\  }}}}|j         j        |j         j        cxk    o |j         j        cxk    o|j         j        k    nc o|j         j        v V  Wd S rf   )r8   rv   ).0rF   r:   r9   r6   r   s        r3   	<genexpr>z%_multi_tensor_asgd.<locals>.<genexpr>+  s       
 
 !2sD HMRY^RRRRszRRRR$+BRRRRR >!==
 
 
 
 
 
r4   rm   rn   g      ?cpur8   ro   r	   rp   rq   c                 Z    g | ]'}t          j        d z  |z  z   z  z            (S r	   r   )rA   rS   )r   r6   r"   r8   r!   r    s     r3   
<listcomp>z&_multi_tensor_asgd.<locals>.<listcomp>  sQ        q52:+<'<&F GPVWWW  r4   c                 ~    g | ]9}t          j        d t          d t          |          z
            z            :S r   )rA   rS   r   r   )r   r6   r8   r#   s     r3   r   z&_multi_tensor_asgd.<locals>.<listcomp>  sT        C:d+;+;b+@$A$A A&QQQ  r4   )r@   rA   rt   ru   r   allzipr   "_group_tensors_by_device_and_dtypeitemsr   r   r   r   _foreach_negis_cpu_foreach_add_rD   _foreach_add_foreach_addcmul__foreach_sub_foreach_maximum__foreach_reciprocal__foreach_copy__foreach_mul_foreach_mul__foreach_pow_)"r   rZ   r\   r[   r]   r^   r!   r    r#   r"   r$   r&   r'   r(   r_   grouped_tensors_grouped_params_grouped_grads_grouped_axs_grouped_mus_grouped_etas_grouped_state_steps_grouped_paramsgrouped_gradsgrouped_axsgrouped_musgrouped_etasgrouped_state_stepsintermediatenew_musnew_etasr   r8   s"         ````                      @@r3   _multi_tensor_asgdr     s   $ 6{{aDDDDD >&&(( CZ C'H(
 (
 (
$  
 
 
 
 %(T;$G$G
 
 
 
 
 	C 	C Cc  C  C  C		C 	C 
  B	S$4 O 
			 	 c7 c7 
	 
	
 	d6lO<<T&\>::4<664<66DL-88"4<1EFF 	F.-EEE 	>!.}==M ~**,, 	81DQ1G1N 	8#U\#e%D%D%DC      3Q777 1 #M>VVVV,$1!>      nEJJJJJ -~U  L 	lRTUUUU ).+FF\;GGG  	7()<bAAG#GS111&w/// g666 )*=uEEH"---!,,,%000&x000"--- x8888      /  H    /  G  x888 g6666Gc7 c7r4   )single_tensor_fnFr%   c                $   |t          | |d          \  }}|r-t          j                                        rt	          d          |r&t          j                                        st
          }nt          } || |||||||||||||	|
           dS )znFunctional API that performs asgd algorithm computation.

    See :class:`~torch.optim.ASGD` for details.
    NF)	use_fusedz6torch.jit.script not supported with foreach optimizers)	r!   r    r#   r"   r$   r&   r'   r(   r_   )r   rA   jitis_scriptingrQ   r   r   )r   rZ   r\   r[   r]   r^   r%   r&   r'   r(   r_   r!   r    r#   r"   r$   r   funcs                     r3   r   r     s    4 1Ne
 
 

7  U59))++ USTTT #uy--// #!"D!%     r4   )NFFFF)typingr   r   r   r   r   rA   r   	optimizerr
   r   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__rC   rj   r   r   r   rJ   r4   r3   <module>r      s1   6 5 5 5 5 5 5 5 5 5 5 5 5 5                                      $ 6
L L L L L9 L L L^ 
  
  
  
  
  .ILI<I 
fI 
f	I
 v,I fI I 	I 	I I I I I I  !I I I IXH7LH7<H7 
fH7 
f	H7
 v,H7 fH7 H7 	H7 	H7 H7 H7 H7 H7 H7  !H7 H7 H7 H7V  1DEEE # 6 6L6<6 
f6 
f	6
 v,6 f6 d^6 6 6 6 6 6  	!6" 	#6$ %6& '6 6 6 FE6 6 6r4   