
    wi3                        d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  ed          deeee         f         fd            ZdefdZd Z d Z!d Z"dS )    N)	lru_cache)BinaryIODictOptionalTupleTypeUnion)lazy_import_sox_ext)CodecConfig)lazy_import_ffmpeg_ext   )soundfile_backend)Backend)AudioMetaData)FFmpegBackend)SoundfileBackend)
SoXBackendreturnc                      i } t                                                      r
t          | d<   t                                                      r
t          | d<   t
          j        r
t          | d<   | S )Nffmpegsox	soundfile)r   is_availabler   r
   r   r   _IS_SOUNDFILE_AVAILABLEr   )backend_specss    i/root/.openclaw/workspace/chatterbox_venv_py311/lib/python3.11/site-packages/torchaudio/_backend/utils.pyget_available_backendsr      sj    .0M,,.. 0"/h))++ *)e0 6%5k"    c                     |                     |           x}r|S t          d|  ddt          |                                           d          )NzUnsupported backend 'z' specified; zplease select one of z	 instead.)get
ValueErrorlistkeys)backend_namebackendsbackends      r   get_backendr'       s^    ,,|,,,w 
?L???DD$9$9DDD
 
 	
r   c                     t                      dt          t          t          t          j        f         dt          t                   dt          t                   dt          ffd	 	 	 d
dt          t          t          t          j        f         dt          t                   dt          dt          t                   dt          f
fd	} | S )Nuriformatr$   r   c                     |t          |          S                                 D ]}|                    | |          r|c S t          d|  d| d          Nz0Couldn't find appropriate backend to handle uri z and format .r'   values
can_decodeRuntimeErrorr)   r*   r$   r&   r%   s       r   
dispatcherz!get_info_func.<locals>.dispatcher-   }     #|X666(( 	 	G!!#v.. hchh_ehhhiiir      buffer_sizer&   c                 L     | ||          }|                     | ||          S )a  Get signal information of an audio file.

        Note:
            When the input type is file-like object, this function cannot
            get the correct length (``num_samples``) for certain formats,
            such as ``vorbis``.
            In this case, the value of ``num_samples`` is ``0``.

        Args:
            uri (path-like object or file-like object):
                Source of audio data. The following types are accepted:

                * ``path-like``: File path or URL.
                * ``file-like``: Object with ``read(size: int) -> bytes`` method,
                  which returns byte string of at most ``size`` length.

            format (str or None, optional):
                If not ``None``, interpreted as hint that may allow backend to override the detected format.
                (Default: ``None``)

            buffer_size (int, optional):
                Size of buffer to use when processing file-like objects, in bytes. (Default: ``4096``)

            backend (str or None, optional):
                I/O backend to use.
                If ``None``, function selects backend given input and available backends.
                Otherwise, must be one of [``"ffmpeg"``, ``"sox"``, ``"soundfile"``],
                with the corresponding backend available.
                (Default: ``None``)

                .. seealso::
                   :ref:`backend`

        Returns:
            AudioMetaData
        )info)r)   r*   r6   r&   r3   s       r   r8   zget_info_func.<locals>.info8   s/    T *S&'22||C555r   )Nr5   N)
r   r	   r   strosPathLiker   r   intr   )r8   r%   r3   s    @@r   get_info_funcr=   *   s    %''H	j8S"+-.	j8@	jU]^aUb	j		j 	j 	j 	j 	j 	j !%!%	+6 +68S"+-.+6+6 +6 #	+6
 
+6 +6 +6 +6 +6 +6Z Kr   c                     t                      dt          t          t          t          j        f         dt          t                   dt          t                   dt          ffd	 	 	 	 	 	 	 ddt          t          t          t          j        f         d
t          dt          dt          dt          dt          t                   dt          dt          t                   dt          t          j        t          f         ffd} | S )Nr)   r*   r$   r   c                     |t          |          S                                 D ]}|                    | |          r|c S t          d|  d| d          r,   r.   r2   s       r   r3   z!get_load_func.<locals>.dispatcherk   r4   r   r   Tr5   frame_offset
num_frames	normalizechannels_firstr6   r&   c           	      T     | ||          }|                     | ||||||          S )a  Load audio data from source.

        By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
        ``float32`` dtype, and the shape of `[channel, time]`.

        Note:
            The formats this function can handle depend on the availability of backends.
            Please use the following functions to fetch the supported formats.

            - FFmpeg: :py:func:`torchaudio.utils.ffmpeg_utils.get_audio_decoders`
            - Sox: :py:func:`torchaudio.utils.sox_utils.list_read_formats`
            - SoundFile: Refer to `the official document <https://pysoundfile.readthedocs.io/>`__.

        .. warning::

            ``normalize`` argument does not perform volume normalization.
            It only converts the sample type to `torch.float32` from the native sample
            type.

            When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
            signed integer, 24-bit signed integer, and 8-bit unsigned integer, by providing ``normalize=False``,
            this function can return integer Tensor, where the samples are expressed within the whole range
            of the corresponding dtype, that is, ``int32`` tensor for 32-bit signed PCM,
            ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. Since torch does not
            support ``int24`` dtype, 24-bit signed PCM are converted to ``int32`` tensors.

            ``normalize`` argument has no effect on 32-bit floating-point WAV and other formats, such as
            ``flac`` and ``mp3``.

            For these formats, this function always returns ``float32`` Tensor with values.


        Args:
            uri (path-like object or file-like object):
                Source of audio data.
            frame_offset (int, optional):
                Number of frames to skip before start reading data.
            num_frames (int, optional):
                Maximum number of frames to read. ``-1`` reads all the remaining samples,
                starting from ``frame_offset``.
                This function may return the less number of frames if there is not enough
                frames in the given file.
            normalize (bool, optional):
                When ``True``, this function converts the native sample type to ``float32``.
                Default: ``True``.

                If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
                integer type.
                This argument has no effect for formats other than integer WAV type.

            channels_first (bool, optional):
                When True, the returned Tensor has dimension `[channel, time]`.
                Otherwise, the returned Tensor's dimension is `[time, channel]`.

            format (str or None, optional):
                If not ``None``, interpreted as hint that may allow backend to override the detected format.
                (Default: ``None``)

            buffer_size (int, optional):
                Size of buffer to use when processing file-like objects, in bytes. (Default: ``4096``)

            backend (str or None, optional):
                I/O backend to use.
                If ``None``, function selects backend given input and available backends.
                Otherwise, must be one of [``"ffmpeg"``, ``"sox"``, ``"soundfile"``],
                with the corresponding backend being available. (Default: ``None``)

                .. seealso::
                   :ref:`backend`

        Returns:
            (torch.Tensor, int): Resulting Tensor and sample rate.
                If the input file has integer wav format and normalization is off, then it has
                integer type, else ``float32`` type. If ``channels_first=True``, it has
                `[channel, time]` else `[time, channel]`.
        )load)	r)   rA   rB   rC   rD   r*   r6   r&   r3   s	           r   rF   zget_load_func.<locals>.loadv   s9    l *S&'22||Cz9nV\^ijjjr   )r   r@   TTNr5   N)r   r	   r   r9   r:   r;   r   r   r<   boolr   torchTensor)rF   r%   r3   s    @@r   get_load_funcrJ   h   sI   %''H	j8S"+-.	j8@	jU]^aUb	j		j 	j 	j 	j 	j 	j # $!%Wk Wk8S"+-.WkWk Wk 	Wk
 Wk Wk Wk #Wk 
u|S 	!Wk Wk Wk Wk Wk Wkr Kr   c                  Z   t                      dt          t          t          t          j        f         dt          t                   dt          t                   dt          ffd	 	 	 	 	 	 	 ddt          t          t          t          j        f         dt          j	        d	t          d
t          dt          t                   dt          t                   dt          t                   dt          dt          t                   dt          t          t          t          t          f                  ffd} | S )Nr)   r*   r$   r   c                     |t          |          S                                 D ]}|                    | |          r|c S t          d|  d| d          r,   )r'   r/   
can_encoder1   r2   s       r   r3   z!get_save_func.<locals>.dispatcher   r4   r   Tr5   srcsample_raterD   encodingbits_per_sampler6   r&   compressionc
                 X     
| ||          }|                     | ||||||||		  	        S )ad  Save audio data to file.

        Note:
            The formats this function can handle depend on the availability of backends.
            Please use the following functions to fetch the supported formats.

            - FFmpeg: :py:func:`torchaudio.utils.ffmpeg_utils.get_audio_encoders`
            - Sox: :py:func:`torchaudio.utils.sox_utils.list_write_formats`
            - SoundFile: Refer to `the official document <https://pysoundfile.readthedocs.io/>`__.

        Args:
            uri (str or pathlib.Path): Path to audio file.
            src (torch.Tensor): Audio data to save. must be 2D tensor.
            sample_rate (int): sampling rate
            channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
                otherwise `[time, channel]`.
            format (str or None, optional): Override the audio format.
                When ``uri`` argument is path-like object, audio format is
                inferred from file extension. If the file extension is missing or
                different, you can specify the correct format with this argument.

                When ``uri`` argument is file-like object,
                this argument is required.

                Valid values are ``"wav"``, ``"ogg"``, and ``"flac"``.
            encoding (str or None, optional): Changes the encoding for supported formats.
                This argument is effective only for supported formats, i.e.
                ``"wav"`` and ``""flac"```. Valid values are

                - ``"PCM_S"`` (signed integer Linear PCM)
                - ``"PCM_U"`` (unsigned integer Linear PCM)
                - ``"PCM_F"`` (floating point PCM)
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)

            bits_per_sample (int or None, optional): Changes the bit depth for the
                supported formats.
                When ``format`` is one of ``"wav"`` and ``"flac"``,
                you can change the bit depth.
                Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.

            buffer_size (int, optional):
                Size of buffer to use when processing file-like objects, in bytes. (Default: ``4096``)

            backend (str or None, optional):
                I/O backend to use.
                If ``None``, function selects backend given input and available backends.
                Otherwise, must be one of [``"ffmpeg"``, ``"sox"``, ``"soundfile"``],
                with the corresponding backend being available.
                (Default: ``None``)

                .. seealso::
                   :ref:`backend`

            compression (CodecConfig, float, int, or None, optional):
                Compression configuration to apply.

                If the selected backend is FFmpeg, an instance of :py:class:`CodecConfig` must be provided.

                Otherwise, if the selected backend is SoX, a float or int value corresponding to option ``-C`` of the
                ``sox`` command line interface must be provided. For instance:

                ``"mp3"``
                    Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
                    VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.

                ``"flac"``
                    Whole number from ``0`` to ``8``. ``8`` is default and highest compression.

                ``"ogg"``, ``"vorbis"``
                    Number from ``-1`` to ``10``; ``-1`` is the highest compression
                    and lowest quality. Default: ``3``.

                Refer to http://sox.sourceforge.net/soxformat.html for more details.

        )save)r)   rN   rO   rD   r*   rP   rQ   r6   r&   rR   r3   s             r   rT   zget_save_func.<locals>.save   sD    p *S&'22||k>68_Vacn
 
 	
r   )TNNNr5   NN)r   r	   r   r9   r:   r;   r   r   rH   rI   r<   rG   r   float)rT   r%   r3   s    @@r   get_save_funcrV      sS   %''H	j8S"+-.	j8@	jU]^aUb	j		j 	j 	j 	j 	j 	j  $ $"&)-!%@D[
 [
8S"+-.[
\[
 [
 	[

 [
 3-[
 "#[
 [
 #[
 eK$;<=[
 [
 [
 [
 [
 [
z Kr   )#r:   	functoolsr   typingr   r   r   r   r   r	   rH   torchaudio._extensionr
   torchaudio.ior   torio._extensionr    r   r&   r   commonr   r   r   r   r   r   r   r9   r   r'   r=   rJ   rV    r   r   <module>r_      s   				       ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?  5 5 5 5 5 5 % % % % % % 3 3 3 3 3 3             ! ! ! ! ! ! ! ! ! ! ! ! ' ' ' ' ' '       4S$w-%7 8    
7 
 
 
 
; ; ;|g g gTk k k k kr   