o
    
Fi                     @   s6  d dl Z d dlZd dlmZ ddlmZ ddlmZmZ ddl	m
Z
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ e,ej-dZ.G dd dZ/G dd dZ0dS )    N)JSONDecodeError   )ApiError)AsyncClientWrapperSyncClientWrapper)AsyncHttpResponseHttpResponse)parse_obj_as)RequestOptions)'convert_and_respect_annotation_metadata)BadRequestError)ForbiddenError)InternalServerError)TooManyRequestsError)UnprocessableEntityError)"ChatCompletionRequestMessageParams)ChatCompletionToolParams)StopConfigurationParams)ToolChoiceOptionParams)ChatCompletionChunk)CreateChatCompletionResponse)ReasoningEffort)SarvamModelIds.c                &   @   z  e Zd ZdefddZejdddddddddddddddeje de	dej
e d	ej
e d
ej
e dej
e dejd dej
e dej
e dej
e dej
e dej
e dej
e dej
eje  dej
e dej
e deje f"ddZejddddddddddddddddeje de	dej
e d	ej
e d
ej
e dej
e dej
ejd  dej
e dej
e dej
e dej
e dej
e dej
e dej
eje  dej
e dej
e dee f"ddZeeeeeeeeeeeeedddeje de	dej
e d	ej
e d
ej
e dej
e dej
e dej
e dej
e dej
e dej
e dej
e dej
e dej
eje  dej
e dej
e dejee eje f f"ddZeeeeeeeeeeeedddeje de	dej
e d	ej
e d
ej
e dej
e dej
e dej
e dej
e dej
e dej
e dej
e dej
eje  dej
e dej
e deje f dd ZdS )!RawChatClientclient_wrapperc                C   
   || _ d S N_client_wrapperselfr    r"   e/home/aiteam/pcaa-dev/dashboard-backend/venv/lib/python3.10/site-packages/sarvamai/chat/raw_client.py__init__       
zRawChatClient.__init__.temperaturetop_preasoning_effort
max_tokensstopnseedfrequency_penaltypresence_penaltywiki_groundingtoolstool_choicerequest_optionsmessagesmodelr'   r(   r)   r*   streamTr+   r,   r-   r.   r/   r0   r1   r2   r3   returnc                C      d S r   r"   r!   r4   r5   r'   r(   r)   r*   r6   r+   r,   r-   r.   r/   r0   r1   r2   r3   r"   r"   r#   completions#      zRawChatClient.completionsr'   r(   r)   r*   r6   r+   r,   r-   r.   r/   r0   r1   r2   r3   Fc                C   r8   r   r"   r9   r"   r"   r#   r:   9   r;   Nc                C   sz  |du r| j ||||||||	|
||||||dS | jjjd| j jdt|tjt	 dd||||||t|t
dd|	|
|||t|tjt ddt|tddddd	i|td
}zd|j  krbdk rwn ntttt| d}t||dW S |jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd| }W n ty1   t|jt|j|jdw t|jt|j|d)a`  
        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        model : SarvamModelIds
            Model ID used to generate the response, like `sarvam-m`.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]
            The effort to use for reasoning

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            When true, returns an Iterator[ChatCompletionChunk] instead of HttpResponse.

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
            Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If set to true, the model response will be wiki grounded.

        tools : typing.Optional[typing.Sequence[ChatCompletionToolParams]]
            A list of tools the model may call. Currently, only functions are supported as a tool.

        tool_choice : typing.Optional[ToolChoiceOptionParams]
            Controls which (if any) tool is called by the model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[CreateChatCompletionResponse] or Iterator[ChatCompletionChunk]
            When stream=False (default): HttpResponse wrapping CreateChatCompletionResponse.
            When stream=True: Iterator yielding ChatCompletionChunk objects.
        Tr4   r5   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   v1/chat/completionsPOSTwriteobject_
annotation	directionr4   r5   r'   r(   r)   r*   r6   r+   r,   r-   r.   r/   r0   r1   r2   content-typeapplication/jsonbase_urlmethodjsonheadersr3   omit   ,  type_rB   responsedata  rL   body        status_coderL   rW   ) _completions_streamr   httpx_clientrequestget_environmentbaser   typingSequencer   r   r   r   OMITr]   castr   r	   rK   r   r   dictrL   OptionalAnyr   r   r   r   r   r   textr!   r4   r5   r'   r(   r)   r*   r6   r+   r,   r-   r.   r/   r0   r1   r2   r3   	_response_data_response_jsonr"   r"   r#   r:   O   s   Z
#


















c                c   s   | j jjd| j  jdt|tjt dd|||||dt|t	dd||	|
||t|tjt
 ddt|tdddddi|td	|}d
|j  krKdk snn |  z| }W n tyb   |j}Y nw t|jt|j|d| D ]@}|swqr|dr|tdd  }| dkr W d    d S zt|}tttt|d}|V  W qr tjy   Y qrw qrW d    d S 1 sw   Y  d S Nr>   r?   r@   rA   TrE   rF   rG   rH   rN   rO   r\   zdata: z[DONE]rP   )r   r_   r6   ra   rb   r   rc   rd   r   r   r   r   re   r]   readrK   	Exceptionrj   r   rg   rL   
iter_lines
startswithlenstriploadsrf   r   r	   r   r!   r4   r5   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   rl   _body_line	_data_str_chunk_json_chunkr"   r"   r#   r^   %  s   
"

2

"z!RawChatClient._completions_stream)__name__
__module____qualname__r   r$   rc   overloadrd   r   r   rh   floatr   intLiteralr   boolr   r   r
   Iteratorr   r:   r   r   re   Unionr^   r"   r"   r"   r#   r         	
	
	

 \	
r   c                &   @   r   )!AsyncRawChatClientr   c                C   r   r   r   r    r"   r"   r#   r$   y  r%   zAsyncRawChatClient.__init__.r&   r4   r5   r'   r(   r)   r*   r6   Tr+   r,   r-   r.   r/   r0   r1   r2   r3   r7   c                      d S r   r"   r9   r"   r"   r#   r:   |     zAsyncRawChatClient.completionsr<   Fc                   r   r   r"   r9   r"   r"   r#   r:     r   Nc                   s  |du r| j ||||||||	|
||||||dS | jjjd| j jdt|tjt	 dd||||||t|t
dd|	|
|||t|tjt ddt|tddddd	i|td
I dH }zd|j  krfdk r{n ntttt| d}t||dW S |jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd|jdkrtt|jttjtj ttjtj | dd| }W n ty5   t|jt|j|jdw t|jt|j|d)a~  
        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        model : SarvamModelIds
            Model ID used to generate the response, like `sarvam-m`.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]
            The effort to use for reasoning

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            When true, returns an AsyncIterator[ChatCompletionChunk] instead of AsyncHttpResponse.

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
            Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If set to true, the model response will be wiki grounded.

        tools : typing.Optional[typing.Sequence[ChatCompletionToolParams]]
            A list of tools the model may call. Currently, only functions are supported as a tool.

        tool_choice : typing.Optional[ToolChoiceOptionParams]
            Controls which (if any) tool is called by the model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[CreateChatCompletionResponse] or AsyncIterator[ChatCompletionChunk]
            When stream=False (default): AsyncHttpResponse wrapping CreateChatCompletionResponse.
            When stream=True: AsyncIterator yielding ChatCompletionChunk objects.
        Tr=   r>   r?   r@   rA   rE   rF   rG   rH   NrN   rO   rP   rR   rU   rV   rX   rY   rZ   r[   r\   ) r^   r   r_   r`   ra   rb   r   rc   rd   r   r   r   r   re   r]   rf   r   r	   rK   r   r   rg   rL   rh   ri   r   r   r   r   r   r   rj   rk   r"   r"   r#   r:     s   Z
#


















c                C  s  | j jjd| j  jdt|tjt dd|||||dt|t	dd||	|
||t|tjt
 ddt|tdddddi|td	4 I d H }d
|j  krOdk sun | I d H  z| }W n tyi   |j}Y nw t|jt|j|d| 2 zG3 d H W }|sqy|dr|tdd  }| dkr W d   I d H  d S zt|}tttt|d}|V  W qy tjy   Y qyw qy6 W d   I d H  d S 1 I d H sw   Y  d S ro   )r   r_   r6   ra   rb   r   rc   rd   r   r   r   r   re   r]   areadrK   rq   rj   r   rg   rL   aiter_linesrs   rt   ru   rv   rf   r   r	   r   rw   r"   r"   r#   r^   ~  s   
"

2

.z&AsyncRawChatClient._completions_stream)r}   r~   r   r   r$   rc   r   rd   r   r   rh   r   r   r   r   r   r   r   r   r
   AsyncIteratorr   r:   r   r   re   r   r^   r"   r"   r"   r#   r   x  r   r   )1rK   rc   json.decoderr   core.api_errorr   core.client_wrapperr   r   core.http_responser   r   core.pydantic_utilitiesr	   core.request_optionsr
   core.serializationr   errors.bad_request_errorr   errors.forbidden_errorr   errors.internal_server_errorr   errors.too_many_requests_errorr   !errors.unprocessable_entity_errorr   (requests.chat_completion_request_messager   requests.chat_completion_toolr   requests.stop_configurationr   requests.tool_choice_optionr   types.chat_completion_chunkr   %types.create_chat_completion_responser   types.reasoning_effortr   types.sarvam_model_idsr   rf   ri   re   r   r   r"   r"   r"   r#   <module>   s6     [