# This file was auto-generated by Fern from our API Definition.

import typing

from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from ..requests.chat_completion_request_message import ChatCompletionRequestMessageParams
from ..requests.chat_completion_tool import ChatCompletionToolParams
from ..requests.stop_configuration import StopConfigurationParams
from ..requests.tool_choice_option import ToolChoiceOptionParams
from ..types.chat_completion_chunk import ChatCompletionChunk
from ..types.create_chat_completion_response import CreateChatCompletionResponse
from ..types.reasoning_effort import ReasoningEffort
from ..types.sarvam_model_ids import SarvamModelIds
from .raw_client import AsyncRawChatClient, RawChatClient

# Sentinel used as the default value for optional parameters.
# It is the Ellipsis object cast to `typing.Any`, so it type-checks against any
# parameter annotation while staying distinguishable from an explicit `None`.
# NOTE(review): presumably the raw client drops OMIT-valued fields from the
# request payload - confirm in `raw_client`.
OMIT = typing.cast(typing.Any, ...)


class ChatClient:
    """Synchronous chat client that delegates to ``RawChatClient`` and unwraps its responses."""

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        # All requests go through the raw client built on the supplied wrapper.
        self._raw_client = RawChatClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawChatClient:
        """
        Expose the underlying raw client, whose methods return raw responses.

        Returns
        -------
        RawChatClient
        """
        return self._raw_client

    # Typing overload: `stream=True` produces an iterator of chunks.
    @typing.overload
    def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = ...,
        top_p: typing.Optional[float] = ...,
        reasoning_effort: typing.Optional[ReasoningEffort] = ...,
        max_tokens: typing.Optional[int] = ...,
        stream: typing.Literal[True],
        stop: typing.Optional[StopConfigurationParams] = ...,
        n: typing.Optional[int] = ...,
        seed: typing.Optional[int] = ...,
        frequency_penalty: typing.Optional[float] = ...,
        presence_penalty: typing.Optional[float] = ...,
        wiki_grounding: typing.Optional[bool] = ...,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = ...,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = ...,
        request_options: typing.Optional[RequestOptions] = ...,
    ) -> typing.Iterator[ChatCompletionChunk]: ...

    # Typing overload: `stream` false, None or omitted produces a full response.
    @typing.overload
    def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = ...,
        top_p: typing.Optional[float] = ...,
        reasoning_effort: typing.Optional[ReasoningEffort] = ...,
        max_tokens: typing.Optional[int] = ...,
        stream: typing.Optional[typing.Literal[False]] = ...,
        stop: typing.Optional[StopConfigurationParams] = ...,
        n: typing.Optional[int] = ...,
        seed: typing.Optional[int] = ...,
        frequency_penalty: typing.Optional[float] = ...,
        presence_penalty: typing.Optional[float] = ...,
        wiki_grounding: typing.Optional[bool] = ...,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = ...,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = ...,
        request_options: typing.Optional[RequestOptions] = ...,
    ) -> CreateChatCompletionResponse: ...

    def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = OMIT,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Union[CreateChatCompletionResponse, typing.Iterator[ChatCompletionChunk]]:
        """
        Request a chat completion, either as one response or as a stream of chunks.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            The messages that make up the conversation so far.

        model : SarvamModelIds
            ID of the model that generates the response, e.g. `sarvam-m`.

        temperature : typing.Optional[float]
            Sampling temperature, between 0 and 2. Higher values like 0.8 make the
            output more random; lower values like 0.2 make it more focused and
            deterministic. Altering this or `top_p`, but not both, is generally
            recommended.

        top_p : typing.Optional[float]
            Nucleus sampling, an alternative to sampling with temperature: the model
            considers the results of the tokens with top_p probability mass, so 0.1
            means only the tokens comprising the top 10% probability mass are
            considered.

            Altering this or `temperature`, but not both, is generally recommended.

        reasoning_effort : typing.Optional[ReasoningEffort]
            The effort to use for reasoning.

        max_tokens : typing.Optional[int]
            Upper bound on the number of tokens that can be generated in the chat
            completion.

        stream : typing.Optional[bool]
            If set to true, the model response data is streamed to the client as it
            is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            Only the literal `True` selects streaming (the check is `stream is True`);
            in that case an Iterator[ChatCompletionChunk] is returned instead of a
            CreateChatCompletionResponse.

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            Number of chat completion choices to generate for each input message.
            You are charged based on the number of generated tokens across all of
            the choices, so keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, the system makes a best effort to sample deterministically,
            such that repeated requests with the same `seed` and parameters should
            return the same result. Determinism is not guaranteed; refer to the
            `system_fingerprint` response parameter to monitor changes in the backend.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If set to true, the model response will be wiki grounded.

        tools : typing.Optional[typing.Sequence[ChatCompletionToolParams]]
            Tools the model may call. Currently, only functions are supported as a tool.

        tool_choice : typing.Optional[ToolChoiceOptionParams]
            Controls which (if any) tool is called by the model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse or Iterator[ChatCompletionChunk]
            When `stream` is not `True` (including when omitted): the `data` of the
            raw client's response, a CreateChatCompletionResponse.
            When stream=True: the raw client's result, an Iterator yielding
            ChatCompletionChunk objects.

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )

        # Non-streaming
        response = client.chat.completions(
            messages=[{"role": "user", "content": "Hello"}],
            model="sarvam-m",
        )

        # Streaming
        for chunk in client.chat.completions(
            messages=[{"role": "user", "content": "Hello"}],
            model="sarvam-m",
            stream=True,
        ):
            print(chunk)
        """
        # Everything except `stream` is forwarded unchanged on both paths.
        forwarded: typing.Dict[str, typing.Any] = {
            "messages": messages,
            "model": model,
            "temperature": temperature,
            "top_p": top_p,
            "reasoning_effort": reasoning_effort,
            "max_tokens": max_tokens,
            "stop": stop,
            "n": n,
            "seed": seed,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "wiki_grounding": wiki_grounding,
            "tools": tools,
            "tool_choice": tool_choice,
            "request_options": request_options,
        }

        if stream is not True:
            # Non-streaming path: pass `stream` through as given and unwrap the payload.
            raw_result = self._raw_client.completions(stream=stream, **forwarded)
            return raw_result.data

        # Streaming path: hand back the raw client's result untouched.
        return self._raw_client.completions(stream=True, **forwarded)


class AsyncChatClient:
    """Asynchronous chat client that delegates to ``AsyncRawChatClient`` and unwraps its responses."""

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        # All requests go through the raw async client built on the supplied wrapper.
        self._raw_client = AsyncRawChatClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawChatClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawChatClient
        """
        return self._raw_client

    # Typing overload: awaiting with `stream=True` produces an async iterator of chunks.
    @typing.overload
    async def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = ...,
        top_p: typing.Optional[float] = ...,
        reasoning_effort: typing.Optional[ReasoningEffort] = ...,
        max_tokens: typing.Optional[int] = ...,
        stream: typing.Literal[True],
        stop: typing.Optional[StopConfigurationParams] = ...,
        n: typing.Optional[int] = ...,
        seed: typing.Optional[int] = ...,
        frequency_penalty: typing.Optional[float] = ...,
        presence_penalty: typing.Optional[float] = ...,
        wiki_grounding: typing.Optional[bool] = ...,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = ...,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = ...,
        request_options: typing.Optional[RequestOptions] = ...,
    ) -> typing.AsyncIterator[ChatCompletionChunk]: ...

    # Typing overload: `stream` false, None or omitted produces a full response.
    @typing.overload
    async def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = ...,
        top_p: typing.Optional[float] = ...,
        reasoning_effort: typing.Optional[ReasoningEffort] = ...,
        max_tokens: typing.Optional[int] = ...,
        stream: typing.Optional[typing.Literal[False]] = ...,
        stop: typing.Optional[StopConfigurationParams] = ...,
        n: typing.Optional[int] = ...,
        seed: typing.Optional[int] = ...,
        frequency_penalty: typing.Optional[float] = ...,
        presence_penalty: typing.Optional[float] = ...,
        wiki_grounding: typing.Optional[bool] = ...,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = ...,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = ...,
        request_options: typing.Optional[RequestOptions] = ...,
    ) -> CreateChatCompletionResponse: ...

    async def completions(
        self,
        *,
        messages: typing.Sequence[ChatCompletionRequestMessageParams],
        model: SarvamModelIds,
        temperature: typing.Optional[float] = OMIT,
        top_p: typing.Optional[float] = OMIT,
        reasoning_effort: typing.Optional[ReasoningEffort] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stream: typing.Optional[bool] = OMIT,
        stop: typing.Optional[StopConfigurationParams] = OMIT,
        n: typing.Optional[int] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        wiki_grounding: typing.Optional[bool] = OMIT,
        tools: typing.Optional[typing.Sequence[ChatCompletionToolParams]] = OMIT,
        tool_choice: typing.Optional[ToolChoiceOptionParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Union[CreateChatCompletionResponse, typing.AsyncIterator[ChatCompletionChunk]]:
        """
        Request a chat completion, either as one response or as an async stream of chunks.

        This is a coroutine function: the call must be awaited in both modes. With
        `stream=True` the awaited result is the async iterator to loop over.

        Parameters
        ----------
        messages : typing.Sequence[ChatCompletionRequestMessageParams]
            A list of messages comprising the conversation so far.

        model : SarvamModelIds
            Model ID used to generate the response, like `sarvam-m`.

        temperature : typing.Optional[float]
            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
            We generally recommend altering this or `top_p` but not both.

        top_p : typing.Optional[float]
            An alternative to sampling with temperature, called nucleus sampling,
            where the model considers the results of the tokens with top_p probability
            mass. So 0.1 means only the tokens comprising the top 10% probability mass
            are considered.

            We generally recommend altering this or `temperature` but not both.

        reasoning_effort : typing.Optional[ReasoningEffort]
            The effort to use for reasoning

        max_tokens : typing.Optional[int]
            The maximum number of tokens that can be generated in the chat completion.

        stream : typing.Optional[bool]
            If set to true, the model response data will be streamed to the client
            as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
            Only the literal `True` selects streaming (the check is `stream is True`).
            When true, awaiting the call returns an AsyncIterator[ChatCompletionChunk]
            instead of CreateChatCompletionResponse.

        stop : typing.Optional[StopConfigurationParams]

        n : typing.Optional[int]
            How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.

        seed : typing.Optional[int]
            This feature is in Beta.
            If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
            Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.

        frequency_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            their existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim.

        presence_penalty : typing.Optional[float]
            Number between -2.0 and 2.0. Positive values penalize new tokens based on
            whether they appear in the text so far, increasing the model's likelihood
            to talk about new topics.

        wiki_grounding : typing.Optional[bool]
            If set to true, the model response will be wiki grounded.

        tools : typing.Optional[typing.Sequence[ChatCompletionToolParams]]
            A list of tools the model may call. Currently, only functions are supported as a tool.

        tool_choice : typing.Optional[ToolChoiceOptionParams]
            Controls which (if any) tool is called by the model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        CreateChatCompletionResponse or AsyncIterator[ChatCompletionChunk]
            When `stream` is not `True` (including when omitted): the `data` of the
            raw client's response, a CreateChatCompletionResponse.
            When stream=True: the awaited raw client result, an AsyncIterator
            yielding ChatCompletionChunk objects.

        Examples
        --------
        import asyncio
        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )

        async def main() -> None:
            # Non-streaming
            response = await client.chat.completions(
                messages=[{"role": "user", "content": "Hello"}],
                model="sarvam-m",
            )

            # Streaming: await the call first to obtain the async iterator,
            # then iterate it with `async for`.
            async for chunk in await client.chat.completions(
                messages=[{"role": "user", "content": "Hello"}],
                model="sarvam-m",
                stream=True,
            ):
                print(chunk)

        asyncio.run(main())
        """
        # Everything except `stream` is forwarded unchanged on both paths, so the
        # argument list is written once instead of being duplicated per branch.
        forwarded: typing.Dict[str, typing.Any] = {
            "messages": messages,
            "model": model,
            "temperature": temperature,
            "top_p": top_p,
            "reasoning_effort": reasoning_effort,
            "max_tokens": max_tokens,
            "stop": stop,
            "n": n,
            "seed": seed,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "wiki_grounding": wiki_grounding,
            "tools": tools,
            "tool_choice": tool_choice,
            "request_options": request_options,
        }

        if stream is True:
            # Streaming path: the awaited raw result is returned untouched.
            return await self._raw_client.completions(stream=True, **forwarded)

        # Non-streaming path: pass `stream` through as given and unwrap the payload.
        _response = await self._raw_client.completions(stream=stream, **forwarded)
        return _response.data