diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 3741b313a5..4ce109ae13 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.17.1"
+ ".": "1.18.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index c550abf3c6..47c2bce1cc 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1 +1 @@
-configured_endpoints: 51
+configured_endpoints: 55
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e18ab5f54..03285021ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 1.18.0 (2024-04-15)
+
+Full Changelog: [v1.17.1...v1.18.0](https://github.com/openai/openai-python/compare/v1.17.1...v1.18.0)
+
+### Features
+
+* **api:** add batch API ([#1316](https://github.com/openai/openai-python/issues/1316)) ([3e6f19e](https://github.com/openai/openai-python/commit/3e6f19e6e7489bf1c94944a5f8f9b1d4535cdc43))
+* **api:** updates ([#1314](https://github.com/openai/openai-python/issues/1314)) ([8281dc9](https://github.com/openai/openai-python/commit/8281dc956178f5de345645660081f7d0c15a57a6))
+
## 1.17.1 (2024-04-12)
Full Changelog: [v1.17.0...v1.17.1](https://github.com/openai/openai-python/compare/v1.17.0...v1.17.1)
diff --git a/api.md b/api.md
index dbc95cd0b4..38f77592e8 100644
--- a/api.md
+++ b/api.md
@@ -159,16 +159,34 @@ Methods:
Types:
```python
-from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent
+from openai.types.fine_tuning import (
+ FineTuningJob,
+ FineTuningJobEvent,
+ FineTuningJobIntegration,
+ FineTuningJobWandbIntegration,
+ FineTuningJobWandbIntegrationObject,
+)
```
Methods:
-- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
-- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
-- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
+- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
+- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+
+### Checkpoints
+
+Types:
+
+```python
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+```
+
+Methods:
+
+- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
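For illustration, a minimal sketch of calling the new checkpoints endpoint (the job ID is a placeholder, and the printed fields assume the documented `FineTuningJobCheckpoint` shape):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# List checkpoints for an existing fine-tuning job; the ID is a placeholder.
checkpoints = client.fine_tuning.jobs.checkpoints.list("ftjob-abc123")
for checkpoint in checkpoints:  # SyncCursorPage is iterable and auto-paginates
    print(checkpoint.id, checkpoint.step_number)
```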
# Beta
@@ -220,7 +238,15 @@ Methods:
Types:
```python
-from openai.types.beta import Thread, ThreadDeleted
+from openai.types.beta import (
+ AssistantResponseFormat,
+ AssistantResponseFormatOption,
+ AssistantToolChoice,
+ AssistantToolChoiceFunction,
+ AssistantToolChoiceOption,
+ Thread,
+ ThreadDeleted,
+)
```
Methods:
@@ -335,3 +361,17 @@ Methods:
- client.beta.threads.messages.files.retrieve(file_id, \*, thread_id, message_id) -> MessageFile
- client.beta.threads.messages.files.list(message_id, \*, thread_id, \*\*params) -> SyncCursorPage[MessageFile]
+
+# Batches
+
+Types:
+
+```python
+from openai.types import Batch, BatchError, BatchRequestCounts
+```
+
+Methods:
+
+- client.batches.create(\*\*params) -> Batch
+- client.batches.retrieve(batch_id) -> Batch
+- client.batches.cancel(batch_id) -> Batch
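Combined with the existing files API, the three new methods suggest a lifecycle along these lines (a sketch only; the file name, metadata, and status check are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Per the docstrings in the new resource, the input must be a JSONL file
# uploaded with purpose "batch"; the file name here is a placeholder.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",   # currently the only supported endpoint
    completion_window="24h",           # currently the only supported window
    metadata={"job": "nightly-eval"},  # optional custom metadata
)

batch = client.batches.retrieve(batch.id)  # poll for progress
if batch.status == "in_progress":          # status value assumed from the API docs
    batch = client.batches.cancel(batch.id)
```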
diff --git a/pyproject.toml b/pyproject.toml
index 9eb6330616..505f1a3e7a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.17.1"
+version = "1.18.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index 1daa26f7b7..490ba017f0 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -335,6 +335,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
files as files,
images as images,
models as models,
+ batches as batches,
embeddings as embeddings,
completions as completions,
fine_tuning as fine_tuning,
diff --git a/src/openai/_client.py b/src/openai/_client.py
index e9169df72a..5a6852e571 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -57,6 +57,7 @@ class OpenAI(SyncAPIClient):
models: resources.Models
fine_tuning: resources.FineTuning
beta: resources.Beta
+ batches: resources.Batches
with_raw_response: OpenAIWithRawResponse
with_streaming_response: OpenAIWithStreamedResponse
@@ -134,6 +135,7 @@ def __init__(
self.models = resources.Models(self)
self.fine_tuning = resources.FineTuning(self)
self.beta = resources.Beta(self)
+ self.batches = resources.Batches(self)
self.with_raw_response = OpenAIWithRawResponse(self)
self.with_streaming_response = OpenAIWithStreamedResponse(self)
@@ -257,6 +259,7 @@ class AsyncOpenAI(AsyncAPIClient):
models: resources.AsyncModels
fine_tuning: resources.AsyncFineTuning
beta: resources.AsyncBeta
+ batches: resources.AsyncBatches
with_raw_response: AsyncOpenAIWithRawResponse
with_streaming_response: AsyncOpenAIWithStreamedResponse
@@ -334,6 +337,7 @@ def __init__(
self.models = resources.AsyncModels(self)
self.fine_tuning = resources.AsyncFineTuning(self)
self.beta = resources.AsyncBeta(self)
+ self.batches = resources.AsyncBatches(self)
self.with_raw_response = AsyncOpenAIWithRawResponse(self)
self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
@@ -458,6 +462,7 @@ def __init__(self, client: OpenAI) -> None:
self.models = resources.ModelsWithRawResponse(client.models)
self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning)
self.beta = resources.BetaWithRawResponse(client.beta)
+ self.batches = resources.BatchesWithRawResponse(client.batches)
class AsyncOpenAIWithRawResponse:
@@ -472,6 +477,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.models = resources.AsyncModelsWithRawResponse(client.models)
self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning)
self.beta = resources.AsyncBetaWithRawResponse(client.beta)
+ self.batches = resources.AsyncBatchesWithRawResponse(client.batches)
class OpenAIWithStreamedResponse:
@@ -486,6 +492,7 @@ def __init__(self, client: OpenAI) -> None:
self.models = resources.ModelsWithStreamingResponse(client.models)
self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning)
self.beta = resources.BetaWithStreamingResponse(client.beta)
+ self.batches = resources.BatchesWithStreamingResponse(client.batches)
class AsyncOpenAIWithStreamedResponse:
@@ -500,6 +507,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.models = resources.AsyncModelsWithStreamingResponse(client.models)
self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning)
self.beta = resources.AsyncBetaWithStreamingResponse(client.beta)
+ self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches)
Client = OpenAI
diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py
index 9227f5e2b4..6f7356eb3c 100644
--- a/src/openai/_module_client.py
+++ b/src/openai/_module_client.py
@@ -42,6 +42,12 @@ def __load__(self) -> resources.Models:
return _load_client().models
+class BatchesProxy(LazyProxy[resources.Batches]):
+ @override
+ def __load__(self) -> resources.Batches:
+ return _load_client().batches
+
+
class EmbeddingsProxy(LazyProxy[resources.Embeddings]):
@override
def __load__(self) -> resources.Embeddings:
@@ -72,6 +78,7 @@ def __load__(self) -> resources.FineTuning:
audio: resources.Audio = AudioProxy().__as_proxied__()
images: resources.Images = ImagesProxy().__as_proxied__()
models: resources.Models = ModelsProxy().__as_proxied__()
+batches: resources.Batches = BatchesProxy().__as_proxied__()
embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__()
completions: resources.Completions = CompletionsProxy().__as_proxied__()
moderations: resources.Moderations = ModerationsProxy().__as_proxied__()
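The proxy mirrors the other module-level resources, so the new endpoints are also reachable without constructing a client explicitly (a sketch; the batch ID is a placeholder):

```python
import openai

# The lazy proxy instantiates a default client on first use, so
# `openai.batches` behaves like `OpenAI().batches`.
batch = openai.batches.retrieve("batch_abc123")
print(batch.status)
```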
diff --git a/src/openai/_version.py b/src/openai/_version.py
index a4ffbb2c35..2957462e3d 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.17.1" # x-release-please-version
+__version__ = "1.18.0" # x-release-please-version
diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py
index 64aa12d260..ecae4243fc 100644
--- a/src/openai/resources/__init__.py
+++ b/src/openai/resources/__init__.py
@@ -48,6 +48,14 @@
ModelsWithStreamingResponse,
AsyncModelsWithStreamingResponse,
)
+from .batches import (
+ Batches,
+ AsyncBatches,
+ BatchesWithRawResponse,
+ AsyncBatchesWithRawResponse,
+ BatchesWithStreamingResponse,
+ AsyncBatchesWithStreamingResponse,
+)
from .embeddings import (
Embeddings,
AsyncEmbeddings,
@@ -142,4 +150,10 @@
"AsyncBetaWithRawResponse",
"BetaWithStreamingResponse",
"AsyncBetaWithStreamingResponse",
+ "Batches",
+ "AsyncBatches",
+ "BatchesWithRawResponse",
+ "AsyncBatchesWithRawResponse",
+ "BatchesWithStreamingResponse",
+ "AsyncBatchesWithStreamingResponse",
]
diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py
new file mode 100644
index 0000000000..0921ccb194
--- /dev/null
+++ b/src/openai/resources/batches.py
@@ -0,0 +1,354 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import Batch, batch_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import (
+ make_request_options,
+)
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> BatchesWithRawResponse:
+ return BatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BatchesWithStreamingResponse:
+ return BatchesWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ completion_window: Literal["24h"],
+ endpoint: Literal["/v1/chat/completions"],
+ input_file_id: str,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Creates and executes a batch from an uploaded file of requests
+
+ Args:
+ completion_window: The time frame within which the batch should be processed. Currently only `24h`
+ is supported.
+
+ endpoint: The endpoint to be used for all requests in the batch. Currently only
+ `/v1/chat/completions` is supported.
+
+ input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+
+ metadata: Optional custom metadata for the batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/batches",
+ body=maybe_transform(
+ {
+ "completion_window": completion_window,
+ "endpoint": endpoint,
+ "input_file_id": input_file_id,
+ "metadata": metadata,
+ },
+ batch_create_params.BatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ def retrieve(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Retrieves a batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get(
+ f"/batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ def cancel(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Cancels an in-progress batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._post(
+ f"/batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+
+class AsyncBatches(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+ return AsyncBatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+ return AsyncBatchesWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ completion_window: Literal["24h"],
+ endpoint: Literal["/v1/chat/completions"],
+ input_file_id: str,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Creates and executes a batch from an uploaded file of requests
+
+ Args:
+ completion_window: The time frame within which the batch should be processed. Currently only `24h`
+ is supported.
+
+ endpoint: The endpoint to be used for all requests in the batch. Currently only
+ `/v1/chat/completions` is supported.
+
+ input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+
+ metadata: Optional custom metadata for the batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/batches",
+ body=await async_maybe_transform(
+ {
+ "completion_window": completion_window,
+ "endpoint": endpoint,
+ "input_file_id": input_file_id,
+ "metadata": metadata,
+ },
+ batch_create_params.BatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ async def retrieve(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Retrieves a batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._get(
+ f"/batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ async def cancel(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Cancels an in-progress batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._post(
+ f"/batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+
+class BatchesWithRawResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ batches.cancel,
+ )
+
+
+class AsyncBatchesWithRawResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ batches.cancel,
+ )
+
+
+class BatchesWithStreamingResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ batches.cancel,
+ )
+
+
+class AsyncBatchesWithStreamingResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = async_to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ batches.cancel,
+ )
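As with the other resources, the wrapper classes at the bottom of the new file expose raw and streamed HTTP access; roughly (the batch ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

# Raw-response access: headers are available directly, `.parse()` returns the Batch.
response = client.batches.with_raw_response.retrieve("batch_abc123")
print(response.headers.get("x-request-id"))
batch = response.parse()

# Streamed-response access: the body is not read until you ask for it.
with client.batches.with_streaming_response.retrieve("batch_abc123") as response:
    batch = response.parse()
```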
diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/assistants/assistants.py
index 232451ab25..9e88794ebc 100644
--- a/src/openai/resources/beta/assistants/assistants.py
+++ b/src/openai/resources/beta/assistants/assistants.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal
import httpx
@@ -57,7 +57,29 @@ def with_streaming_response(self) -> AssistantsWithStreamingResponse:
def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -87,7 +109,7 @@ def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -194,7 +216,7 @@ def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -360,7 +382,29 @@ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
async def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,7 +434,7 @@ async def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -497,7 +541,7 @@ async def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
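The widened `model` parameter keeps plain strings working while adding typed literals for the known snapshots, e.g. (names other than the model are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# "gpt-4-turbo" now type-checks as a literal; any other string is still accepted.
assistant = client.beta.assistants.create(
    model="gpt-4-turbo",
    name="Data helper",
    instructions="Answer questions about uploaded CSV files.",
)
```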
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 8576a5c09a..9fa7239c0b 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -4,7 +4,7 @@
import time
import typing_extensions
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -31,7 +31,12 @@
from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ....._streaming import Stream, AsyncStream
from .....pagination import SyncCursorPage, AsyncCursorPage
-from .....types.beta import AssistantToolParam, AssistantStreamEvent
+from .....types.beta import (
+ AssistantToolParam,
+ AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
+)
from ....._base_client import (
AsyncPaginator,
make_request_options,
@@ -77,11 +82,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -107,6 +141,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -117,6 +163,21 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -125,6 +186,13 @@ def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -148,10 +216,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -181,6 +278,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -191,10 +300,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -218,10 +349,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -251,6 +411,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -261,10 +433,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -287,11 +481,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -310,11 +533,16 @@ def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
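Pulling the new parameters together, a run can now be created with per-run budget, format, and tool controls (a sketch; the IDs and values are placeholders):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    max_prompt_tokens=2000,        # budget across all turns of the run
    max_completion_tokens=1000,
    response_format={"type": "json_object"},  # JSON mode; also instruct the model to emit JSON
    tool_choice="auto",                        # or a specific tool, e.g. {"type": "code_interpreter"}
    truncation_strategy={"type": "last_messages", "last_messages": 4},
)
```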
@@ -518,10 +746,39 @@ def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,12 +799,17 @@ def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
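`create_and_poll` forwards the same new parameters and, per the comment above, always sends `stream=False` while it waits; for instance (IDs are placeholders):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    max_completion_tokens=1000,
    poll_interval_ms=500,  # optional polling interval override
)
print(run.status)  # a terminal state such as "completed"
```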
@@ -572,10 +834,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -596,10 +887,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -620,10 +940,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -652,11 +1001,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -722,10 +1076,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -745,10 +1128,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -768,10 +1180,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -800,11 +1241,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
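The `stream()` helper threads the same parameters through while forcing `stream=True`; one plausible usage, assuming the event-handler helpers behave as documented elsewhere in the library:

```python
from openai import OpenAI

client = OpenAI()

with client.beta.threads.runs.stream(
    thread_id="thread_abc123",   # placeholder
    assistant_id="asst_abc123",  # placeholder
    tool_choice="auto",
    truncation_strategy={"type": "auto"},
) as stream:
    for text in stream.text_deltas:  # convenience iterator over streamed text
        print(text, end="")
```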
@@ -1123,11 +1569,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1153,6 +1628,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1163,6 +1650,21 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
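To make the JSON-mode caveat concrete, a minimal sketch that pairs `response_format` with an explicit instruction to emit JSON (omitting that instruction risks the whitespace-stream failure mode described above); IDs are placeholders:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    # JSON mode requires telling the model to produce JSON in the prompt too.
    instructions="Reply only with a JSON object with keys 'answer' and 'confidence'.",
    response_format={"type": "json_object"},
)
```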
@@ -1171,6 +1673,13 @@ async def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
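The forced-function form quoted in the docstring looks like this in a call; `get_weather` is a hypothetical function tool assumed to be defined on the assistant:

```python
from openai import OpenAI

client = OpenAI()

# `tool_choice` forces the model to call the named function on this run.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)
```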
@@ -1194,10 +1703,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1227,6 +1765,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1237,10 +1787,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1264,10 +1836,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1297,6 +1898,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1307,10 +1920,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1333,11 +1968,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1356,11 +2020,16 @@ async def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
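The transform above forwards each new knob into `RunCreateParams`. `truncation_strategy` is the only one without a docstring in these hunks; a sketch under the assumption that it follows the `auto`/`last_messages` shape from the API reference:

```python
from openai import OpenAI

client = OpenAI()

# Keep only the 5 most recent thread messages in the run's context window.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    truncation_strategy={"type": "last_messages", "last_messages": 5},
)
```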
@@ -1564,10 +2233,39 @@ async def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1588,12 +2286,17 @@ async def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
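For the async client, the same parameters flow through `create_and_poll`, which waits (cooperatively) until the run reaches a terminal state; a minimal sketch with placeholder IDs:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    run = await client.beta.threads.runs.create_and_poll(
        thread_id="thread_abc123",   # placeholder ID
        assistant_id="asst_abc123",  # placeholder ID
        max_completion_tokens=500,
    )
    print(run.status)

asyncio.run(main())
```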
@@ -1618,10 +2321,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1642,10 +2374,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1666,10 +2427,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1700,11 +2490,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
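One way to drive the async streaming variant is with an event handler; the `on_text_delta` hook below is assumed from the SDK's streaming helpers rather than shown in this diff, and the IDs are placeholders:

```python
import asyncio

from typing_extensions import override

from openai import AsyncOpenAI, AsyncAssistantEventHandler

class PrintHandler(AsyncAssistantEventHandler):
    @override
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print text fragments as they arrive over the SSE stream.
        if delta.value:
            print(delta.value, end="", flush=True)

client = AsyncOpenAI()

async def main() -> None:
    async with client.beta.threads.runs.create_and_stream(
        thread_id="thread_abc123",   # placeholder ID
        assistant_id="asst_abc123",  # placeholder ID
        event_handler=PrintHandler(),
    ) as stream:
        await stream.until_done()

asyncio.run(main())
```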
@@ -1770,10 +2565,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1793,10 +2617,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1816,10 +2669,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1850,11 +2732,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 3509267d4f..9c2e2f0043 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -40,6 +40,8 @@
Thread,
ThreadDeleted,
AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
thread_create_params,
thread_update_params,
thread_create_and_run_params,
@@ -241,12 +243,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -265,6 +296,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -275,6 +318,21 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -285,6 +343,13 @@ def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
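`create_and_run` accepts the same per-run knobs plus an inline `thread`; a sketch that creates the thread and starts the run in one request (the assistant ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder ID
    thread={
        "messages": [
            {"role": "user", "content": "Summarize the attached notes in three bullets."}
        ]
    },
    max_prompt_tokens=4000,
)
```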
@@ -305,11 +370,40 @@ def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -332,6 +426,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -342,12 +448,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -368,11 +496,40 @@ def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -395,6 +552,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -405,12 +574,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -430,12 +621,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -450,12 +670,17 @@ def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -472,11 +697,40 @@ def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -493,11 +747,16 @@ def create_and_run_poll(
run = self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
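And the polling convenience wrapper shown above, in use; a hedged sketch with a placeholder assistant ID:

```python
from openai import OpenAI

client = OpenAI()

# Create a thread, start the run, and poll until it reaches a terminal state.
run = client.beta.threads.create_and_run_poll(
    assistant_id="asst_abc123",  # placeholder ID
    thread={"messages": [{"role": "user", "content": "Write a haiku about spring."}]},
    truncation_strategy={"type": "auto"},
)
print(run.status)
```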
@@ -512,11 +771,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -533,11 +821,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -554,11 +871,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -581,12 +927,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
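The synchronous streaming path mirrors the async one; a sketch using the SDK's sync event handler (the handler API is assumed from the streaming helpers, not shown in this diff; IDs are placeholders):

```python
from typing_extensions import override

from openai import OpenAI, AssistantEventHandler

class PrintHandler(AssistantEventHandler):
    @override
    def on_text_delta(self, delta, snapshot) -> None:
        # Print text fragments as they arrive over the SSE stream.
        if delta.value:
            print(delta.value, end="", flush=True)

client = OpenAI()

with client.beta.threads.create_and_run_stream(
    assistant_id="asst_abc123",  # placeholder ID
    thread={"messages": [{"role": "user", "content": "Tell me a short story."}]},
    event_handler=PrintHandler(),
) as stream:
    stream.until_done()
```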
@@ -780,12 +1131,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -804,6 +1184,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -814,6 +1206,21 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -824,6 +1231,13 @@ async def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -844,11 +1258,40 @@ async def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -871,6 +1314,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -881,12 +1336,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -907,11 +1384,40 @@ async def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -934,6 +1440,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+        completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+        prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -944,12 +1462,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -969,12 +1509,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -989,12 +1558,17 @@ async def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -1011,11 +1585,40 @@ async def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1032,11 +1635,16 @@ async def create_and_run_poll(
run = await self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -1053,11 +1661,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1074,11 +1711,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1095,11 +1761,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1124,12 +1819,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
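
As a quick orientation for the new `create_and_run` surface above, here is a minimal sketch of a call exercising the added parameters; the assistant ID and message content are placeholders, and an `OPENAI_API_KEY` in the environment is assumed:

```python
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "Reply in JSON."}]},
    # New in this release: response_format, tool_choice, the token caps,
    # and truncation_strategy.
    response_format={"type": "json_object"},
    tool_choice="auto",
    max_prompt_tokens=2000,
    max_completion_tokens=500,
    truncation_strategy={"type": "last_messages", "last_messages": 10},
)
print(run.status)
```

Note that JSON mode still requires the prompt itself to ask for JSON, per the docstring above.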
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index 3000603689..1a23e7876e 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -50,6 +50,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -137,8 +139,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -240,6 +241,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -334,8 +337,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -430,6 +432,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -524,8 +528,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -620,6 +623,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -717,6 +722,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -804,8 +811,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -907,6 +913,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1001,8 +1009,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1097,6 +1104,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1191,8 +1200,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1287,6 +1295,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index 659b3e8501..0404fed6ec 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -11,6 +11,7 @@
AsyncJobsWithStreamingResponse,
)
from ..._compat import cached_property
+from .jobs.jobs import Jobs, AsyncJobs
from ..._resource import SyncAPIResource, AsyncAPIResource
__all__ = ["FineTuning", "AsyncFineTuning"]
diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..94cd1fb7e7
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+ Jobs,
+ AsyncJobs,
+ JobsWithRawResponse,
+ AsyncJobsWithRawResponse,
+ JobsWithStreamingResponse,
+ AsyncJobsWithStreamingResponse,
+)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+
+__all__ = [
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+ "Jobs",
+ "AsyncJobs",
+ "JobsWithRawResponse",
+ "AsyncJobsWithRawResponse",
+ "JobsWithStreamingResponse",
+ "AsyncJobsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py
new file mode 100644
index 0000000000..e9ea6aad9a
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -0,0 +1,176 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.fine_tuning.jobs import FineTuningJobCheckpoint, checkpoint_list_params
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[FineTuningJobCheckpoint]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=SyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=AsyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = to_streamed_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = async_to_streamed_response_wrapper(
+ checkpoints.list,
+ )
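
Usage of the new checkpoints resource is a one-liner; the returned `SyncCursorPage` can be iterated directly and paginates as needed. The job ID below is a placeholder, and the `id` attribute on each checkpoint is assumed from the REST API shape:

```python
from openai import OpenAI

client = OpenAI()

for checkpoint in client.fine_tuning.jobs.checkpoints.list(
    "ftjob_abc123",  # placeholder fine-tuning job ID
    limit=10,
):
    print(checkpoint.id)
```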
diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
similarity index 89%
rename from src/openai/resources/fine_tuning/jobs.py
rename to src/openai/resources/fine_tuning/jobs/jobs.py
index a0c3e24dac..229f716c48 100644
--- a/src/openai/resources/fine_tuning/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -2,26 +2,34 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import Union, Iterable, Optional
from typing_extensions import Literal
import httpx
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
maybe_transform,
async_maybe_transform,
)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...pagination import SyncCursorPage, AsyncCursorPage
-from ..._base_client import (
+from ...._compat import cached_property
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
AsyncPaginator,
make_request_options,
)
-from ...types.fine_tuning import (
+from ....types.fine_tuning import (
FineTuningJob,
FineTuningJobEvent,
job_list_params,
@@ -33,6 +41,10 @@
class Jobs(SyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self)
@@ -47,6 +59,8 @@ def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -82,6 +96,12 @@ def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -116,6 +136,8 @@ def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -294,6 +316,10 @@ def list_events(
class AsyncJobs(AsyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self)
@@ -308,6 +334,8 @@ async def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -343,6 +371,12 @@ async def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -377,6 +411,8 @@ async def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -574,6 +610,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._jobs.checkpoints)
+
class AsyncJobsWithRawResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -595,6 +635,10 @@ def __init__(self, jobs: AsyncJobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints)
+
class JobsWithStreamingResponse:
def __init__(self, jobs: Jobs) -> None:
@@ -616,6 +660,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._jobs.checkpoints)
+
class AsyncJobsWithStreamingResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -636,3 +684,7 @@ def __init__(self, jobs: AsyncJobs) -> None:
self.list_events = async_to_streamed_response_wrapper(
jobs.list_events,
)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints)
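
To illustrate the two new `create()` parameters, a sketch of a job that pins a seed and enables a Weights & Biases integration; the file ID and project name are placeholders, and the `wandb` payload shape follows the integration types added in this release:

```python
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file="file-abc123",  # placeholder uploaded-file ID
    seed=42,  # fixed seed for reproducible runs
    integrations=[
        {
            "type": "wandb",
            "wandb": {"project": "my-project"},  # placeholder project
        }
    ],
)
print(job.id, job.status)
```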
diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py
index 0917e22a8f..4bbcdddc2a 100644
--- a/src/openai/types/__init__.py
+++ b/src/openai/types/__init__.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from .batch import Batch as Batch
from .image import Image as Image
from .model import Model as Model
from .shared import (
@@ -12,6 +13,7 @@
from .embedding import Embedding as Embedding
from .completion import Completion as Completion
from .moderation import Moderation as Moderation
+from .batch_error import BatchError as BatchError
from .file_object import FileObject as FileObject
from .file_content import FileContent as FileContent
from .file_deleted import FileDeleted as FileDeleted
@@ -22,6 +24,8 @@
from .completion_choice import CompletionChoice as CompletionChoice
from .image_edit_params import ImageEditParams as ImageEditParams
from .file_create_params import FileCreateParams as FileCreateParams
+from .batch_create_params import BatchCreateParams as BatchCreateParams
+from .batch_request_counts import BatchRequestCounts as BatchRequestCounts
from .image_generate_params import ImageGenerateParams as ImageGenerateParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py
new file mode 100644
index 0000000000..bde04d1a24
--- /dev/null
+++ b/src/openai/types/batch.py
@@ -0,0 +1,85 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .batch_error import BatchError
+from .batch_request_counts import BatchRequestCounts
+
+__all__ = ["Batch", "Errors"]
+
+
+class Errors(BaseModel):
+ data: Optional[List[BatchError]] = None
+
+ object: Optional[str] = None
+ """The object type, which is always `list`."""
+
+
+class Batch(BaseModel):
+ id: str
+
+ completion_window: str
+ """The time frame within which the batch should be processed."""
+
+    created_at: int
+ """The Unix timestamp (in seconds) for when the batch was created."""
+
+ endpoint: str
+ """The OpenAI API endpoint used by the batch."""
+
+ input_file_id: str
+ """The ID of the input file for the batch."""
+
+ object: Literal["batch"]
+ """The object type, which is always `batch`."""
+
+ status: Literal[
+ "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled"
+ ]
+ """The current status of the batch."""
+
+    cancelled_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch was cancelled."""
+
+    cancelling_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started cancelling."""
+
+    completed_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch was completed."""
+
+    error_file_id: Optional[str] = None
+    """The ID of the file containing the outputs of requests with errors."""
+
+    errors: Optional[Errors] = None
+
+    expired_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch expired."""
+
+    expires_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch will expire."""
+
+    failed_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch failed."""
+
+    finalizing_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started finalizing."""
+
+    in_progress_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started processing."""
+
+ metadata: Optional[builtins.object] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+    a maximum of 512 characters long.
+ """
+
+ output_file_id: Optional[str] = None
+ """The ID of the file containing the outputs of successfully executed requests."""
+
+ request_counts: Optional[BatchRequestCounts] = None
+ """The request counts for different statuses within the batch."""
diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py
new file mode 100644
index 0000000000..6a22be8626
--- /dev/null
+++ b/src/openai/types/batch_create_params.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BatchCreateParams"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+ completion_window: Required[Literal["24h"]]
+ """The time frame within which the batch should be processed.
+
+ Currently only `24h` is supported.
+ """
+
+ endpoint: Required[Literal["/v1/chat/completions"]]
+ """The endpoint to be used for all requests in the batch.
+
+ Currently only `/v1/chat/completions` is supported.
+ """
+
+ input_file_id: Required[str]
+ """The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+ """
+
+ metadata: Optional[Dict[str, str]]
+ """Optional custom metadata for the batch."""
diff --git a/src/openai/types/batch_error.py b/src/openai/types/batch_error.py
new file mode 100644
index 0000000000..1cdd808dbd
--- /dev/null
+++ b/src/openai/types/batch_error.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["BatchError"]
+
+
+class BatchError(BaseModel):
+ code: Optional[str] = None
+ """An error code identifying the error type."""
+
+ line: Optional[int] = None
+ """The line number of the input file where the error occurred, if applicable."""
+
+ message: Optional[str] = None
+ """A human-readable message providing more details about the error."""
+
+ param: Optional[str] = None
+ """The name of the parameter that caused the error, if applicable."""
diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py
new file mode 100644
index 0000000000..068b071af1
--- /dev/null
+++ b/src/openai/types/batch_request_counts.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["BatchRequestCounts"]
+
+
+class BatchRequestCounts(BaseModel):
+ completed: int
+ """Number of requests that have been completed successfully."""
+
+ failed: int
+ """Number of requests that have failed."""
+
+ total: int
+ """Total number of requests in the batch."""
diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py
index a7de0272b4..0171694587 100644
--- a/src/openai/types/beta/__init__.py
+++ b/src/openai/types/beta/__init__.py
@@ -15,9 +15,21 @@
from .thread_create_params import ThreadCreateParams as ThreadCreateParams
from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
from .assistant_list_params import AssistantListParams as AssistantListParams
+from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice
from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
+from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat
+from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam
from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
+from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption
from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
+from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction
+from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam
+from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam
+from .assistant_response_format_option_param import (
+ AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam,
+)
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index 32561a9aa8..0a0d28ed01 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -29,7 +29,7 @@ class Assistant(BaseModel):
instructions: Optional[str] = None
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object] = None
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 8bad323640..011121485f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -2,8 +2,8 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
-from typing_extensions import Required, TypedDict
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
from .assistant_tool_param import AssistantToolParam
@@ -11,7 +11,31 @@
class AssistantCreateParams(TypedDict, total=False):
- model: Required[str]
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ ]
"""ID of the model to use.
You can use the
@@ -34,7 +58,7 @@ class AssistantCreateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
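
With the widened `model` type, assistant creation keeps accepting arbitrary strings while type checkers can now flag typos in the known literals; a sketch with placeholder name and instructions:

```python
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4-turbo",  # checked against the Literal union above
    name="Data helper",  # placeholder
    instructions="You answer questions about uploaded CSV files.",
    tools=[{"type": "code_interpreter"}],
)
print(assistant.id)
```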
diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py
new file mode 100644
index 0000000000..f53bdaf62a
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantResponseFormat"]
+
+
+class AssistantResponseFormat(BaseModel):
+ type: Optional[Literal["text", "json_object"]] = None
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py
new file mode 100644
index 0000000000..d4e05e0ea9
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format import AssistantResponseFormat
+
+__all__ = ["AssistantResponseFormatOption"]
+
+AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat]
diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py
new file mode 100644
index 0000000000..46e04125d1
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format_param import AssistantResponseFormatParam
+
+__all__ = ["AssistantResponseFormatOptionParam"]
+
+AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam]
diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py
new file mode 100644
index 0000000000..96e1d02115
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["AssistantResponseFormatParam"]
+
+
+class AssistantResponseFormatParam(TypedDict, total=False):
+ type: Literal["text", "json_object"]
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py
new file mode 100644
index 0000000000..4314d4b41e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .assistant_tool_choice_function import AssistantToolChoiceFunction
+
+__all__ = ["AssistantToolChoice"]
+
+
+class AssistantToolChoice(BaseModel):
+ type: Literal["function", "code_interpreter", "retrieval"]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: Optional[AssistantToolChoiceFunction] = None
diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py
new file mode 100644
index 0000000000..87f38310ca
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantToolChoiceFunction"]
+
+
+class AssistantToolChoiceFunction(BaseModel):
+ name: str
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py
new file mode 100644
index 0000000000..428857de91
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["AssistantToolChoiceFunctionParam"]
+
+
+class AssistantToolChoiceFunctionParam(TypedDict, total=False):
+ name: Required[str]
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py
new file mode 100644
index 0000000000..0045a5986e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice import AssistantToolChoice
+
+__all__ = ["AssistantToolChoiceOption"]
+
+AssistantToolChoiceOption = Union[Literal["none", "auto"], AssistantToolChoice]
diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py
new file mode 100644
index 0000000000..618e7bff98
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice_param import AssistantToolChoiceParam
+
+__all__ = ["AssistantToolChoiceOptionParam"]
+
+AssistantToolChoiceOptionParam = Union[Literal["none", "auto"], AssistantToolChoiceParam]
diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py
new file mode 100644
index 0000000000..5cf6ea27be
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam
+
+__all__ = ["AssistantToolChoiceParam"]
+
+
+class AssistantToolChoiceParam(TypedDict, total=False):
+ type: Required[Literal["function", "code_interpreter", "retrieval"]]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: AssistantToolChoiceFunctionParam
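
In practice the option param is either one of the literals or the structured dict defined above; `get_weather` is a hypothetical function name used only to show the forced-tool shape:

```python
from openai.types.beta import AssistantToolChoiceOptionParam

auto: AssistantToolChoiceOptionParam = "auto"  # let the model decide
forced: AssistantToolChoiceOptionParam = {
    "type": "function",
    "function": {"name": "get_weather"},  # hypothetical function
}
```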
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 7c96aca8c1..6e9d9ed5db 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -26,7 +26,7 @@ class AssistantUpdateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index d4266fc48c..50f947a40a 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -8,12 +8,15 @@
from .function_tool_param import FunctionToolParam
from .retrieval_tool_param import RetrievalToolParam
from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
__all__ = [
"ThreadCreateAndRunParamsBase",
"Thread",
"ThreadMessage",
"Tool",
+ "TruncationStrategy",
"ThreadCreateAndRunParamsNonStreaming",
"ThreadCreateAndRunParamsStreaming",
]
@@ -33,6 +36,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+    completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+    prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -41,7 +62,30 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
a maximum of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -49,6 +93,25 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -59,12 +122,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
thread: Thread
"""If no thread is provided, an empty thread will be created."""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[Tool]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class ThreadMessage(TypedDict, total=False):
content: Required[str]
@@ -115,6 +190,23 @@ class Thread(TypedDict, total=False):
Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+    middle of the thread will be dropped to fit the context length of the model
+    (`max_prompt_tokens`).
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+    The number of most recent messages from the thread to include when
+    constructing the context for the run.
+
+
class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 3ab276245f..2efc3c77fa 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -6,9 +6,28 @@
from ...._models import BaseModel
from .run_status import RunStatus
from ..assistant_tool import AssistantTool
+from ..assistant_tool_choice_option import AssistantToolChoiceOption
+from ..assistant_response_format_option import AssistantResponseFormatOption
from .required_action_function_tool_call import RequiredActionFunctionToolCall
-__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
+__all__ = [
+ "Run",
+ "IncompleteDetails",
+ "LastError",
+ "RequiredAction",
+ "RequiredActionSubmitToolOutputs",
+ "TruncationStrategy",
+ "Usage",
+]
+
+
+class IncompleteDetails(BaseModel):
+ reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None
+ """The reason why the run is incomplete.
+
+ This will point to which specific token limit was reached over the course of the
+ run.
+ """
class LastError(BaseModel):
@@ -32,6 +51,23 @@ class RequiredAction(BaseModel):
"""For now, this is always `submit_tool_outputs`."""
+class TruncationStrategy(BaseModel):
+ type: Literal["auto", "last_messages"]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+    middle of the thread will be dropped to fit the context length of the model
+    (`max_prompt_tokens`).
+ """
+
+ last_messages: Optional[int] = None
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class Usage(BaseModel):
completion_tokens: int
"""Number of completion tokens used over the course of the run."""
@@ -76,6 +112,12 @@ class Run(BaseModel):
this run.
"""
+ incomplete_details: Optional[IncompleteDetails] = None
+ """Details on why the run is incomplete.
+
+ Will be `null` if the run is not incomplete.
+ """
+
instructions: str
"""
The instructions that the
@@ -86,6 +128,18 @@ class Run(BaseModel):
last_error: Optional[LastError] = None
"""The last error associated with this run. Will be `null` if there are no errors."""
+ max_completion_tokens: Optional[int] = None
+ """
+ The maximum number of completion tokens specified to have been used over the
+ course of the run.
+ """
+
+ max_prompt_tokens: Optional[int] = None
+ """
+ The maximum number of prompt tokens specified to have been used over the course
+ of the run.
+ """
+
metadata: Optional[object] = None
"""Set of 16 key-value pairs that can be attached to an object.
@@ -110,6 +164,25 @@ class Run(BaseModel):
Will be `null` if no action is required.
"""
+ response_format: Optional[AssistantResponseFormatOption] = None
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
started_at: Optional[int] = None
"""The Unix timestamp (in seconds) for when the run was started."""
@@ -126,6 +199,16 @@ class Run(BaseModel):
that was executed on as a part of this run.
"""
+ tool_choice: Optional[AssistantToolChoiceOption] = None
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: List[AssistantTool]
"""
The list of tools that the
@@ -133,6 +216,8 @@ class Run(BaseModel):
this run.
"""
+ truncation_strategy: Optional[TruncationStrategy] = None
+
usage: Optional[Usage] = None
"""Usage statistics related to the run.
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index e9bc19d980..9f2d4ba18b 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -6,8 +6,16 @@
from typing_extensions import Literal, Required, TypedDict
from ..assistant_tool_param import AssistantToolParam
+from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam
-__all__ = ["RunCreateParamsBase", "AdditionalMessage", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
+__all__ = [
+ "RunCreateParamsBase",
+ "AdditionalMessage",
+ "TruncationStrategy",
+ "RunCreateParamsNonStreaming",
+ "RunCreateParamsStreaming",
+]
class RunCreateParamsBase(TypedDict, total=False):
@@ -35,6 +43,24 @@ class RunCreateParamsBase(TypedDict, total=False):
of the assistant. This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -43,7 +69,30 @@ class RunCreateParamsBase(TypedDict, total=False):
a maximum of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -51,6 +100,25 @@ class RunCreateParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -58,12 +126,24 @@ class RunCreateParamsBase(TypedDict, total=False):
0.2 will make it more focused and deterministic.
"""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[AssistantToolParam]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class AdditionalMessage(TypedDict, total=False):
content: Required[str]
@@ -95,6 +175,23 @@ class AdditionalMessage(TypedDict, total=False):
"""
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class RunCreateParamsNonStreaming(RunCreateParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index ab6a747021..1e0f7f8195 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -32,6 +32,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -102,8 +104,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""Whether to return log probabilities of the output tokens or not.
If true, returns the log probabilities of each output token returned in the
- `content` of `message`. This option is currently not available on the
- `gpt-4-vision-preview` model.
+ `content` of `message`.
"""
max_tokens: Optional[int]
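With the `gpt-4-vision-preview` caveat removed above, `logprobs` is documented as generally available; a small sketch using the new `gpt-4-turbo` alias (message content is a placeholder):

```python
# Sketch: request per-token log probabilities alongside the completion.
completion = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[{"role": "user", "content": "Say hello."}],
    logprobs=True,
    top_logprobs=2,
)
print(completion.choices[0].logprobs)
```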
diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py
index 0bb2b90438..92b81329b1 100644
--- a/src/openai/types/fine_tuning/__init__.py
+++ b/src/openai/types/fine_tuning/__init__.py
@@ -7,3 +7,8 @@
from .job_create_params import JobCreateParams as JobCreateParams
from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent
from .job_list_events_params import JobListEventsParams as JobListEventsParams
+from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration
+from .fine_tuning_job_wandb_integration_object import (
+ FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject,
+)
diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py
index 23fe96d1a0..1593bf50c7 100644
--- a/src/openai/types/fine_tuning/fine_tuning_job.py
+++ b/src/openai/types/fine_tuning/fine_tuning_job.py
@@ -4,6 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
@@ -80,6 +81,9 @@ class FineTuningJob(BaseModel):
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+ seed: int
+ """The seed used for the fine-tuning job."""
+
status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
"""
The current status of the fine-tuning job, which can be either
@@ -105,3 +109,6 @@ class FineTuningJob(BaseModel):
You can retrieve the validation results with the
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+
+ integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
+ """A list of integrations to enable for this fine-tuning job."""
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
new file mode 100644
index 0000000000..8076313cae
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
+
+FineTuningJobIntegration = FineTuningJobWandbIntegrationObject
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
new file mode 100644
index 0000000000..4ac282eb54
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobWandbIntegration"]
+
+
+class FineTuningJobWandbIntegration(BaseModel):
+ project: str
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str] = None
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str] = None
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: Optional[List[str]] = None
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
new file mode 100644
index 0000000000..5b94354d50
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration
+
+__all__ = ["FineTuningJobWandbIntegrationObject"]
+
+
+class FineTuningJobWandbIntegrationObject(BaseModel):
+ type: Literal["wandb"]
+ """The type of the integration being enabled for the fine-tuning job"""
+
+ wandb: FineTuningJobWandbIntegration
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index 79e0b67e13..892c737fa3 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -2,10 +2,10 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters"]
+__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
class JobCreateParams(TypedDict, total=False):
@@ -32,6 +32,17 @@ class JobCreateParams(TypedDict, total=False):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job."""
+ integrations: Optional[Iterable[Integration]]
+ """A list of integrations to enable for your fine-tuning job."""
+
+ seed: Optional[int]
+ """The seed controls the reproducibility of the job.
+
+ Passing in the same seed and job parameters should produce the same results, but
+ may differ in rare cases. If a seed is not specified, one will be generated for
+ you.
+ """
+
suffix: Optional[str]
"""
A string of up to 18 characters that will be added to your fine-tuned model
@@ -76,3 +87,45 @@ class Hyperparameters(TypedDict, total=False):
An epoch refers to one full cycle through the training dataset.
"""
+
+
+class IntegrationWandb(TypedDict, total=False):
+ project: Required[str]
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str]
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str]
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: List[str]
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
+
+
+class Integration(TypedDict, total=False):
+ type: Required[Literal["wandb"]]
+ """The type of integration to enable.
+
+ Currently, only "wandb" (Weights and Biases) is supported.
+ """
+
+ wandb: Required[IntegrationWandb]
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..6c93da1b69
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .checkpoint_list_params import CheckpointListParams as CheckpointListParams
+from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint
diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
new file mode 100644
index 0000000000..adceb3b218
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["CheckpointListParams"]
+
+
+class CheckpointListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last checkpoint ID from the previous pagination request."""
+
+ limit: int
+ """Number of checkpoints to retrieve."""
diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
new file mode 100644
index 0000000000..bd07317a3e
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FineTuningJobCheckpoint", "Metrics"]
+
+
+class Metrics(BaseModel):
+ full_valid_loss: Optional[float] = None
+
+ full_valid_mean_token_accuracy: Optional[float] = None
+
+ step: Optional[float] = None
+
+ train_loss: Optional[float] = None
+
+ train_mean_token_accuracy: Optional[float] = None
+
+ valid_loss: Optional[float] = None
+
+ valid_mean_token_accuracy: Optional[float] = None
+
+
+class FineTuningJobCheckpoint(BaseModel):
+ id: str
+ """The checkpoint identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the checkpoint was created."""
+
+ fine_tuned_model_checkpoint: str
+ """The name of the fine-tuned checkpoint model that is created."""
+
+ fine_tuning_job_id: str
+ """The name of the fine-tuning job that this checkpoint was created from."""
+
+ metrics: Metrics
+ """Metrics at the step number during the fine-tuning job."""
+
+ object: Literal["fine_tuning.job.checkpoint"]
+ """The object type, which is always "fine_tuning.job.checkpoint"."""
+
+ step_number: int
+ """The step number that the checkpoint was created at."""
diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py
index 6edbe4b491..a509627b8e 100644
--- a/tests/api_resources/beta/test_assistants.py
+++ b/tests/api_resources/beta/test_assistants.py
@@ -24,14 +24,14 @@ class TestAssistants:
@parametrize
def test_method_create(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
def test_method_create_with_all_params(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -44,7 +44,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
@parametrize
def test_raw_response_create(self, client: OpenAI) -> None:
response = client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -55,7 +55,7 @@ def test_raw_response_create(self, client: OpenAI) -> None:
@parametrize
def test_streaming_response_create(self, client: OpenAI) -> None:
with client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -235,14 +235,14 @@ class TestAsyncAssistants:
@parametrize
async def test_method_create(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -255,7 +255,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
@parametrize
async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
response = await async_client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -266,7 +266,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
async with async_client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index fd3f7c5102..7c07251433 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -207,8 +207,11 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
thread = client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -234,7 +237,12 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -276,8 +284,11 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -302,7 +313,12 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
thread_stream.response.close()
@@ -521,8 +537,11 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
thread = await async_client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -548,7 +567,12 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -590,8 +614,11 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -616,7 +643,12 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await thread_stream.response.aclose()
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index 271bcccdd3..cf5b2998b9 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -57,11 +57,19 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -136,10 +144,18 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
run_stream.response.close()
@@ -553,11 +569,19 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -632,10 +656,18 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await run_stream.response.aclose()
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index bb0658f3d9..c54b56a37d 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -40,7 +40,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -102,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -119,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -138,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
completion_stream.response.close()
@@ -153,7 +153,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -215,7 +215,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -232,7 +232,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -256,7 +256,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -270,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -332,7 +332,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -349,7 +349,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -368,7 +368,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -383,7 +383,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -445,7 +445,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -462,7 +462,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
new file mode 100644
index 0000000000..915d5c6f63
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
@@ -0,0 +1,117 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCheckpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
+
+
+class TestAsyncCheckpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = await response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py
index f4974ebbcd..1ff6d63b31 100644
--- a/tests/api_resources/fine_tuning/test_jobs.py
+++ b/tests/api_resources/fine_tuning/test_jobs.py
@@ -39,6 +39,36 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
@@ -248,6 +278,36 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py
new file mode 100644
index 0000000000..aafeff8116
--- /dev/null
+++ b/tests/api_resources/test_batches.py
@@ -0,0 +1,268 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types import Batch
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestBatches:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ batch = client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ batch = client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ metadata={"foo": "string"},
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ batch = client.batches.retrieve(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.retrieve(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.retrieve(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: OpenAI) -> None:
+ batch = client.batches.cancel(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.cancel(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.cancel(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.batches.with_raw_response.cancel(
+ "",
+ )
+
+
+class TestAsyncBatches:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ metadata={"foo": "string"},
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.retrieve(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.retrieve(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.retrieve(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.cancel(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.cancel(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.cancel(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.batches.with_raw_response.cancel(
+ "",
+ )