diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 3741b313a5..4ce109ae13 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.17.1"
+ ".": "1.18.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index c550abf3c6..47c2bce1cc 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1 +1 @@
-configured_endpoints: 51
+configured_endpoints: 55
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e18ab5f54..03285021ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 1.18.0 (2024-04-15)
+
+Full Changelog: [v1.17.1...v1.18.0](https://github.com/openai/openai-python/compare/v1.17.1...v1.18.0)
+
+### Features
+
+* **api:** add batch API ([#1316](https://github.com/openai/openai-python/issues/1316)) ([3e6f19e](https://github.com/openai/openai-python/commit/3e6f19e6e7489bf1c94944a5f8f9b1d4535cdc43))
+* **api:** updates ([#1314](https://github.com/openai/openai-python/issues/1314)) ([8281dc9](https://github.com/openai/openai-python/commit/8281dc956178f5de345645660081f7d0c15a57a6))
+
## 1.17.1 (2024-04-12)
Full Changelog: [v1.17.0...v1.17.1](https://github.com/openai/openai-python/compare/v1.17.0...v1.17.1)
diff --git a/api.md b/api.md
index dbc95cd0b4..38f77592e8 100644
--- a/api.md
+++ b/api.md
@@ -159,16 +159,34 @@ Methods:
Types:
```python
-from openai.types.fine_tuning import FineTuningJob, FineTuningJobEvent
+from openai.types.fine_tuning import (
+ FineTuningJob,
+ FineTuningJobEvent,
+ FineTuningJobIntegration,
+ FineTuningJobWandbIntegration,
+ FineTuningJobWandbIntegrationObject,
+)
```
Methods:
-- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
-- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
-- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
-- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+- client.fine_tuning.jobs.create(\*\*params) -> FineTuningJob
+- client.fine_tuning.jobs.retrieve(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob]
+- client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob
+- client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent]
+
+### Checkpoints
+
+Types:
+
+```python
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+```
+
+Methods:
+
+- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
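For illustration, a minimal sketch of calling the new checkpoints endpoint (the job ID is a placeholder, and the printed fields assume the documented `FineTuningJobCheckpoint` shape):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# List checkpoints for an existing fine-tuning job; the ID is a placeholder.
checkpoints = client.fine_tuning.jobs.checkpoints.list("ftjob-abc123")
for checkpoint in checkpoints:  # SyncCursorPage is iterable and auto-paginates
    print(checkpoint.id, checkpoint.step_number)
```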
# Beta
@@ -220,7 +238,15 @@ Methods:
Types:
```python
-from openai.types.beta import Thread, ThreadDeleted
+from openai.types.beta import (
+ AssistantResponseFormat,
+ AssistantResponseFormatOption,
+ AssistantToolChoice,
+ AssistantToolChoiceFunction,
+ AssistantToolChoiceOption,
+ Thread,
+ ThreadDeleted,
+)
```
Methods:
@@ -335,3 +361,17 @@ Methods:
- client.beta.threads.messages.files.retrieve(file_id, \*, thread_id, message_id) -> MessageFile
- client.beta.threads.messages.files.list(message_id, \*, thread_id, \*\*params) -> SyncCursorPage[MessageFile]
+
+# Batches
+
+Types:
+
+```python
+from openai.types import Batch, BatchError, BatchRequestCounts
+```
+
+Methods:
+
+- client.batches.create(\*\*params) -> Batch
+- client.batches.retrieve(batch_id) -> Batch
+- client.batches.cancel(batch_id) -> Batch
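Combined with the existing files API, the three new methods suggest a lifecycle along these lines (a sketch only; the file name, metadata, and status check are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# Per the docstrings in the new resource, the input must be a JSONL file
# uploaded with purpose "batch"; the file name here is a placeholder.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",   # currently the only supported endpoint
    completion_window="24h",           # currently the only supported window
    metadata={"job": "nightly-eval"},  # optional custom metadata
)

batch = client.batches.retrieve(batch.id)  # poll for progress
if batch.status == "in_progress":          # status value assumed from the API docs
    batch = client.batches.cancel(batch.id)
```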
diff --git a/pyproject.toml b/pyproject.toml
index 9eb6330616..505f1a3e7a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.17.1"
+version = "1.18.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index 1daa26f7b7..490ba017f0 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -335,6 +335,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
files as files,
images as images,
models as models,
+ batches as batches,
embeddings as embeddings,
completions as completions,
fine_tuning as fine_tuning,
diff --git a/src/openai/_client.py b/src/openai/_client.py
index e9169df72a..5a6852e571 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -57,6 +57,7 @@ class OpenAI(SyncAPIClient):
models: resources.Models
fine_tuning: resources.FineTuning
beta: resources.Beta
+ batches: resources.Batches
with_raw_response: OpenAIWithRawResponse
with_streaming_response: OpenAIWithStreamedResponse
@@ -134,6 +135,7 @@ def __init__(
self.models = resources.Models(self)
self.fine_tuning = resources.FineTuning(self)
self.beta = resources.Beta(self)
+ self.batches = resources.Batches(self)
self.with_raw_response = OpenAIWithRawResponse(self)
self.with_streaming_response = OpenAIWithStreamedResponse(self)
@@ -257,6 +259,7 @@ class AsyncOpenAI(AsyncAPIClient):
models: resources.AsyncModels
fine_tuning: resources.AsyncFineTuning
beta: resources.AsyncBeta
+ batches: resources.AsyncBatches
with_raw_response: AsyncOpenAIWithRawResponse
with_streaming_response: AsyncOpenAIWithStreamedResponse
@@ -334,6 +337,7 @@ def __init__(
self.models = resources.AsyncModels(self)
self.fine_tuning = resources.AsyncFineTuning(self)
self.beta = resources.AsyncBeta(self)
+ self.batches = resources.AsyncBatches(self)
self.with_raw_response = AsyncOpenAIWithRawResponse(self)
self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
@@ -458,6 +462,7 @@ def __init__(self, client: OpenAI) -> None:
self.models = resources.ModelsWithRawResponse(client.models)
self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning)
self.beta = resources.BetaWithRawResponse(client.beta)
+ self.batches = resources.BatchesWithRawResponse(client.batches)
class AsyncOpenAIWithRawResponse:
@@ -472,6 +477,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.models = resources.AsyncModelsWithRawResponse(client.models)
self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning)
self.beta = resources.AsyncBetaWithRawResponse(client.beta)
+ self.batches = resources.AsyncBatchesWithRawResponse(client.batches)
class OpenAIWithStreamedResponse:
@@ -486,6 +492,7 @@ def __init__(self, client: OpenAI) -> None:
self.models = resources.ModelsWithStreamingResponse(client.models)
self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning)
self.beta = resources.BetaWithStreamingResponse(client.beta)
+ self.batches = resources.BatchesWithStreamingResponse(client.batches)
class AsyncOpenAIWithStreamedResponse:
@@ -500,6 +507,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.models = resources.AsyncModelsWithStreamingResponse(client.models)
self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning)
self.beta = resources.AsyncBetaWithStreamingResponse(client.beta)
+ self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches)
Client = OpenAI
diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py
index 9227f5e2b4..6f7356eb3c 100644
--- a/src/openai/_module_client.py
+++ b/src/openai/_module_client.py
@@ -42,6 +42,12 @@ def __load__(self) -> resources.Models:
return _load_client().models
+class BatchesProxy(LazyProxy[resources.Batches]):
+ @override
+ def __load__(self) -> resources.Batches:
+ return _load_client().batches
+
+
class EmbeddingsProxy(LazyProxy[resources.Embeddings]):
@override
def __load__(self) -> resources.Embeddings:
@@ -72,6 +78,7 @@ def __load__(self) -> resources.FineTuning:
audio: resources.Audio = AudioProxy().__as_proxied__()
images: resources.Images = ImagesProxy().__as_proxied__()
models: resources.Models = ModelsProxy().__as_proxied__()
+batches: resources.Batches = BatchesProxy().__as_proxied__()
embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__()
completions: resources.Completions = CompletionsProxy().__as_proxied__()
moderations: resources.Moderations = ModerationsProxy().__as_proxied__()
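The proxy mirrors the other module-level resources, so the new endpoints are also reachable without constructing a client explicitly (a sketch; the batch ID is a placeholder):

```python
import openai

# The lazy proxy instantiates a default client on first use, so
# `openai.batches` behaves like `OpenAI().batches`.
batch = openai.batches.retrieve("batch_abc123")
print(batch.status)
```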
diff --git a/src/openai/_version.py b/src/openai/_version.py
index a4ffbb2c35..2957462e3d 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.17.1" # x-release-please-version
+__version__ = "1.18.0" # x-release-please-version
diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py
index 64aa12d260..ecae4243fc 100644
--- a/src/openai/resources/__init__.py
+++ b/src/openai/resources/__init__.py
@@ -48,6 +48,14 @@
ModelsWithStreamingResponse,
AsyncModelsWithStreamingResponse,
)
+from .batches import (
+ Batches,
+ AsyncBatches,
+ BatchesWithRawResponse,
+ AsyncBatchesWithRawResponse,
+ BatchesWithStreamingResponse,
+ AsyncBatchesWithStreamingResponse,
+)
from .embeddings import (
Embeddings,
AsyncEmbeddings,
@@ -142,4 +150,10 @@
"AsyncBetaWithRawResponse",
"BetaWithStreamingResponse",
"AsyncBetaWithStreamingResponse",
+ "Batches",
+ "AsyncBatches",
+ "BatchesWithRawResponse",
+ "AsyncBatchesWithRawResponse",
+ "BatchesWithStreamingResponse",
+ "AsyncBatchesWithStreamingResponse",
]
diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py
new file mode 100644
index 0000000000..0921ccb194
--- /dev/null
+++ b/src/openai/resources/batches.py
@@ -0,0 +1,354 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .. import _legacy_response
+from ..types import Batch, batch_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .._base_client import (
+ make_request_options,
+)
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> BatchesWithRawResponse:
+ return BatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BatchesWithStreamingResponse:
+ return BatchesWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ completion_window: Literal["24h"],
+ endpoint: Literal["/v1/chat/completions"],
+ input_file_id: str,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Creates and executes a batch from an uploaded file of requests
+
+ Args:
+ completion_window: The time frame within which the batch should be processed. Currently only `24h`
+ is supported.
+
+ endpoint: The endpoint to be used for all requests in the batch. Currently only
+ `/v1/chat/completions` is supported.
+
+ input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+
+ metadata: Optional custom metadata for the batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/batches",
+ body=maybe_transform(
+ {
+ "completion_window": completion_window,
+ "endpoint": endpoint,
+ "input_file_id": input_file_id,
+ "metadata": metadata,
+ },
+ batch_create_params.BatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ def retrieve(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Retrieves a batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get(
+ f"/batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ def cancel(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Cancels an in-progress batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._post(
+ f"/batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+
+class AsyncBatches(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+ return AsyncBatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+ return AsyncBatchesWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ completion_window: Literal["24h"],
+ endpoint: Literal["/v1/chat/completions"],
+ input_file_id: str,
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Creates and executes a batch from an uploaded file of requests
+
+ Args:
+ completion_window: The time frame within which the batch should be processed. Currently only `24h`
+ is supported.
+
+ endpoint: The endpoint to be used for all requests in the batch. Currently only
+ `/v1/chat/completions` is supported.
+
+ input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+
+ metadata: Optional custom metadata for the batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/batches",
+ body=await async_maybe_transform(
+ {
+ "completion_window": completion_window,
+ "endpoint": endpoint,
+ "input_file_id": input_file_id,
+ "metadata": metadata,
+ },
+ batch_create_params.BatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ async def retrieve(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Retrieves a batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._get(
+ f"/batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+ async def cancel(
+ self,
+ batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Batch:
+ """
+ Cancels an in-progress batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._post(
+ f"/batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Batch,
+ )
+
+
+class BatchesWithRawResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ batches.cancel,
+ )
+
+
+class AsyncBatchesWithRawResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ batches.cancel,
+ )
+
+
+class BatchesWithStreamingResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ batches.cancel,
+ )
+
+
+class AsyncBatchesWithStreamingResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = async_to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ batches.cancel,
+ )
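As with the other resources, the wrapper classes at the bottom of the new file expose raw and streamed HTTP access; roughly (the batch ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

# Raw-response access: headers are available directly, `.parse()` returns the Batch.
response = client.batches.with_raw_response.retrieve("batch_abc123")
print(response.headers.get("x-request-id"))
batch = response.parse()

# Streamed-response access: the body is not read until you ask for it.
with client.batches.with_streaming_response.retrieve("batch_abc123") as response:
    batch = response.parse()
```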
diff --git a/src/openai/resources/beta/assistants/assistants.py b/src/openai/resources/beta/assistants/assistants.py
index 232451ab25..9e88794ebc 100644
--- a/src/openai/resources/beta/assistants/assistants.py
+++ b/src/openai/resources/beta/assistants/assistants.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal
import httpx
@@ -57,7 +57,29 @@ def with_streaming_response(self) -> AssistantsWithStreamingResponse:
def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -87,7 +109,7 @@ def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -194,7 +216,7 @@ def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -360,7 +382,29 @@ def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
async def create(
self,
*,
- model: str,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ],
description: Optional[str] | NotGiven = NOT_GIVEN,
file_ids: List[str] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -390,7 +434,7 @@ async def create(
attached to this assistant. There can be a maximum of 20 files attached to the
assistant. Files are ordered by their creation date in ascending order.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -497,7 +541,7 @@ async def update(
file was previously attached to the list but does not show up in the list, it
will be deleted from the assistant.
- instructions: The system instructions that the assistant uses. The maximum length is 32768
+ instructions: The system instructions that the assistant uses. The maximum length is 256,000
characters.
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
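The widened `model` parameter keeps plain strings working while adding typed literals for the known snapshots, e.g. (names other than the model are illustrative):

```python
from openai import OpenAI

client = OpenAI()

# "gpt-4-turbo" now type-checks as a literal; any other string is still accepted.
assistant = client.beta.assistants.create(
    model="gpt-4-turbo",
    name="Data helper",
    instructions="Answer questions about uploaded CSV files.",
)
```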
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 8576a5c09a..9fa7239c0b 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -4,7 +4,7 @@
import time
import typing_extensions
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -31,7 +31,12 @@
from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ....._streaming import Stream, AsyncStream
from .....pagination import SyncCursorPage, AsyncCursorPage
-from .....types.beta import AssistantToolParam, AssistantStreamEvent
+from .....types.beta import (
+ AssistantToolParam,
+ AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
+)
from ....._base_client import (
AsyncPaginator,
make_request_options,
@@ -77,11 +82,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -107,6 +141,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -117,6 +163,21 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -125,6 +186,13 @@ def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -148,10 +216,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -181,6 +278,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -191,10 +300,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -218,10 +349,39 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -251,6 +411,18 @@ def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -261,10 +433,32 @@ def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -287,11 +481,40 @@ def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -310,11 +533,16 @@ def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
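Pulling the new parameters together, a run can now be created with per-run budget, format, and tool controls (a sketch; the IDs and values are placeholders):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    max_prompt_tokens=2000,        # budget across all turns of the run
    max_completion_tokens=1000,
    response_format={"type": "json_object"},  # JSON mode; also instruct the model to emit JSON
    tool_choice="auto",                        # or a specific tool, e.g. {"type": "code_interpreter"}
    truncation_strategy={"type": "last_messages", "last_messages": 4},
)
```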
@@ -518,10 +746,39 @@ def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,12 +799,17 @@ def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
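`create_and_poll` forwards the same new parameters and, per the comment above, always sends `stream=False` while it waits; for instance (IDs are placeholders):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",
    assistant_id="asst_abc123",
    max_completion_tokens=1000,
    poll_interval_ms=500,  # optional polling interval override
)
print(run.status)  # a terminal state such as "completed"
```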
@@ -572,10 +834,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -596,10 +887,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -620,10 +940,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -652,11 +1001,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
@@ -722,10 +1076,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -745,10 +1128,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -768,10 +1180,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -800,11 +1241,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
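The `stream()` helper threads the same parameters through while forcing `stream=True`; one plausible usage, assuming the event-handler helpers behave as documented elsewhere in the library:

```python
from openai import OpenAI

client = OpenAI()

with client.beta.threads.runs.stream(
    thread_id="thread_abc123",   # placeholder
    assistant_id="asst_abc123",  # placeholder
    tool_choice="auto",
    truncation_strategy={"type": "auto"},
) as stream:
    for text in stream.text_deltas:  # convenience iterator over streamed text
        print(text, end="")
```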
@@ -1123,11 +1569,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1153,6 +1628,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1163,6 +1650,21 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
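To make the JSON-mode caveat concrete, a minimal sketch that pairs `response_format` with an explicit instruction to emit JSON (omitting that instruction risks the whitespace-stream failure mode described above); IDs are placeholders:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create_and_poll(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    # JSON mode requires telling the model to produce JSON in the prompt too.
    instructions="Reply only with a JSON object with keys 'answer' and 'confidence'.",
    response_format={"type": "json_object"},
)
```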
@@ -1171,6 +1673,13 @@ async def create(
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
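The forced-function form quoted in the docstring looks like this in a call; `get_weather` is a hypothetical function tool assumed to be defined on the assistant:

```python
from openai import OpenAI

client = OpenAI()

# `tool_choice` forces the model to call the named function on this run.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)
```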
@@ -1194,10 +1703,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1227,6 +1765,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1237,10 +1787,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1264,10 +1836,39 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1297,6 +1898,18 @@ async def create(
[instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
of the assistant. This is useful for modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -1307,10 +1920,32 @@ async def create(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -1333,11 +1968,40 @@ async def create(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1356,11 +2020,16 @@ async def create(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
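The transform above forwards each new knob into `RunCreateParams`. `truncation_strategy` is the only one without a docstring in these hunks; a sketch under the assumption that it follows the `auto`/`last_messages` shape from the API reference:

```python
from openai import OpenAI

client = OpenAI()

# Keep only the 5 most recent thread messages in the run's context window.
run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder ID
    assistant_id="asst_abc123",  # placeholder ID
    truncation_strategy={"type": "last_messages", "last_messages": 5},
)
```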
@@ -1564,10 +2233,39 @@ async def create_and_poll(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1588,12 +2286,17 @@ async def create_and_poll(
additional_instructions=additional_instructions,
additional_messages=additional_messages,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
+ tool_choice=tool_choice,
# We assume we are not streaming when polling
stream=False,
tools=tools,
+ truncation_strategy=truncation_strategy,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
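For the async client, the same parameters flow through `create_and_poll`, which waits (cooperatively) until the run reaches a terminal state; a minimal sketch with placeholder IDs:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    run = await client.beta.threads.runs.create_and_poll(
        thread_id="thread_abc123",   # placeholder ID
        assistant_id="asst_abc123",  # placeholder ID
        max_completion_tokens=500,
    )
    print(run.status)

asyncio.run(main())
```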
@@ -1618,10 +2321,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1642,10 +2374,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1666,10 +2427,39 @@ def create_and_stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1700,11 +2490,16 @@ def create_and_stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
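One way to drive the async streaming variant is with an event handler; the `on_text_delta` hook below is assumed from the SDK's streaming helpers rather than shown in this diff, and the IDs are placeholders:

```python
import asyncio

from typing_extensions import override

from openai import AsyncOpenAI, AsyncAssistantEventHandler

class PrintHandler(AsyncAssistantEventHandler):
    @override
    async def on_text_delta(self, delta, snapshot) -> None:
        # Print text fragments as they arrive over the SSE stream.
        if delta.value:
            print(delta.value, end="", flush=True)

client = AsyncOpenAI()

async def main() -> None:
    async with client.beta.threads.runs.create_and_stream(
        thread_id="thread_abc123",   # placeholder ID
        assistant_id="asst_abc123",  # placeholder ID
        event_handler=PrintHandler(),
    ) as stream:
        await stream.until_done()

asyncio.run(main())
```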
@@ -1770,10 +2565,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1793,10 +2617,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1816,10 +2669,39 @@ def stream(
additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
thread_id: str,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1850,11 +2732,16 @@ def stream(
"additional_instructions": additional_instructions,
"additional_messages": additional_messages,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
run_create_params.RunCreateParams,
),
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 3509267d4f..9c2e2f0043 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Iterable, Optional, overload
+from typing import Union, Iterable, Optional, overload
from functools import partial
from typing_extensions import Literal
@@ -40,6 +40,8 @@
Thread,
ThreadDeleted,
AssistantStreamEvent,
+ AssistantToolChoiceOptionParam,
+ AssistantResponseFormatOptionParam,
thread_create_params,
thread_update_params,
thread_create_and_run_params,
@@ -241,12 +243,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -265,6 +296,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -275,6 +318,21 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -285,6 +343,13 @@ def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
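`create_and_run` accepts the same per-run knobs plus an inline `thread`; a sketch that creates the thread and starts the run in one request (the assistant ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder ID
    thread={
        "messages": [
            {"role": "user", "content": "Summarize the attached notes in three bullets."}
        ]
    },
    max_prompt_tokens=4000,
)
```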
@@ -305,11 +370,40 @@ def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -332,6 +426,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -342,12 +448,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -368,11 +496,40 @@ def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -395,6 +552,18 @@ def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -405,12 +574,34 @@ def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -430,12 +621,41 @@ def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -450,12 +670,17 @@ def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -472,11 +697,40 @@ def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -493,11 +747,16 @@ def create_and_run_poll(
run = self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
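And the polling convenience wrapper shown above, in use; a hedged sketch with a placeholder assistant ID:

```python
from openai import OpenAI

client = OpenAI()

# Create a thread, start the run, and poll until it reaches a terminal state.
run = client.beta.threads.create_and_run_poll(
    assistant_id="asst_abc123",  # placeholder ID
    thread={"messages": [{"role": "user", "content": "Write a haiku about spring."}]},
    truncation_strategy={"type": "auto"},
)
print(run.status)
```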
@@ -512,11 +771,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -533,11 +821,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -554,11 +871,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -581,12 +927,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
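The synchronous streaming path mirrors the async one; a sketch using the SDK's sync event handler (the handler API is assumed from the streaming helpers, not shown in this diff; IDs are placeholders):

```python
from typing_extensions import override

from openai import OpenAI, AssistantEventHandler

class PrintHandler(AssistantEventHandler):
    @override
    def on_text_delta(self, delta, snapshot) -> None:
        # Print text fragments as they arrive over the SSE stream.
        if delta.value:
            print(delta.value, end="", flush=True)

client = OpenAI()

with client.beta.threads.create_and_run_stream(
    assistant_id="asst_abc123",  # placeholder ID
    thread={"messages": [{"role": "user", "content": "Tell me a short story."}]},
    event_handler=PrintHandler(),
) as stream:
    stream.until_done()
```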
@@ -780,12 +1131,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -804,6 +1184,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -814,6 +1206,21 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
stream: If `true`, returns a stream of events that happen during the Run as server-sent
events, terminating when the Run enters a terminal state with a `data: [DONE]`
message.
@@ -824,6 +1231,13 @@ async def create_and_run(
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -844,11 +1258,40 @@ async def create_and_run(
assistant_id: str,
stream: Literal[True],
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -871,6 +1314,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -881,12 +1336,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -907,11 +1384,40 @@ async def create_and_run(
assistant_id: str,
stream: bool,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -934,6 +1440,18 @@ async def create_and_run(
instructions: Override the default system message of the assistant. This is useful for
modifying the behavior on a per-run basis.
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+        completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
+ max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+        prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format. Keys
can be a maximum of 64 characters long and values can be a maximum of 512
@@ -944,12 +1462,34 @@ async def create_and_run(
model associated with the assistant. If not, the model associated with the
assistant will be used.
+ response_format: Specifies the format that the model must output. Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
make the output more random, while lower values like 0.2 will make it more
focused and deterministic.
thread: If no thread is provided, an empty thread will be created.
+ tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+
tools: Override the tools the assistant can use for this run. This is useful for
modifying the behavior on a per-run basis.
@@ -969,12 +1509,41 @@ async def create_and_run(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -989,12 +1558,17 @@ async def create_and_run(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"stream": stream,
"temperature": temperature,
"thread": thread,
+ "tool_choice": tool_choice,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
@@ -1011,11 +1585,40 @@ async def create_and_run_poll(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1032,11 +1635,16 @@ async def create_and_run_poll(
run = await self.create_and_run(
assistant_id=assistant_id,
instructions=instructions,
+ max_completion_tokens=max_completion_tokens,
+ max_prompt_tokens=max_prompt_tokens,
metadata=metadata,
model=model,
+ response_format=response_format,
temperature=temperature,
stream=False,
thread=thread,
+ tool_choice=tool_choice,
+ truncation_strategy=truncation_strategy,
tools=tools,
extra_headers=extra_headers,
extra_query=extra_query,
@@ -1053,11 +1661,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -1074,11 +1711,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1095,11 +1761,40 @@ def create_and_run_stream(
*,
assistant_id: str,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[object] | NotGiven = NOT_GIVEN,
- model: Optional[str] | NotGiven = NOT_GIVEN,
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
+ | NotGiven = NOT_GIVEN,
+ response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+ tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
event_handler: AsyncAssistantEventHandlerT | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1124,12 +1819,17 @@ def create_and_run_stream(
{
"assistant_id": assistant_id,
"instructions": instructions,
+ "max_completion_tokens": max_completion_tokens,
+ "max_prompt_tokens": max_prompt_tokens,
"metadata": metadata,
"model": model,
+ "response_format": response_format,
"temperature": temperature,
+ "tool_choice": tool_choice,
"stream": True,
"thread": thread,
"tools": tools,
+ "truncation_strategy": truncation_strategy,
},
thread_create_and_run_params.ThreadCreateAndRunParams,
),
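
As a quick orientation for the new `create_and_run` surface above, here is a minimal sketch of a call exercising the added parameters; the assistant ID and message content are placeholders, and an `OPENAI_API_KEY` in the environment is assumed:

```python
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",  # placeholder assistant ID
    thread={"messages": [{"role": "user", "content": "Reply in JSON."}]},
    # New in this release: response_format, tool_choice, the token caps,
    # and truncation_strategy.
    response_format={"type": "json_object"},
    tool_choice="auto",
    max_prompt_tokens=2000,
    max_completion_tokens=500,
    truncation_strategy={"type": "last_messages", "last_messages": 10},
)
print(run.status)
```

Note that JSON mode still requires the prompt itself to ask for JSON, per the docstring above.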
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index 3000603689..1a23e7876e 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -50,6 +50,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -137,8 +139,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -240,6 +241,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -334,8 +337,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -430,6 +432,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -524,8 +528,7 @@ def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -620,6 +623,8 @@ def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -717,6 +722,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -804,8 +811,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -907,6 +913,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1001,8 +1009,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1097,6 +1104,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -1191,8 +1200,7 @@ async def create(
logprobs: Whether to return log probabilities of the output tokens or not. If true,
returns the log probabilities of each output token returned in the `content` of
- `message`. This option is currently not available on the `gpt-4-vision-preview`
- model.
+ `message`.
max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
completion.
@@ -1287,6 +1295,8 @@ async def create(
model: Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index 659b3e8501..0404fed6ec 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -11,6 +11,7 @@
AsyncJobsWithStreamingResponse,
)
from ..._compat import cached_property
+from .jobs.jobs import Jobs, AsyncJobs
from ..._resource import SyncAPIResource, AsyncAPIResource
__all__ = ["FineTuning", "AsyncFineTuning"]
diff --git a/src/openai/resources/fine_tuning/jobs/__init__.py b/src/openai/resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..94cd1fb7e7
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+ Jobs,
+ AsyncJobs,
+ JobsWithRawResponse,
+ AsyncJobsWithRawResponse,
+ JobsWithStreamingResponse,
+ AsyncJobsWithStreamingResponse,
+)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+
+__all__ = [
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+ "Jobs",
+ "AsyncJobs",
+ "JobsWithRawResponse",
+ "AsyncJobsWithRawResponse",
+ "JobsWithStreamingResponse",
+ "AsyncJobsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py
new file mode 100644
index 0000000000..e9ea6aad9a
--- /dev/null
+++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py
@@ -0,0 +1,176 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
+ AsyncPaginator,
+ make_request_options,
+)
+from ....types.fine_tuning.jobs import FineTuningJobCheckpoint, checkpoint_list_params
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[FineTuningJobCheckpoint]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=SyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+ def list(
+ self,
+ fine_tuning_job_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]:
+ """
+ List checkpoints for a fine-tuning job.
+
+ Args:
+ after: Identifier for the last checkpoint ID from the previous pagination request.
+
+ limit: Number of checkpoints to retrieve.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuning_job_id:
+ raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
+ return self._get_api_list(
+ f"/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints",
+ page=AsyncCursorPage[FineTuningJobCheckpoint],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ },
+ checkpoint_list_params.CheckpointListParams,
+ ),
+ ),
+ model=FineTuningJobCheckpoint,
+ )
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = to_streamed_response_wrapper(
+ checkpoints.list,
+ )
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ self.list = async_to_streamed_response_wrapper(
+ checkpoints.list,
+ )
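
Usage of the new checkpoints resource is a one-liner; the returned `SyncCursorPage` can be iterated directly and paginates as needed. The job ID below is a placeholder, and the `id` attribute on each checkpoint is assumed from the REST API shape:

```python
from openai import OpenAI

client = OpenAI()

for checkpoint in client.fine_tuning.jobs.checkpoints.list(
    "ftjob_abc123",  # placeholder fine-tuning job ID
    limit=10,
):
    print(checkpoint.id)
```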
diff --git a/src/openai/resources/fine_tuning/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
similarity index 89%
rename from src/openai/resources/fine_tuning/jobs.py
rename to src/openai/resources/fine_tuning/jobs/jobs.py
index a0c3e24dac..229f716c48 100644
--- a/src/openai/resources/fine_tuning/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -2,26 +2,34 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import Union, Iterable, Optional
from typing_extensions import Literal
import httpx
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
maybe_transform,
async_maybe_transform,
)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...pagination import SyncCursorPage, AsyncCursorPage
-from ..._base_client import (
+from ...._compat import cached_property
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import (
AsyncPaginator,
make_request_options,
)
-from ...types.fine_tuning import (
+from ....types.fine_tuning import (
FineTuningJob,
FineTuningJobEvent,
job_list_params,
@@ -33,6 +41,10 @@
class Jobs(SyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self)
@@ -47,6 +59,8 @@ def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -82,6 +96,12 @@ def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -116,6 +136,8 @@ def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -294,6 +316,10 @@ def list_events(
class AsyncJobs(AsyncAPIResource):
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self)
@@ -308,6 +334,8 @@ async def create(
model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
training_file: str,
hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
+ integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
suffix: Optional[str] | NotGiven = NOT_GIVEN,
validation_file: Optional[str] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -343,6 +371,12 @@ async def create(
hyperparameters: The hyperparameters used for the fine-tuning job.
+ integrations: A list of integrations to enable for your fine-tuning job.
+
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and
+ job parameters should produce the same results, but may differ in rare cases. If
+ a seed is not specified, one will be generated for you.
+
suffix: A string of up to 18 characters that will be added to your fine-tuned model
name.
@@ -377,6 +411,8 @@ async def create(
"model": model,
"training_file": training_file,
"hyperparameters": hyperparameters,
+ "integrations": integrations,
+ "seed": seed,
"suffix": suffix,
"validation_file": validation_file,
},
@@ -574,6 +610,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._jobs.checkpoints)
+
class AsyncJobsWithRawResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -595,6 +635,10 @@ def __init__(self, jobs: AsyncJobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._jobs.checkpoints)
+
class JobsWithStreamingResponse:
def __init__(self, jobs: Jobs) -> None:
@@ -616,6 +660,10 @@ def __init__(self, jobs: Jobs) -> None:
jobs.list_events,
)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._jobs.checkpoints)
+
class AsyncJobsWithStreamingResponse:
def __init__(self, jobs: AsyncJobs) -> None:
@@ -636,3 +684,7 @@ def __init__(self, jobs: AsyncJobs) -> None:
self.list_events = async_to_streamed_response_wrapper(
jobs.list_events,
)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._jobs.checkpoints)
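
To illustrate the two new `create()` parameters, a sketch of a job that pins a seed and enables a Weights & Biases integration; the file ID and project name are placeholders, and the `wandb` payload shape follows the integration types added in this release:

```python
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file="file-abc123",  # placeholder uploaded-file ID
    seed=42,  # fixed seed for reproducible runs
    integrations=[
        {
            "type": "wandb",
            "wandb": {"project": "my-project"},  # placeholder project
        }
    ],
)
print(job.id, job.status)
```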
diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py
index 0917e22a8f..4bbcdddc2a 100644
--- a/src/openai/types/__init__.py
+++ b/src/openai/types/__init__.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from .batch import Batch as Batch
from .image import Image as Image
from .model import Model as Model
from .shared import (
@@ -12,6 +13,7 @@
from .embedding import Embedding as Embedding
from .completion import Completion as Completion
from .moderation import Moderation as Moderation
+from .batch_error import BatchError as BatchError
from .file_object import FileObject as FileObject
from .file_content import FileContent as FileContent
from .file_deleted import FileDeleted as FileDeleted
@@ -22,6 +24,8 @@
from .completion_choice import CompletionChoice as CompletionChoice
from .image_edit_params import ImageEditParams as ImageEditParams
from .file_create_params import FileCreateParams as FileCreateParams
+from .batch_create_params import BatchCreateParams as BatchCreateParams
+from .batch_request_counts import BatchRequestCounts as BatchRequestCounts
from .image_generate_params import ImageGenerateParams as ImageGenerateParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py
new file mode 100644
index 0000000000..bde04d1a24
--- /dev/null
+++ b/src/openai/types/batch.py
@@ -0,0 +1,85 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .batch_error import BatchError
+from .batch_request_counts import BatchRequestCounts
+
+__all__ = ["Batch", "Errors"]
+
+
+class Errors(BaseModel):
+ data: Optional[List[BatchError]] = None
+
+ object: Optional[str] = None
+ """The object type, which is always `list`."""
+
+
+class Batch(BaseModel):
+ id: str
+
+ completion_window: str
+ """The time frame within which the batch should be processed."""
+
+    created_at: int
+ """The Unix timestamp (in seconds) for when the batch was created."""
+
+ endpoint: str
+ """The OpenAI API endpoint used by the batch."""
+
+ input_file_id: str
+ """The ID of the input file for the batch."""
+
+ object: Literal["batch"]
+ """The object type, which is always `batch`."""
+
+ status: Literal[
+ "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled"
+ ]
+ """The current status of the batch."""
+
+    cancelled_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch was cancelled."""
+
+    cancelling_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started cancelling."""
+
+    completed_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch was completed."""
+
+    error_file_id: Optional[str] = None
+    """The ID of the file containing the outputs of requests with errors."""
+
+    errors: Optional[Errors] = None
+
+    expired_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch expired."""
+
+    expires_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch will expire."""
+
+    failed_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch failed."""
+
+    finalizing_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started finalizing."""
+
+    in_progress_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the batch started processing."""
+
+ metadata: Optional[builtins.object] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+    a maximum of 512 characters long.
+ """
+
+ output_file_id: Optional[str] = None
+ """The ID of the file containing the outputs of successfully executed requests."""
+
+ request_counts: Optional[BatchRequestCounts] = None
+ """The request counts for different statuses within the batch."""
diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py
new file mode 100644
index 0000000000..6a22be8626
--- /dev/null
+++ b/src/openai/types/batch_create_params.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["BatchCreateParams"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+ completion_window: Required[Literal["24h"]]
+ """The time frame within which the batch should be processed.
+
+ Currently only `24h` is supported.
+ """
+
+ endpoint: Required[Literal["/v1/chat/completions"]]
+ """The endpoint to be used for all requests in the batch.
+
+ Currently only `/v1/chat/completions` is supported.
+ """
+
+ input_file_id: Required[str]
+ """The ID of an uploaded file that contains requests for the new batch.
+
+ See [upload file](https://platform.openai.com/docs/api-reference/files/create)
+ for how to upload a file.
+
+ Your input file must be formatted as a JSONL file, and must be uploaded with the
+ purpose `batch`.
+ """
+
+ metadata: Optional[Dict[str, str]]
+ """Optional custom metadata for the batch."""
diff --git a/src/openai/types/batch_error.py b/src/openai/types/batch_error.py
new file mode 100644
index 0000000000..1cdd808dbd
--- /dev/null
+++ b/src/openai/types/batch_error.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["BatchError"]
+
+
+class BatchError(BaseModel):
+ code: Optional[str] = None
+ """An error code identifying the error type."""
+
+ line: Optional[int] = None
+ """The line number of the input file where the error occurred, if applicable."""
+
+ message: Optional[str] = None
+ """A human-readable message providing more details about the error."""
+
+ param: Optional[str] = None
+ """The name of the parameter that caused the error, if applicable."""
diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py
new file mode 100644
index 0000000000..068b071af1
--- /dev/null
+++ b/src/openai/types/batch_request_counts.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["BatchRequestCounts"]
+
+
+class BatchRequestCounts(BaseModel):
+ completed: int
+ """Number of requests that have been completed successfully."""
+
+ failed: int
+ """Number of requests that have failed."""
+
+ total: int
+ """Total number of requests in the batch."""
diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py
index a7de0272b4..0171694587 100644
--- a/src/openai/types/beta/__init__.py
+++ b/src/openai/types/beta/__init__.py
@@ -15,9 +15,21 @@
from .thread_create_params import ThreadCreateParams as ThreadCreateParams
from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
from .assistant_list_params import AssistantListParams as AssistantListParams
+from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice
from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
+from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat
+from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam
from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
+from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption
from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
+from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction
+from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam
+from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam
+from .assistant_response_format_option_param import (
+ AssistantResponseFormatOptionParam as AssistantResponseFormatOptionParam,
+)
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index 32561a9aa8..0a0d28ed01 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -29,7 +29,7 @@ class Assistant(BaseModel):
instructions: Optional[str] = None
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object] = None
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 8bad323640..011121485f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -2,8 +2,8 @@
from __future__ import annotations
-from typing import List, Iterable, Optional
-from typing_extensions import Required, TypedDict
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
from .assistant_tool_param import AssistantToolParam
@@ -11,7 +11,31 @@
class AssistantCreateParams(TypedDict, total=False):
- model: Required[str]
+ model: Required[
+ Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ ]
+ ]
"""ID of the model to use.
You can use the
@@ -34,7 +58,7 @@ class AssistantCreateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
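
With the widened `model` type, assistant creation keeps accepting arbitrary strings while type checkers can now flag typos in the known literals; a sketch with placeholder name and instructions:

```python
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4-turbo",  # checked against the Literal union above
    name="Data helper",  # placeholder
    instructions="You answer questions about uploaded CSV files.",
    tools=[{"type": "code_interpreter"}],
)
print(assistant.id)
```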
diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py
new file mode 100644
index 0000000000..f53bdaf62a
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantResponseFormat"]
+
+
+class AssistantResponseFormat(BaseModel):
+ type: Optional[Literal["text", "json_object"]] = None
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py
new file mode 100644
index 0000000000..d4e05e0ea9
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format import AssistantResponseFormat
+
+__all__ = ["AssistantResponseFormatOption"]
+
+AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat]
diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py
new file mode 100644
index 0000000000..46e04125d1
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_response_format_param import AssistantResponseFormatParam
+
+__all__ = ["AssistantResponseFormatOptionParam"]
+
+AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam]
diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py
new file mode 100644
index 0000000000..96e1d02115
--- /dev/null
+++ b/src/openai/types/beta/assistant_response_format_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["AssistantResponseFormatParam"]
+
+
+class AssistantResponseFormatParam(TypedDict, total=False):
+ type: Literal["text", "json_object"]
+ """Must be one of `text` or `json_object`."""
diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py
new file mode 100644
index 0000000000..4314d4b41e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .assistant_tool_choice_function import AssistantToolChoiceFunction
+
+__all__ = ["AssistantToolChoice"]
+
+
+class AssistantToolChoice(BaseModel):
+ type: Literal["function", "code_interpreter", "retrieval"]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: Optional[AssistantToolChoiceFunction] = None
diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py
new file mode 100644
index 0000000000..87f38310ca
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["AssistantToolChoiceFunction"]
+
+
+class AssistantToolChoiceFunction(BaseModel):
+ name: str
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_function_param.py b/src/openai/types/beta/assistant_tool_choice_function_param.py
new file mode 100644
index 0000000000..428857de91
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_function_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["AssistantToolChoiceFunctionParam"]
+
+
+class AssistantToolChoiceFunctionParam(TypedDict, total=False):
+ name: Required[str]
+ """The name of the function to call."""
diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py
new file mode 100644
index 0000000000..0045a5986e
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice import AssistantToolChoice
+
+__all__ = ["AssistantToolChoiceOption"]
+
+AssistantToolChoiceOption = Union[Literal["none", "auto"], AssistantToolChoice]
diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py
new file mode 100644
index 0000000000..618e7bff98
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_option_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+from .assistant_tool_choice_param import AssistantToolChoiceParam
+
+__all__ = ["AssistantToolChoiceOptionParam"]
+
+AssistantToolChoiceOptionParam = Union[Literal["none", "auto"], AssistantToolChoiceParam]
diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py
new file mode 100644
index 0000000000..5cf6ea27be
--- /dev/null
+++ b/src/openai/types/beta/assistant_tool_choice_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam
+
+__all__ = ["AssistantToolChoiceParam"]
+
+
+class AssistantToolChoiceParam(TypedDict, total=False):
+ type: Required[Literal["function", "code_interpreter", "retrieval"]]
+ """The type of the tool. If type is `function`, the function name must be set"""
+
+ function: AssistantToolChoiceFunctionParam
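
In practice the option param is either one of the literals or the structured dict defined above; `get_weather` is a hypothetical function name used only to show the forced-tool shape:

```python
from openai.types.beta import AssistantToolChoiceOptionParam

auto: AssistantToolChoiceOptionParam = "auto"  # let the model decide
forced: AssistantToolChoiceOptionParam = {
    "type": "function",
    "function": {"name": "get_weather"},  # hypothetical function
}
```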
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 7c96aca8c1..6e9d9ed5db 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -26,7 +26,7 @@ class AssistantUpdateParams(TypedDict, total=False):
instructions: Optional[str]
"""The system instructions that the assistant uses.
- The maximum length is 32768 characters.
+ The maximum length is 256,000 characters.
"""
metadata: Optional[object]
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index d4266fc48c..50f947a40a 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -8,12 +8,15 @@
from .function_tool_param import FunctionToolParam
from .retrieval_tool_param import RetrievalToolParam
from .code_interpreter_tool_param import CodeInterpreterToolParam
+from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from .assistant_response_format_option_param import AssistantResponseFormatOptionParam
__all__ = [
"ThreadCreateAndRunParamsBase",
"Thread",
"ThreadMessage",
"Tool",
+ "TruncationStrategy",
"ThreadCreateAndRunParamsNonStreaming",
"ThreadCreateAndRunParamsStreaming",
]
@@ -33,6 +36,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+    completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+    prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -41,7 +62,30 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
a maximum of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -49,6 +93,25 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -59,12 +122,24 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
thread: Thread
"""If no thread is provided, an empty thread will be created."""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[Tool]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class ThreadMessage(TypedDict, total=False):
content: Required[str]
@@ -115,6 +190,23 @@ class Thread(TypedDict, total=False):
Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+    middle of the thread will be dropped to fit the context length of the model
+    (`max_prompt_tokens`).
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+    The number of most recent messages from the thread to include when
+    constructing the context for the run.
+
+
class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 3ab276245f..2efc3c77fa 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -6,9 +6,28 @@
from ...._models import BaseModel
from .run_status import RunStatus
from ..assistant_tool import AssistantTool
+from ..assistant_tool_choice_option import AssistantToolChoiceOption
+from ..assistant_response_format_option import AssistantResponseFormatOption
from .required_action_function_tool_call import RequiredActionFunctionToolCall
-__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
+__all__ = [
+ "Run",
+ "IncompleteDetails",
+ "LastError",
+ "RequiredAction",
+ "RequiredActionSubmitToolOutputs",
+ "TruncationStrategy",
+ "Usage",
+]
+
+
+class IncompleteDetails(BaseModel):
+ reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None
+ """The reason why the run is incomplete.
+
+ This will point to which specific token limit was reached over the course of the
+ run.
+ """
class LastError(BaseModel):
@@ -32,6 +51,23 @@ class RequiredAction(BaseModel):
"""For now, this is always `submit_tool_outputs`."""
+class TruncationStrategy(BaseModel):
+ type: Literal["auto", "last_messages"]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+    middle of the thread will be dropped to fit the context length of the model
+    (`max_prompt_tokens`).
+ """
+
+ last_messages: Optional[int] = None
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class Usage(BaseModel):
completion_tokens: int
"""Number of completion tokens used over the course of the run."""
@@ -76,6 +112,12 @@ class Run(BaseModel):
this run.
"""
+ incomplete_details: Optional[IncompleteDetails] = None
+ """Details on why the run is incomplete.
+
+ Will be `null` if the run is not incomplete.
+ """
+
instructions: str
"""
The instructions that the
@@ -86,6 +128,18 @@ class Run(BaseModel):
last_error: Optional[LastError] = None
"""The last error associated with this run. Will be `null` if there are no errors."""
+ max_completion_tokens: Optional[int] = None
+ """
+ The maximum number of completion tokens specified to have been used over the
+ course of the run.
+ """
+
+ max_prompt_tokens: Optional[int] = None
+ """
+ The maximum number of prompt tokens specified to have been used over the course
+ of the run.
+ """
+
metadata: Optional[object] = None
"""Set of 16 key-value pairs that can be attached to an object.
@@ -110,6 +164,25 @@ class Run(BaseModel):
Will be `null` if no action is required.
"""
+ response_format: Optional[AssistantResponseFormatOption] = None
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
started_at: Optional[int] = None
"""The Unix timestamp (in seconds) for when the run was started."""
@@ -126,6 +199,16 @@ class Run(BaseModel):
that was executed on as a part of this run.
"""
+ tool_choice: Optional[AssistantToolChoiceOption] = None
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: List[AssistantTool]
"""
The list of tools that the
@@ -133,6 +216,8 @@ class Run(BaseModel):
this run.
"""
+ truncation_strategy: Optional[TruncationStrategy] = None
+
usage: Optional[Usage] = None
"""Usage statistics related to the run.
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index e9bc19d980..9f2d4ba18b 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -6,8 +6,16 @@
from typing_extensions import Literal, Required, TypedDict
from ..assistant_tool_param import AssistantToolParam
+from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
+from ..assistant_response_format_option_param import AssistantResponseFormatOptionParam
-__all__ = ["RunCreateParamsBase", "AdditionalMessage", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
+__all__ = [
+ "RunCreateParamsBase",
+ "AdditionalMessage",
+ "TruncationStrategy",
+ "RunCreateParamsNonStreaming",
+ "RunCreateParamsStreaming",
+]
class RunCreateParamsBase(TypedDict, total=False):
@@ -35,6 +43,24 @@ class RunCreateParamsBase(TypedDict, total=False):
of the assistant. This is useful for modifying the behavior on a per-run basis.
"""
+ max_completion_tokens: Optional[int]
+ """
+ The maximum number of completion tokens that may be used over the course of the
+ run. The run will make a best effort to use only the number of completion tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ completion tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
+ max_prompt_tokens: Optional[int]
+ """The maximum number of prompt tokens that may be used over the course of the run.
+
+ The run will make a best effort to use only the number of prompt tokens
+ specified, across multiple turns of the run. If the run exceeds the number of
+ prompt tokens specified, the run will end with status `incomplete`. See
+ `incomplete_details` for more info.
+ """
+
metadata: Optional[object]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -43,7 +69,30 @@ class RunCreateParamsBase(TypedDict, total=False):
a maximum of 512 characters long.
"""
- model: Optional[str]
+ model: Union[
+ str,
+ Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ ],
+ None,
+ ]
"""
The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
be used to execute this run. If a value is provided here, it will override the
@@ -51,6 +100,25 @@ class RunCreateParamsBase(TypedDict, total=False):
assistant will be used.
"""
+ response_format: Optional[AssistantResponseFormatOptionParam]
+ """Specifies the format that the model must output.
+
+ Compatible with
+ [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+ all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+
+ Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+ message the model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to
+ produce JSON yourself via a system or user message. Without this, the model may
+ generate an unending stream of whitespace until the generation reaches the token
+ limit, resulting in a long-running and seemingly "stuck" request. Also note that
+ the message content may be partially cut off if `finish_reason="length"`, which
+ indicates the generation exceeded `max_tokens` or the conversation exceeded the
+ max context length.
+ """
+
temperature: Optional[float]
"""What sampling temperature to use, between 0 and 2.
@@ -58,12 +126,24 @@ class RunCreateParamsBase(TypedDict, total=False):
0.2 will make it more focused and deterministic.
"""
+ tool_choice: Optional[AssistantToolChoiceOptionParam]
+ """
+ Controls which (if any) tool is called by the model. `none` means the model will
+ not call any tools and instead generates a message. `auto` is the default value
+ and means the model can pick between generating a message or calling a tool.
+ Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+ call that tool.
+ """
+
tools: Optional[Iterable[AssistantToolParam]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
"""
+ truncation_strategy: Optional[TruncationStrategy]
+
class AdditionalMessage(TypedDict, total=False):
content: Required[str]
@@ -95,6 +175,23 @@ class AdditionalMessage(TypedDict, total=False):
"""
+class TruncationStrategy(TypedDict, total=False):
+ type: Required[Literal["auto", "last_messages"]]
+ """The truncation strategy to use for the thread.
+
+ The default is `auto`. If set to `last_messages`, the thread will be truncated
+ to the n most recent messages in the thread. When set to `auto`, messages in the
+ middle of the thread will be dropped to fit the context length of the model,
+ `max_prompt_tokens`.
+ """
+
+ last_messages: Optional[int]
+ """
+ The number of most recent messages from the thread when constructing the context
+ for the run.
+ """
+
+
class RunCreateParamsNonStreaming(RunCreateParamsBase):
stream: Optional[Literal[False]]
"""
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index ab6a747021..1e0f7f8195 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -32,6 +32,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Union[
str,
Literal[
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
"gpt-4-0125-preview",
"gpt-4-turbo-preview",
"gpt-4-1106-preview",
@@ -102,8 +104,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""Whether to return log probabilities of the output tokens or not.
If true, returns the log probabilities of each output token returned in the
- `content` of `message`. This option is currently not available on the
- `gpt-4-vision-preview` model.
+ `content` of `message`.
"""
max_tokens: Optional[int]
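With the `gpt-4-vision-preview` caveat removed above, `logprobs` is documented as generally available; a small sketch using the new `gpt-4-turbo` alias (message content is a placeholder):

```python
# Sketch: request per-token log probabilities alongside the completion.
completion = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[{"role": "user", "content": "Say hello."}],
    logprobs=True,
    top_logprobs=2,
)
print(completion.choices[0].logprobs)
```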
diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py
index 0bb2b90438..92b81329b1 100644
--- a/src/openai/types/fine_tuning/__init__.py
+++ b/src/openai/types/fine_tuning/__init__.py
@@ -7,3 +7,8 @@
from .job_create_params import JobCreateParams as JobCreateParams
from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent
from .job_list_events_params import JobListEventsParams as JobListEventsParams
+from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration
+from .fine_tuning_job_wandb_integration_object import (
+ FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject,
+)
diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py
index 23fe96d1a0..1593bf50c7 100644
--- a/src/openai/types/fine_tuning/fine_tuning_job.py
+++ b/src/openai/types/fine_tuning/fine_tuning_job.py
@@ -4,6 +4,7 @@
from typing_extensions import Literal
from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
@@ -80,6 +81,9 @@ class FineTuningJob(BaseModel):
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+ seed: int
+ """The seed used for the fine-tuning job."""
+
status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
"""
The current status of the fine-tuning job, which can be either
@@ -105,3 +109,6 @@ class FineTuningJob(BaseModel):
You can retrieve the validation results with the
[Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
"""
+
+ integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None
+ """A list of integrations to enable for this fine-tuning job."""
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
new file mode 100644
index 0000000000..8076313cae
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+
+from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
+
+FineTuningJobIntegration = FineTuningJobWandbIntegrationObject
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
new file mode 100644
index 0000000000..4ac282eb54
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobWandbIntegration"]
+
+
+class FineTuningJobWandbIntegration(BaseModel):
+ project: str
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str] = None
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str] = None
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: Optional[List[str]] = None
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
new file mode 100644
index 0000000000..5b94354d50
--- /dev/null
+++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration_object.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration
+
+__all__ = ["FineTuningJobWandbIntegrationObject"]
+
+
+class FineTuningJobWandbIntegrationObject(BaseModel):
+ type: Literal["wandb"]
+ """The type of the integration being enabled for the fine-tuning job"""
+
+ wandb: FineTuningJobWandbIntegration
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py
index 79e0b67e13..892c737fa3 100644
--- a/src/openai/types/fine_tuning/job_create_params.py
+++ b/src/openai/types/fine_tuning/job_create_params.py
@@ -2,10 +2,10 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["JobCreateParams", "Hyperparameters"]
+__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"]
class JobCreateParams(TypedDict, total=False):
@@ -32,6 +32,17 @@ class JobCreateParams(TypedDict, total=False):
hyperparameters: Hyperparameters
"""The hyperparameters used for the fine-tuning job."""
+ integrations: Optional[Iterable[Integration]]
+ """A list of integrations to enable for your fine-tuning job."""
+
+ seed: Optional[int]
+ """The seed controls the reproducibility of the job.
+
+ Passing in the same seed and job parameters should produce the same results, but
+ may differ in rare cases. If a seed is not specified, one will be generated for
+ you.
+ """
+
suffix: Optional[str]
"""
A string of up to 18 characters that will be added to your fine-tuned model
@@ -76,3 +87,45 @@ class Hyperparameters(TypedDict, total=False):
An epoch refers to one full cycle through the training dataset.
"""
+
+
+class IntegrationWandb(TypedDict, total=False):
+ project: Required[str]
+ """The name of the project that the new run will be created under."""
+
+ entity: Optional[str]
+ """The entity to use for the run.
+
+ This allows you to set the team or username of the WandB user that you would
+ like associated with the run. If not set, the default entity for the registered
+ WandB API key is used.
+ """
+
+ name: Optional[str]
+ """A display name to set for the run.
+
+ If not set, we will use the Job ID as the name.
+ """
+
+ tags: List[str]
+ """A list of tags to be attached to the newly created run.
+
+ These tags are passed through directly to WandB. Some default tags are generated
+ by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
+ """
+
+
+class Integration(TypedDict, total=False):
+ type: Required[Literal["wandb"]]
+ """The type of integration to enable.
+
+ Currently, only "wandb" (Weights and Biases) is supported.
+ """
+
+ wandb: Required[IntegrationWandb]
+ """The settings for your integration with Weights and Biases.
+
+ This payload specifies the project that metrics will be sent to. Optionally, you
+ can set an explicit display name for your run, add tags to your run, and set a
+ default entity (team, username, etc) to be associated with your run.
+ """
diff --git a/src/openai/types/fine_tuning/jobs/__init__.py b/src/openai/types/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..6c93da1b69
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/__init__.py
@@ -0,0 +1,6 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .checkpoint_list_params import CheckpointListParams as CheckpointListParams
+from .fine_tuning_job_checkpoint import FineTuningJobCheckpoint as FineTuningJobCheckpoint
diff --git a/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
new file mode 100644
index 0000000000..adceb3b218
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/checkpoint_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["CheckpointListParams"]
+
+
+class CheckpointListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last checkpoint ID from the previous pagination request."""
+
+ limit: int
+ """Number of checkpoints to retrieve."""
diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
new file mode 100644
index 0000000000..bd07317a3e
--- /dev/null
+++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FineTuningJobCheckpoint", "Metrics"]
+
+
+class Metrics(BaseModel):
+ full_valid_loss: Optional[float] = None
+
+ full_valid_mean_token_accuracy: Optional[float] = None
+
+ step: Optional[float] = None
+
+ train_loss: Optional[float] = None
+
+ train_mean_token_accuracy: Optional[float] = None
+
+ valid_loss: Optional[float] = None
+
+ valid_mean_token_accuracy: Optional[float] = None
+
+
+class FineTuningJobCheckpoint(BaseModel):
+ id: str
+ """The checkpoint identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the checkpoint was created."""
+
+ fine_tuned_model_checkpoint: str
+ """The name of the fine-tuned checkpoint model that is created."""
+
+ fine_tuning_job_id: str
+ """The name of the fine-tuning job that this checkpoint was created from."""
+
+ metrics: Metrics
+ """Metrics at the step number during the fine-tuning job."""
+
+ object: Literal["fine_tuning.job.checkpoint"]
+ """The object type, which is always "fine_tuning.job.checkpoint"."""
+
+ step_number: int
+ """The step number that the checkpoint was created at."""
diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py
index 6edbe4b491..a509627b8e 100644
--- a/tests/api_resources/beta/test_assistants.py
+++ b/tests/api_resources/beta/test_assistants.py
@@ -24,14 +24,14 @@ class TestAssistants:
@parametrize
def test_method_create(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
def test_method_create_with_all_params(self, client: OpenAI) -> None:
assistant = client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -44,7 +44,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
@parametrize
def test_raw_response_create(self, client: OpenAI) -> None:
response = client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -55,7 +55,7 @@ def test_raw_response_create(self, client: OpenAI) -> None:
@parametrize
def test_streaming_response_create(self, client: OpenAI) -> None:
with client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -235,14 +235,14 @@ class TestAsyncAssistants:
@parametrize
async def test_method_create(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
)
assert_matches_type(Assistant, assistant, path=["response"])
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
assistant = await async_client.beta.assistants.create(
- model="string",
+ model="gpt-4-turbo",
description="string",
file_ids=["string", "string", "string"],
instructions="string",
@@ -255,7 +255,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
@parametrize
async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
response = await async_client.beta.assistants.with_raw_response.create(
- model="string",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -266,7 +266,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
async with async_client.beta.assistants.with_streaming_response.create(
- model="string",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index fd3f7c5102..7c07251433 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -207,8 +207,11 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
thread = client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -234,7 +237,12 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -276,8 +284,11 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -302,7 +313,12 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
thread_stream.response.close()
@@ -521,8 +537,11 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
thread = await async_client.beta.threads.create_and_run(
assistant_id="string",
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
thread={
@@ -548,7 +567,12 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, thread, path=["response"])
@@ -590,8 +614,11 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
assistant_id="string",
stream=True,
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
thread={
"messages": [
@@ -616,7 +643,12 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
],
"metadata": {},
},
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await thread_stream.response.aclose()
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index 271bcccdd3..cf5b2998b9 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -57,11 +57,19 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -136,10 +144,18 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
run_stream.response.close()
@@ -553,11 +569,19 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
stream=False,
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
assert_matches_type(Run, run, path=["response"])
@@ -632,10 +656,18 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
],
instructions="string",
+ max_completion_tokens=256,
+ max_prompt_tokens=256,
metadata={},
- model="string",
+ model="gpt-4-turbo",
+ response_format="none",
temperature=1,
+ tool_choice="none",
tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+ truncation_strategy={
+ "type": "auto",
+ "last_messages": 1,
+ },
)
await run_stream.response.aclose()
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index bb0658f3d9..c54b56a37d 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -40,7 +40,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -102,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -119,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -138,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
completion_stream.response.close()
@@ -153,7 +153,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -215,7 +215,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -232,7 +232,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -256,7 +256,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -270,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
frequency_penalty=-2,
function_call="none",
functions=[
@@ -332,7 +332,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
)
assert response.is_closed is True
@@ -349,7 +349,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -368,7 +368,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -383,7 +383,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"name": "string",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
frequency_penalty=-2,
function_call="none",
@@ -445,7 +445,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
)
@@ -462,7 +462,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe
"role": "system",
}
],
- model="gpt-3.5-turbo",
+ model="gpt-4-turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/fine_tuning/jobs/__init__.py b/tests/api_resources/fine_tuning/jobs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
new file mode 100644
index 0000000000..915d5c6f63
--- /dev/null
+++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py
@@ -0,0 +1,117 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.fine_tuning.jobs import FineTuningJobCheckpoint
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCheckpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ checkpoint = client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = response.parse()
+ assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
+
+
+class TestAsyncCheckpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ checkpoint = await async_client.fine_tuning.jobs.checkpoints.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="string",
+ limit=0,
+ )
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ checkpoint = response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list(
+ "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ checkpoint = await response.parse()
+ assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"):
+ await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list(
+ "",
+ )
diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py
index f4974ebbcd..1ff6d63b31 100644
--- a/tests/api_resources/fine_tuning/test_jobs.py
+++ b/tests/api_resources/fine_tuning/test_jobs.py
@@ -39,6 +39,36 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
@@ -248,6 +278,36 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"learning_rate_multiplier": "auto",
"n_epochs": "auto",
},
+ integrations=[
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ {
+ "type": "wandb",
+ "wandb": {
+ "project": "my-wandb-project",
+ "name": "string",
+ "entity": "string",
+ "tags": ["custom-tag", "custom-tag", "custom-tag"],
+ },
+ },
+ ],
+ seed=42,
suffix="x",
validation_file="file-abc123",
)
diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py
new file mode 100644
index 0000000000..aafeff8116
--- /dev/null
+++ b/tests/api_resources/test_batches.py
@@ -0,0 +1,268 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types import Batch
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestBatches:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ batch = client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ batch = client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ metadata={"foo": "string"},
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ batch = client.batches.retrieve(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.retrieve(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.retrieve(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: OpenAI) -> None:
+ batch = client.batches.cancel(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: OpenAI) -> None:
+ response = client.batches.with_raw_response.cancel(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: OpenAI) -> None:
+ with client.batches.with_streaming_response.cancel(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.batches.with_raw_response.cancel(
+ "",
+ )
+
+
+class TestAsyncBatches:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ metadata={"foo": "string"},
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.create(
+ completion_window="24h",
+ endpoint="/v1/chat/completions",
+ input_file_id="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.retrieve(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.retrieve(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.retrieve(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncOpenAI) -> None:
+ batch = await async_client.batches.cancel(
+ "string",
+ )
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.batches.with_raw_response.cancel(
+ "string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.batches.with_streaming_response.cancel(
+ "string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(Batch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.batches.with_raw_response.cancel(
+ "",
+ )