diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index dd93962010..ff261bad78 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbeb30b148..c17fdc169f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3ce5f8d004..d58c8454c5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,4 @@ +# This file is used to automatically assign reviewers to PRs +# For more information see: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners + * @openai/sdks-team diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index fa09dbe5b0..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Bug report -description: Report an issue or bug with this library -labels: ['bug'] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - - type: checkboxes - id: non_api - attributes: - label: Confirm this is an issue with the Python library and not an underlying OpenAI API - description: Issues with the underlying OpenAI API should be reported on our [Developer Community](https://community.openai.com/c/api/7) - options: - - label: This is an issue with the Python library - required: true - - type: textarea - id: what-happened - attributes: - label: Describe the bug - description: A clear and concise description of what the bug is, and any additional context. - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: repro-steps - attributes: - label: To Reproduce - description: Steps to reproduce the behavior. - placeholder: | - 1. Fetch a '...' - 2. Update the '....' - 3. See error - validations: - required: true - - type: textarea - id: code-snippets - attributes: - label: Code snippets - description: If applicable, add code snippets to help explain your problem. 
- render: Python - validations: - required: false - - type: input - id: os - attributes: - label: OS - placeholder: macOS - validations: - required: true - - type: input - id: language-version - attributes: - label: Python version - placeholder: Python v3.11.4 - validations: - required: true - - type: input - id: lib-version - attributes: - label: Library version - placeholder: openai v1.0.1 - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 0498cf7f6f..0000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,7 +0,0 @@ -blank_issues_enabled: false -contact_links: - - name: OpenAI support - url: https://help.openai.com/ - about: | - Please only file issues here that you believe represent actual bugs or feature requests for the OpenAI Python library. - If you're having general trouble with the OpenAI API, please visit our help center to get support. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index b529547d08..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Feature request -description: Suggest an idea for this library -labels: ['feature-request'] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this feature request! - - type: checkboxes - id: non_api - attributes: - label: Confirm this is a feature request for the Python library and not the underlying OpenAI API. - description: Feature requests for the underlying OpenAI API should be reported on our [Developer Community](https://community.openai.com/c/api/7) - options: - - label: This is a feature request for the Python library - required: true - - type: textarea - id: feature - attributes: - label: Describe the feature or improvement you're requesting - description: A clear and concise description of what you want to happen. - validations: - required: true - - type: textarea - id: context - attributes: - label: Additional context - description: Add any other context about the feature request here. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 4416b1e547..0000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -- [ ] I understand that this repository is auto-generated and my pull request may not be merged - -## Changes being requested - -## Additional context & links diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 76655ed7d6..4c617a6f19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,27 +1,32 @@ name: CI on: push: - branches: - - main + branches-ignore: + - 'generated' + - 'codegen/**' + - 'integrated/**' + - 'stl-preview-head/**' + - 'stl-preview-base/**' pull_request: - branches: - - main + branches-ignore: + - 'stl-preview-head/**' + - 'stl-preview-base/**' jobs: lint: + timeout-minutes: 10 name: lint - runs-on: ubuntu-latest - if: github.repository == 'openai/openai-python' - + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Install dependencies @@ -29,20 +34,61 @@ jobs: - name: Run lints run: ./scripts/lint + + build: + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + timeout-minutes: 10 + name: build + permissions: + contents: read + id-token: write + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Install dependencies + run: rye sync --all-features + + - name: Run build + run: rye build + + - name: Get GitHub OIDC Token + if: github.repository == 'stainless-sdks/openai-python' + id: github-oidc + uses: actions/github-script@v6 + with: + script: core.setOutput('github_token', await core.getIDToken()); + + - name: Upload tarball + if: github.repository == 'stainless-sdks/openai-python' + env: + URL: https://pkg.stainless.com/s + AUTH: ${{ steps.github-oidc.outputs.github_token }} + SHA: ${{ github.sha }} + run: ./scripts/utils/upload-artifact.sh + test: + timeout-minutes: 10 name: test - runs-on: ubuntu-latest - if: github.repository == 'openai/openai-python' - + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Bootstrap @@ -51,3 +97,31 @@ jobs: - name: Run tests run: ./scripts/test + examples: + timeout-minutes: 10 + name: examples + runs-on: ${{ github.repository == 'stainless-sdks/openai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.repository 
== 'openai/openai-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork) + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + - name: Install dependencies + run: | + rye sync --all-features + + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + rye run python examples/demo.py + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + rye run python examples/async_demo.py diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml index a641be287b..b3e1c679d4 100644 --- a/.github/workflows/create-releases.yml +++ b/.github/workflows/create-releases.yml @@ -25,11 +25,11 @@ jobs: - name: Install Rye if: ${{ steps.release.outputs.releases_created }} run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI if: ${{ steps.release.outputs.releases_created }} diff --git a/.github/workflows/detect-breaking-changes.yml b/.github/workflows/detect-breaking-changes.yml new file mode 100644 index 0000000000..f10fdf3b19 --- /dev/null +++ b/.github/workflows/detect-breaking-changes.yml @@ -0,0 +1,42 @@ +name: CI +on: + pull_request: + branches: + - main + - next + +jobs: + detect_breaking_changes: + runs-on: 'ubuntu-latest' + name: detect-breaking-changes + if: github.repository == 'openai/openai-python' + steps: + - name: Calculate fetch-depth + run: | + echo "FETCH_DEPTH=$(expr ${{ github.event.pull_request.commits }} + 1)" >> $GITHUB_ENV + + - uses: actions/checkout@v4 + with: + # Ensure we can check out the pull request base in the script below. + fetch-depth: ${{ env.FETCH_DEPTH }} + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + - name: Install dependencies + run: | + rye sync --all-features + - name: Detect removed symbols + run: | + rye run python scripts/detect-breaking-changes.py "${{ github.event.pull_request.base.sha }}" + + - name: Detect breaking changes + run: | + # Try to check out previous versions of the breaking change detection script. This ensures that + # we still detect breaking changes when entire files and their tests are removed. 
+ git checkout "${{ github.event.pull_request.base.sha }}" -- ./scripts/detect-breaking-changes 2>/dev/null || true + ./scripts/detect-breaking-changes ${{ github.event.pull_request.base.sha }} \ No newline at end of file diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 2f88f86407..7096ca9832 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -14,11 +14,11 @@ jobs: - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI run: | diff --git a/.gitignore b/.gitignore index 0f9a66a976..95ceb189aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.vscode +.prism.log _dev __pycache__ diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 1f79fd2d11..0746cbe20a 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.30.1" + ".": "2.12.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 2e5c705a0d..3793c0f01b 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,4 @@ -configured_endpoints: 64 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-363dd904e5d6e65b3a323fc88e6b502fb23a6aa319be219273e3ee47c7530993.yml +configured_endpoints: 137 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8e67bdc351a518b113ab48e775750190e207807903d6b03ab22c438c38a588.yml +openapi_spec_hash: 8af972190647ffb9dcec516e19d8761a +config_hash: d013f4fdd4dd59c6f376a9ca482b7f9e diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..5b01030785 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.importFormat": "relative", +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 354d21b2d2..c14e652328 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,9 +2,13 @@ ### With Rye -We use [Rye](https://rye-up.com/) to manage dependencies so we highly recommend [installing it](https://rye-up.com/guide/installation/) as it will automatically provision a Python environment with the expected Python version. +We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. 
To set it up, run: -After installing Rye, you'll just have to run this command: +```sh +$ ./scripts/bootstrap +``` + +Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run: ```sh $ rye sync --all-features @@ -13,8 +17,7 @@ $ rye sync --all-features You can then run scripts using `rye run python script.py` or by activating the virtual environment: ```sh -$ rye shell -# or manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work +# Activate the virtual environment - https://docs.python.org/3/library/venv.html#how-venvs-work $ source .venv/bin/activate # now you can omit the `rye run` prefix @@ -31,25 +34,25 @@ $ pip install -r requirements-dev.lock ## Modifying/Adding code -Most of the SDK is generated code, and any modified code will be overridden on the next generation. The -`src/openai/lib/` and `examples/` directories are exceptions and will never be overridden. +Most of the SDK is generated code. Modifications to code will be persisted between generations, but may +result in merge conflicts between manual patches and changes from the generator. The generator will never +modify the contents of the `src/openai/lib/` and `examples/` directories. ## Adding and running examples -All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or -added to. +All files in the `examples/` directory are not modified by the generator and can be freely edited or added to. -```bash +```py # add an example to examples/.py #!/usr/bin/env -S rye run python … ``` -``` -chmod +x examples/.py +```sh +$ chmod +x examples/.py # run the example against your api -./examples/.py +$ ./examples/.py ``` ## Using the repository from source @@ -58,8 +61,8 @@ If you’d like to use the repository from source, you can either install from g To install via git: -```bash -pip install git+ssh://git@github.com/openai/openai-python.git +```sh +$ pip install git+ssh://git@github.com/openai/openai-python.git ``` Alternatively, you can build from source and install the wheel file: @@ -68,29 +71,29 @@ Building this package will create two files in the `dist/` directory, a `.tar.gz To create a distributable version of the library, all you have to do is run this command: -```bash -rye build +```sh +$ rye build # or -python -m build +$ python -m build ``` Then to install: ```sh -pip install ./path-to-wheel-file.whl +$ pip install ./path-to-wheel-file.whl ``` ## Running tests Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. -```bash +```sh # you will need npm installed -npx prism mock path/to/your/openapi.yml +$ npx prism mock path/to/your/openapi.yml ``` -```bash -rye run pytest +```sh +$ ./scripts/test ``` ## Linting and formatting @@ -100,14 +103,14 @@ This repository uses [ruff](https://github.com/astral-sh/ruff) and To lint: -```bash -rye run lint +```sh +$ ./scripts/lint ``` To format and fix all ruff issues automatically: -```bash -rye run format +```sh +$ ./scripts/format ``` ## Publishing and releases diff --git a/LICENSE b/LICENSE index 621a6becfb..f011417af6 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. 
- Copyright 2024 OpenAI + Copyright 2025 OpenAI Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index e566a2f8d0..2fd47a6d27 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,18 @@ # OpenAI Python API library -[![PyPI version](https://img.shields.io/pypi/v/openai.svg)](https://pypi.org/project/openai/) + +[![PyPI version](https://img.shields.io/pypi/v/openai.svg?label=pypi%20(stable))](https://pypi.org/project/openai/) -The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.7+ +The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.9+ application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). -It is generated from our [OpenAPI specification](https://github.com/openai/openai-openapi) with [Stainless](https://stainlessapi.com/). - ## Documentation -The REST API documentation can be found [on platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](api.md). ## Installation -> [!IMPORTANT] -> The SDK was rewritten in v1, which was released November 6th 2023. See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code. - ```sh # install from PyPI pip install openai @@ -31,8 +27,7 @@ import os from openai import OpenAI client = OpenAI( - # This is the default and can be omitted - api_key=os.environ.get("OPENAI_API_KEY"), + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted ) chat_completion = client.chat.completions.create( @@ -42,7 +37,7 @@ chat_completion = client.chat.completions.create( "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -51,56 +46,6 @@ we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `OPENAI_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control. -### Polling Helpers - -When interacting with the API some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. The SDK includes -helper functions which will poll the status until it reaches a terminal state and then return the resulting object. -If an API method results in an action which could benefit from polling there will be a corresponding version of the -method ending in '\_and_poll'. 
- -For instance to create a Run and poll until it reaches a terminal state you can run: - -```python -run = client.beta.threads.runs.create_and_poll( - thread_id=thread.id, - assistant_id=assistant.id, -) -``` - -More information on the lifecycle of a Run can be found in the [Run Lifecycle Documentation](https://platform.openai.com/docs/assistants/how-it-works/run-lifecycle) - -### Bulk Upload Helpers - -When creating an interacting with vector stores, you can use the polling helpers to monitor the status of operations. -For convenience, we also provide a bulk upload helper to allow you to simultaneously upload several files at once. - -```python -sample_files = [Path("sample-paper.pdf"), ...] - -batch = await client.vector_stores.file_batches.upload_and_poll( - store.id, - files=sample_files, -) -``` - -### Streaming Helpers - -The SDK also includes helpers to process streams and handle the incoming events. - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. The user has a premium account.", -) as stream: - for event in stream: - # Print the text from text delta events - if event.type == "thread.message.delta" and event.data.delta.content: - print(event.data.delta.content[0].text) -``` - -More information on streaming helpers can be found in the dedicated documentation: [helpers.md](helpers.md) - ## Async usage Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call: @@ -111,8 +56,7 @@ import asyncio from openai import AsyncOpenAI client = AsyncOpenAI( - # This is the default and can be omitted - api_key=os.environ.get("OPENAI_API_KEY"), + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted ) @@ -124,7 +68,7 @@ async def main() -> None: "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) @@ -133,84 +77,88 @@ asyncio.run(main()) Functionality between the synchronous and asynchronous clients is otherwise identical. -## Streaming responses +### With aiohttp -We provide support for streaming responses using Server Side Events (SSE). +By default, the async client uses `httpx` for HTTP requests. However, for improved concurrency performance you may also use `aiohttp` as the HTTP backend. -```python -from openai import OpenAI - -client = OpenAI() +You can enable this by installing `aiohttp`: -stream = client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], - stream=True, -) -for chunk in stream: - print(chunk.choices[0].delta.content or "", end="") +```sh +# install from PyPI +pip install openai[aiohttp] ``` -The async client uses the exact same interface. 
+Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: ```python +import os +import asyncio +from openai import DefaultAioHttpClient from openai import AsyncOpenAI -client = AsyncOpenAI() - -async def main(): - stream = await client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], - stream=True, - ) - async for chunk in stream: - print(chunk.choices[0].delta.content or "", end="") +async def main() -> None: + async with AsyncOpenAI( + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted + http_client=DefaultAioHttpClient(), + ) as client: + chat_completion = await client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + ) asyncio.run(main()) ``` -## Module-level client - -> [!IMPORTANT] -> We highly recommend instantiating client instances instead of relying on the global client. - -We also expose a global client instance that is accessible in a similar fashion to versions prior to v1. +## Streaming responses -```py -import openai +We provide support for streaming responses using Server Side Events (SSE). -# optional; defaults to `os.environ['OPENAI_API_KEY']` -openai.api_key = '...' +```python +from openai import OpenAI -# all client options can be configured just like the `OpenAI` instantiation counterpart -openai.base_url = "https://..." -openai.default_headers = {"x-foo": "true"} +client = OpenAI() -completion = openai.chat.completions.create( - model="gpt-4", +stream = client.chat.completions.create( messages=[ { "role": "user", - "content": "How do I output all files in a directory using Python?", - }, + "content": "Say this is a test", + } ], + model="gpt-4o", + stream=True, ) -print(completion.choices[0].message.content) +for chat_completion in stream: + print(chat_completion) ``` -The API is the exact same as the standard client instance based API. +The async client uses the exact same interface. -This is intended to be used within REPLs or notebooks for faster iteration, **not** in application code. +```python +from openai import AsyncOpenAI -We recommend that you always instantiate a client (e.g., with `client = OpenAI()`) in application code because: +client = AsyncOpenAI() -- It can be difficult to reason about where client options are configured -- It's not possible to change certain client options without potentially causing race conditions -- It's harder to mock for testing purposes -- It's not possible to control cleanup of network connections +stream = await client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", + stream=True, +) +async for chat_completion in stream: + print(chat_completion) +``` ## Using types @@ -228,7 +176,7 @@ List methods in the OpenAI API are paginated. 
This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually: ```python -import openai +from openai import OpenAI client = OpenAI() @@ -246,7 +194,7 @@ Or, asynchronously: ```python import asyncio -import openai +from openai import AsyncOpenAI client = AsyncOpenAI() @@ -308,14 +256,14 @@ completion = client.chat.completions.create( "content": "Can you generate an example json object describing a fruit?", } ], - model="gpt-3.5-turbo-1106", + model="gpt-4o", response_format={"type": "json_object"}, ) ``` ## File uploads -Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. +Request parameters that correspond to file uploads can be passed as `bytes`, or a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`. ```python from pathlib import Path @@ -348,7 +296,7 @@ client = OpenAI() try: client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o", training_file="file-abc123", ) except openai.APIConnectionError as e: @@ -362,7 +310,7 @@ except openai.APIStatusError as e: print(e.response) ``` -Error codes are as followed: +Error codes are as follows: | Status Code | Error Type | | ----------- | -------------------------- | @@ -397,17 +345,17 @@ client.with_options(max_retries=5).chat.completions.create( messages=[ { "role": "user", - "content": "How can I get the name of the current day in Node.js?", + "content": "How can I get the name of the current day in JavaScript?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` ### Timeouts By default requests time out after 10 minutes. You can configure this with a `timeout` option, -which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object: ```python from openai import OpenAI @@ -431,7 +379,7 @@ client.with_options(timeout=5.0).chat.completions.create( "content": "How can I list all files in a directory using Python?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -445,12 +393,14 @@ Note that requests that time out are [retried twice by default](#retries). We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. -You can enable logging by setting the environment variable `OPENAI_LOG` to `debug`. +You can enable logging by setting the environment variable `OPENAI_LOG` to `info`. ```shell -$ export OPENAI_LOG=debug +$ export OPENAI_LOG=info ``` +Or to `debug` for more verbose logging. + ### How to tell whether `None` means `null` or missing In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. 
You can differentiate the two cases with `.model_fields_set`: @@ -476,7 +426,7 @@ response = client.chat.completions.with_raw_response.create( "role": "user", "content": "Say this is a test", }], - model="gpt-3.5-turbo", + model="gpt-4o", ) print(response.headers.get('X-My-Header')) @@ -484,7 +434,7 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion) ``` -These methods return an [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. +These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. For the sync client this will mostly be the same with the exception of `content` & `text` will be methods instead of properties. In the @@ -509,7 +459,7 @@ with client.chat.completions.with_streaming_response.create( "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) as response: print(response.headers.get("X-My-Header")) @@ -528,8 +478,7 @@ If you need to access undocumented endpoints, params, or response properties, th #### Undocumented endpoints To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other -http verbs. Options on the client will be respected (such as retries) will be respected when making this -request. +http verbs. Options on the client will be respected (such as retries) when making this request. ```py import httpx @@ -558,81 +507,71 @@ can also get all the extra fields on the Pydantic model as a dict with You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for proxies -- Custom transports -- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python +import httpx from openai import OpenAI, DefaultHttpxClient client = OpenAI( # Or use the `OPENAI_BASE_URL` env var base_url="http://my.test.server.example.com:8083", http_client=DefaultHttpxClient( - proxies="http://my.test.proxy.example.com", + proxy="http://my.test.proxy.example.com", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) ``` -### Managing HTTP resources - -By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. 
+You can also customize the client on a per-request basis by using `with_options()`: -## Microsoft Azure OpenAI +```python +client.with_options(http_client=DefaultHttpxClient(...)) +``` -To use this library with [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview), use the `AzureOpenAI` -class instead of the `OpenAI` class. +### Managing HTTP resources -> [!IMPORTANT] -> The Azure API shape differs from the core API shape which means that the static types for responses / params -> won't always be correct. +By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. ```py -from openai import AzureOpenAI - -# gets the API Key from environment variable AZURE_OPENAI_API_KEY -client = AzureOpenAI( - # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning - api_version="2023-07-01-preview", - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-endpoint.openai.azure.com", -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) -``` - -In addition to the options provided in the base `OpenAI` client, the following options are provided: +from openai import OpenAI -- `azure_endpoint` (or the `AZURE_OPENAI_ENDPOINT` environment variable) -- `azure_deployment` -- `api_version` (or the `OPENAI_API_VERSION` environment variable) -- `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` environment variable) -- `azure_ad_token_provider` +with OpenAI() as client: + # make requests here + ... -An example of using the client with Azure Active Directory can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py). +# HTTP client is now closed +``` ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: 1. Changes that only affect static types, without breaking runtime behavior. -2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ 3. Changes that we do not expect to impact the vast majority of users in practice. We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions. +### Determining the installed version + +If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version. 
+ +You can determine the version that is being used at runtime with: + +```py +import openai +print(openai.__version__) +``` + ## Requirements -Python 3.7 or higher. +Python 3.9 or higher. + +## Contributing + +See [the contributing documentation](./CONTRIBUTING.md). diff --git a/SECURITY.md b/SECURITY.md index c54acaf331..4adb0c54f1 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,9 @@ ## Reporting Security Issues -This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. -To report a security issue, please contact the Stainless team at security@stainlessapi.com. +To report a security issue, please contact the Stainless team at security@stainless.com. ## Responsible Disclosure @@ -16,13 +16,13 @@ before making any information public. ## Reporting Non-SDK Related Security Issues If you encounter security issues that are not directly related to SDKs but pertain to the services -or products provided by OpenAI please follow the respective company's security reporting guidelines. +or products provided by OpenAI, please follow the respective company's security reporting guidelines. ### OpenAI Terms and Policies Our Security Policy can be found at [Security Policy URL](https://openai.com/policies/coordinated-vulnerability-disclosure-policy). -Please contact disclosure@openai.com for any questions or concerns regarding security of our services. +Please contact disclosure@openai.com for any questions or concerns regarding the security of our services. 
--- diff --git a/api.md b/api.md index de69f11dca..4f9c283a82 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,25 @@ # Shared Types ```python -from openai.types import ErrorObject, FunctionDefinition, FunctionParameters +from openai.types import ( + AllModels, + ChatModel, + ComparisonFilter, + CompoundFilter, + CustomToolInputFormat, + ErrorObject, + FunctionDefinition, + FunctionParameters, + Metadata, + Reasoning, + ReasoningEffort, + ResponseFormatJSONObject, + ResponseFormatJSONSchema, + ResponseFormatText, + ResponseFormatTextGrammar, + ResponseFormatTextPython, + ResponsesModel, +) ``` # Completions @@ -31,38 +49,65 @@ Types: ```python from openai.types.chat import ( ChatCompletion, + ChatCompletionAllowedToolChoice, ChatCompletionAssistantMessageParam, + ChatCompletionAudio, + ChatCompletionAudioParam, ChatCompletionChunk, ChatCompletionContentPart, ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, + ChatCompletionContentPartRefusal, ChatCompletionContentPartText, + ChatCompletionCustomTool, + ChatCompletionDeleted, + ChatCompletionDeveloperMessageParam, ChatCompletionFunctionCallOption, ChatCompletionFunctionMessageParam, + ChatCompletionFunctionTool, ChatCompletionMessage, + ChatCompletionMessageCustomToolCall, + ChatCompletionMessageFunctionToolCall, ChatCompletionMessageParam, - ChatCompletionMessageToolCall, + ChatCompletionMessageToolCallUnion, + ChatCompletionModality, ChatCompletionNamedToolChoice, + ChatCompletionNamedToolChoiceCustom, + ChatCompletionPredictionContent, ChatCompletionRole, + ChatCompletionStoreMessage, ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, ChatCompletionTokenLogprob, - ChatCompletionTool, + ChatCompletionToolUnion, ChatCompletionToolChoiceOption, ChatCompletionToolMessageParam, ChatCompletionUserMessageParam, + ChatCompletionAllowedTools, + ChatCompletionReasoningEffort, ) ``` Methods: -- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.retrieve(completion_id) -> ChatCompletion +- client.chat.completions.update(completion_id, \*\*params) -> ChatCompletion +- client.chat.completions.list(\*\*params) -> SyncCursorPage[ChatCompletion] +- client.chat.completions.delete(completion_id) -> ChatCompletionDeleted + +### Messages + +Methods: + +- client.chat.completions.messages.list(completion_id, \*\*params) -> SyncCursorPage[ChatCompletionStoreMessage] # Embeddings Types: ```python -from openai.types import CreateEmbeddingResponse, Embedding +from openai.types import CreateEmbeddingResponse, Embedding, EmbeddingModel ``` Methods: @@ -74,25 +119,34 @@ Methods: Types: ```python -from openai.types import FileContent, FileDeleted, FileObject +from openai.types import FileContent, FileDeleted, FileObject, FilePurpose ``` Methods: - client.files.create(\*\*params) -> FileObject - client.files.retrieve(file_id) -> FileObject -- client.files.list(\*\*params) -> SyncPage[FileObject] +- client.files.list(\*\*params) -> SyncCursorPage[FileObject] - client.files.delete(file_id) -> FileDeleted - client.files.content(file_id) -> HttpxBinaryResponseContent -- client.files.retrieve_content(file_id) -> str -- client.files.wait_for_processing(\*args) -> FileObject +- client.files.retrieve_content(file_id) -> str # Images Types: ```python -from openai.types import Image, ImagesResponse +from openai.types import ( + Image, + ImageEditCompletedEvent, + ImageEditPartialImageEvent, + ImageEditStreamEvent, + ImageGenCompletedEvent, + 
ImageGenPartialImageEvent, + ImageGenStreamEvent, + ImageModel, + ImagesResponse, +) ``` Methods: @@ -103,32 +157,57 @@ Methods: # Audio +Types: + +```python +from openai.types import AudioModel, AudioResponseFormat +``` + ## Transcriptions Types: ```python -from openai.types.audio import Transcription +from openai.types.audio import ( + Transcription, + TranscriptionDiarized, + TranscriptionDiarizedSegment, + TranscriptionInclude, + TranscriptionSegment, + TranscriptionStreamEvent, + TranscriptionTextDeltaEvent, + TranscriptionTextDoneEvent, + TranscriptionTextSegmentEvent, + TranscriptionVerbose, + TranscriptionWord, + TranscriptionCreateResponse, +) ``` Methods: -- client.audio.transcriptions.create(\*\*params) -> Transcription +- client.audio.transcriptions.create(\*\*params) -> TranscriptionCreateResponse ## Translations Types: ```python -from openai.types.audio import Translation +from openai.types.audio import Translation, TranslationVerbose, TranslationCreateResponse ``` Methods: -- client.audio.translations.create(\*\*params) -> Translation +- client.audio.translations.create(\*\*params) -> TranslationCreateResponse ## Speech +Types: + +```python +from openai.types.audio import SpeechModel +``` + Methods: - client.audio.speech.create(\*\*params) -> HttpxBinaryResponseContent @@ -138,7 +217,14 @@ Methods: Types: ```python -from openai.types import Moderation, ModerationCreateResponse +from openai.types import ( + Moderation, + ModerationImageURLInput, + ModerationModel, + ModerationMultiModalInput, + ModerationTextInput, + ModerationCreateResponse, +) ``` Methods: @@ -161,6 +247,21 @@ Methods: # FineTuning +## Methods + +Types: + +```python +from openai.types.fine_tuning import ( + DpoHyperparameters, + DpoMethod, + ReinforcementHyperparameters, + ReinforcementMethod, + SupervisedHyperparameters, + SupervisedMethod, +) +``` + ## Jobs Types: @@ -169,9 +270,9 @@ Types: from openai.types.fine_tuning import ( FineTuningJob, FineTuningJobEvent, - FineTuningJobIntegration, FineTuningJobWandbIntegration, FineTuningJobWandbIntegrationObject, + FineTuningJobIntegration, ) ``` @@ -182,6 +283,8 @@ Methods: - client.fine_tuning.jobs.list(\*\*params) -> SyncCursorPage[FineTuningJob] - client.fine_tuning.jobs.cancel(fine_tuning_job_id) -> FineTuningJob - client.fine_tuning.jobs.list_events(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobEvent] +- client.fine_tuning.jobs.pause(fine_tuning_job_id) -> FineTuningJob +- client.fine_tuning.jobs.resume(fine_tuning_job_id) -> FineTuningJob ### Checkpoints @@ -195,60 +298,195 @@ Methods: - client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint] -# Beta +## Checkpoints -## VectorStores +### Permissions Types: ```python -from openai.types.beta import VectorStore, VectorStoreDeleted +from openai.types.fine_tuning.checkpoints import ( + PermissionCreateResponse, + PermissionRetrieveResponse, + PermissionDeleteResponse, +) +``` + +Methods: + +- client.fine_tuning.checkpoints.permissions.create(fine_tuned_model_checkpoint, \*\*params) -> SyncPage[PermissionCreateResponse] +- client.fine_tuning.checkpoints.permissions.retrieve(fine_tuned_model_checkpoint, \*\*params) -> PermissionRetrieveResponse +- client.fine_tuning.checkpoints.permissions.delete(permission_id, \*, fine_tuned_model_checkpoint) -> PermissionDeleteResponse + +## Alpha + +### Graders + +Types: + +```python +from openai.types.fine_tuning.alpha import GraderRunResponse, GraderValidateResponse +``` + +Methods: + +- 
client.fine_tuning.alpha.graders.run(\*\*params) -> GraderRunResponse +- client.fine_tuning.alpha.graders.validate(\*\*params) -> GraderValidateResponse + +# Graders + +## GraderModels + +Types: + +```python +from openai.types.graders import ( + GraderInputs, + LabelModelGrader, + MultiGrader, + PythonGrader, + ScoreModelGrader, + StringCheckGrader, + TextSimilarityGrader, +) +``` + +# VectorStores + +Types: + +```python +from openai.types import ( + AutoFileChunkingStrategyParam, + FileChunkingStrategy, + FileChunkingStrategyParam, + OtherFileChunkingStrategyObject, + StaticFileChunkingStrategy, + StaticFileChunkingStrategyObject, + StaticFileChunkingStrategyObjectParam, + VectorStore, + VectorStoreDeleted, + VectorStoreSearchResponse, +) ``` Methods: -- client.beta.vector_stores.create(\*\*params) -> VectorStore -- client.beta.vector_stores.retrieve(vector_store_id) -> VectorStore -- client.beta.vector_stores.update(vector_store_id, \*\*params) -> VectorStore -- client.beta.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] -- client.beta.vector_stores.delete(vector_store_id) -> VectorStoreDeleted +- client.vector_stores.create(\*\*params) -> VectorStore +- client.vector_stores.retrieve(vector_store_id) -> VectorStore +- client.vector_stores.update(vector_store_id, \*\*params) -> VectorStore +- client.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] +- client.vector_stores.delete(vector_store_id) -> VectorStoreDeleted +- client.vector_stores.search(vector_store_id, \*\*params) -> SyncPage[VectorStoreSearchResponse] -### Files +## Files Types: ```python -from openai.types.beta.vector_stores import VectorStoreFile, VectorStoreFileDeleted +from openai.types.vector_stores import VectorStoreFile, VectorStoreFileDeleted, FileContentResponse ``` Methods: -- client.beta.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile -- client.beta.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile -- client.beta.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] -- client.beta.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted -- client.beta.vector_stores.files.create_and_poll(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.poll(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.upload(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.upload_and_poll(\*args) -> VectorStoreFile +- client.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile +- client.vector_stores.files.update(file_id, \*, vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted +- client.vector_stores.files.content(file_id, \*, vector_store_id) -> SyncPage[FileContentResponse] -### FileBatches +## FileBatches Types: ```python -from openai.types.beta.vector_stores import VectorStoreFileBatch +from openai.types.vector_stores import VectorStoreFileBatch ``` Methods: -- client.beta.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch -- 
client.beta.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] -- client.beta.vector_stores.file_batches.create_and_poll(\*args) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.poll(\*args) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.upload_and_poll(\*args) -> VectorStoreFileBatch +- client.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch +- client.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] + +# Webhooks + +Types: + +```python +from openai.types.webhooks import ( + BatchCancelledWebhookEvent, + BatchCompletedWebhookEvent, + BatchExpiredWebhookEvent, + BatchFailedWebhookEvent, + EvalRunCanceledWebhookEvent, + EvalRunFailedWebhookEvent, + EvalRunSucceededWebhookEvent, + FineTuningJobCancelledWebhookEvent, + FineTuningJobFailedWebhookEvent, + FineTuningJobSucceededWebhookEvent, + RealtimeCallIncomingWebhookEvent, + ResponseCancelledWebhookEvent, + ResponseCompletedWebhookEvent, + ResponseFailedWebhookEvent, + ResponseIncompleteWebhookEvent, + UnwrapWebhookEvent, +) +``` + +# Beta + +## ChatKit + +Types: + +```python +from openai.types.beta import ChatKitWorkflow +``` + +### Sessions + +Methods: + +- client.beta.chatkit.sessions.create(\*\*params) -> ChatSession +- client.beta.chatkit.sessions.cancel(session_id) -> ChatSession + +### Threads + +Types: + +```python +from openai.types.beta.chatkit import ( + ChatSession, + ChatSessionAutomaticThreadTitling, + ChatSessionChatKitConfiguration, + ChatSessionChatKitConfigurationParam, + ChatSessionExpiresAfterParam, + ChatSessionFileUpload, + ChatSessionHistory, + ChatSessionRateLimits, + ChatSessionRateLimitsParam, + ChatSessionStatus, + ChatSessionWorkflowParam, + ChatKitAttachment, + ChatKitResponseOutputText, + ChatKitThread, + ChatKitThreadAssistantMessageItem, + ChatKitThreadItemList, + ChatKitThreadUserMessageItem, + ChatKitWidgetItem, + ThreadDeleteResponse, +) +``` + +Methods: + +- client.beta.chatkit.threads.retrieve(thread_id) -> ChatKitThread +- client.beta.chatkit.threads.list(\*\*params) -> SyncConversationCursorPage[ChatKitThread] +- client.beta.chatkit.threads.delete(thread_id) -> ThreadDeleteResponse +- client.beta.chatkit.threads.list_items(thread_id, \*\*params) -> SyncConversationCursorPage[Data] ## Assistants @@ -284,7 +522,6 @@ Types: ```python from openai.types.beta import ( - AssistantResponseFormat, AssistantResponseFormatOption, AssistantToolChoice, AssistantToolChoiceFunction, @@ -301,8 +538,6 @@ Methods: - client.beta.threads.update(thread_id, \*\*params) -> Thread - client.beta.threads.delete(thread_id) -> ThreadDeleted - client.beta.threads.create_and_run(\*\*params) -> Run -- client.beta.threads.create_and_run_poll(\*args) -> Run -- client.beta.threads.create_and_run_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] ### Runs @@ -320,12 +555,6 @@ Methods: - client.beta.threads.runs.list(thread_id, \*\*params) -> SyncCursorPage[Run] - client.beta.threads.runs.cancel(run_id, \*, thread_id) -> Run - client.beta.threads.runs.submit_tool_outputs(run_id, \*, thread_id, \*\*params) -> Run -- client.beta.threads.runs.create_and_poll(\*args) -> Run -- 
client.beta.threads.runs.create_and_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] -- client.beta.threads.runs.poll(\*args) -> Run -- client.beta.threads.runs.stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] -- client.beta.threads.runs.submit_tool_outputs_and_poll(\*args) -> Run -- client.beta.threads.runs.submit_tool_outputs_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] #### Steps @@ -346,6 +575,7 @@ from openai.types.beta.threads.runs import ( RunStepDelta, RunStepDeltaEvent, RunStepDeltaMessageDelta, + RunStepInclude, ToolCall, ToolCallDelta, ToolCallDeltaObject, @@ -355,7 +585,7 @@ from openai.types.beta.threads.runs import ( Methods: -- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id) -> RunStep +- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id, \*\*params) -> RunStep - client.beta.threads.runs.steps.list(run_id, \*, thread_id, \*\*params) -> SyncCursorPage[RunStep] ### Messages @@ -385,6 +615,8 @@ from openai.types.beta.threads import ( MessageDeleted, MessageDelta, MessageDeltaEvent, + RefusalContentBlock, + RefusalDeltaBlock, Text, TextContentBlock, TextContentBlockParam, @@ -406,7 +638,7 @@ Methods: Types: ```python -from openai.types import Batch, BatchError, BatchRequestCounts +from openai.types import Batch, BatchError, BatchRequestCounts, BatchUsage ``` Methods: @@ -415,3 +647,496 @@ Methods: - client.batches.retrieve(batch_id) -> Batch - client.batches.list(\*\*params) -> SyncCursorPage[Batch] - client.batches.cancel(batch_id) -> Batch + +# Uploads + +Types: + +```python +from openai.types import Upload +``` + +Methods: + +- client.uploads.create(\*\*params) -> Upload +- client.uploads.cancel(upload_id) -> Upload +- client.uploads.complete(upload_id, \*\*params) -> Upload + +## Parts + +Types: + +```python +from openai.types.uploads import UploadPart +``` + +Methods: + +- client.uploads.parts.create(upload_id, \*\*params) -> UploadPart + +# Responses + +Types: + +```python +from openai.types.responses import ( + ApplyPatchTool, + CompactedResponse, + ComputerTool, + CustomTool, + EasyInputMessage, + FileSearchTool, + FunctionShellTool, + FunctionTool, + Response, + ResponseApplyPatchToolCall, + ResponseApplyPatchToolCallOutput, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterToolCall, + ResponseCompactionItem, + ResponseCompactionItemParam, + ResponseCompletedEvent, + ResponseComputerToolCall, + ResponseComputerToolCallOutputItem, + ResponseComputerToolCallOutputScreenshot, + ResponseContent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseConversationParam, + ResponseCreatedEvent, + ResponseCustomToolCall, + ResponseCustomToolCallInputDeltaEvent, + ResponseCustomToolCallInputDoneEvent, + ResponseCustomToolCallOutput, + ResponseError, + ResponseErrorEvent, + ResponseFailedEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFileSearchToolCall, + ResponseFormatTextConfig, + 
ResponseFormatTextJSONSchemaConfig, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseFunctionCallOutputItem, + ResponseFunctionCallOutputItemList, + ResponseFunctionShellCallOutputContent, + ResponseFunctionShellToolCall, + ResponseFunctionShellToolCallOutput, + ResponseFunctionToolCall, + ResponseFunctionToolCallItem, + ResponseFunctionToolCallOutputItem, + ResponseFunctionWebSearch, + ResponseImageGenCallCompletedEvent, + ResponseImageGenCallGeneratingEvent, + ResponseImageGenCallInProgressEvent, + ResponseImageGenCallPartialImageEvent, + ResponseInProgressEvent, + ResponseIncludable, + ResponseIncompleteEvent, + ResponseInput, + ResponseInputAudio, + ResponseInputContent, + ResponseInputFile, + ResponseInputFileContent, + ResponseInputImage, + ResponseInputImageContent, + ResponseInputItem, + ResponseInputMessageContentList, + ResponseInputMessageItem, + ResponseInputText, + ResponseInputTextContent, + ResponseItem, + ResponseMcpCallArgumentsDeltaEvent, + ResponseMcpCallArgumentsDoneEvent, + ResponseMcpCallCompletedEvent, + ResponseMcpCallFailedEvent, + ResponseMcpCallInProgressEvent, + ResponseMcpListToolsCompletedEvent, + ResponseMcpListToolsFailedEvent, + ResponseMcpListToolsInProgressEvent, + ResponseOutputAudio, + ResponseOutputItem, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseOutputMessage, + ResponseOutputRefusal, + ResponseOutputText, + ResponseOutputTextAnnotationAddedEvent, + ResponsePrompt, + ResponseQueuedEvent, + ResponseReasoningItem, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ResponseReasoningTextDeltaEvent, + ResponseReasoningTextDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseStatus, + ResponseStreamEvent, + ResponseTextConfig, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseUsage, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + Tool, + ToolChoiceAllowed, + ToolChoiceApplyPatch, + ToolChoiceCustom, + ToolChoiceFunction, + ToolChoiceMcp, + ToolChoiceOptions, + ToolChoiceShell, + ToolChoiceTypes, + WebSearchPreviewTool, + WebSearchTool, +) +``` + +Methods: + +- client.responses.create(\*\*params) -> Response +- client.responses.retrieve(response_id, \*\*params) -> Response +- client.responses.delete(response_id) -> None +- client.responses.cancel(response_id) -> Response +- client.responses.compact(\*\*params) -> CompactedResponse + +## InputItems + +Types: + +```python +from openai.types.responses import ResponseItemList +``` + +Methods: + +- client.responses.input_items.list(response_id, \*\*params) -> SyncCursorPage[ResponseItem] + +## InputTokens + +Types: + +```python +from openai.types.responses import InputTokenCountResponse +``` + +Methods: + +- client.responses.input_tokens.count(\*\*params) -> InputTokenCountResponse + +# Realtime + +Types: + +```python +from openai.types.realtime import ( + AudioTranscription, + ConversationCreatedEvent, + ConversationItem, + ConversationItemAdded, + ConversationItemCreateEvent, + ConversationItemCreatedEvent, + ConversationItemDeleteEvent, + ConversationItemDeletedEvent, + ConversationItemDone, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionDeltaEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemInputAudioTranscriptionSegment, + 
ConversationItemRetrieveEvent, + ConversationItemTruncateEvent, + ConversationItemTruncatedEvent, + ConversationItemWithReference, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommitEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferDtmfEventReceivedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + InputAudioBufferTimeoutTriggered, + LogProbProperties, + McpListToolsCompleted, + McpListToolsFailed, + McpListToolsInProgress, + NoiseReductionType, + OutputAudioBufferClearEvent, + RateLimitsUpdatedEvent, + RealtimeAudioConfig, + RealtimeAudioConfigInput, + RealtimeAudioConfigOutput, + RealtimeAudioFormats, + RealtimeAudioInputTurnDetection, + RealtimeClientEvent, + RealtimeConversationItemAssistantMessage, + RealtimeConversationItemFunctionCall, + RealtimeConversationItemFunctionCallOutput, + RealtimeConversationItemSystemMessage, + RealtimeConversationItemUserMessage, + RealtimeError, + RealtimeErrorEvent, + RealtimeFunctionTool, + RealtimeMcpApprovalRequest, + RealtimeMcpApprovalResponse, + RealtimeMcpListTools, + RealtimeMcpProtocolError, + RealtimeMcpToolCall, + RealtimeMcpToolExecutionError, + RealtimeMcphttpError, + RealtimeResponse, + RealtimeResponseCreateAudioOutput, + RealtimeResponseCreateMcpTool, + RealtimeResponseCreateParams, + RealtimeResponseStatus, + RealtimeResponseUsage, + RealtimeResponseUsageInputTokenDetails, + RealtimeResponseUsageOutputTokenDetails, + RealtimeServerEvent, + RealtimeSession, + RealtimeSessionCreateRequest, + RealtimeToolChoiceConfig, + RealtimeToolsConfig, + RealtimeToolsConfigUnion, + RealtimeTracingConfig, + RealtimeTranscriptionSessionAudio, + RealtimeTranscriptionSessionAudioInput, + RealtimeTranscriptionSessionAudioInputTurnDetection, + RealtimeTranscriptionSessionCreateRequest, + RealtimeTruncation, + RealtimeTruncationRetentionRatio, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCancelEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreateEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseMcpCallArgumentsDelta, + ResponseMcpCallArgumentsDone, + ResponseMcpCallCompleted, + ResponseMcpCallFailed, + ResponseMcpCallInProgress, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdateEvent, + SessionUpdatedEvent, + TranscriptionSessionUpdate, + TranscriptionSessionUpdatedEvent, +) +``` + +## ClientSecrets + +Types: + +```python +from openai.types.realtime import ( + RealtimeSessionClientSecret, + RealtimeSessionCreateResponse, + RealtimeTranscriptionSessionCreateResponse, + RealtimeTranscriptionSessionTurnDetection, + ClientSecretCreateResponse, +) +``` + +Methods: + +- client.realtime.client_secrets.create(\*\*params) -> ClientSecretCreateResponse + +## Calls + +Methods: + +- client.realtime.calls.create(\*\*params) -> HttpxBinaryResponseContent +- client.realtime.calls.accept(call_id, \*\*params) -> None +- client.realtime.calls.hangup(call_id) -> None +- client.realtime.calls.refer(call_id, \*\*params) -> None +- client.realtime.calls.reject(call_id, \*\*params) -> None + +# Conversations + +Types: + +```python +from openai.types.conversations import ( + ComputerScreenshotContent, + Conversation, + ConversationDeleted, + 
ConversationDeletedResource, + Message, + SummaryTextContent, + TextContent, + InputTextContent, + OutputTextContent, + RefusalContent, + InputImageContent, + InputFileContent, +) +``` + +Methods: + +- client.conversations.create(\*\*params) -> Conversation +- client.conversations.retrieve(conversation_id) -> Conversation +- client.conversations.update(conversation_id, \*\*params) -> Conversation +- client.conversations.delete(conversation_id) -> ConversationDeletedResource + +## Items + +Types: + +```python +from openai.types.conversations import ConversationItem, ConversationItemList +``` + +Methods: + +- client.conversations.items.create(conversation_id, \*\*params) -> ConversationItemList +- client.conversations.items.retrieve(item_id, \*, conversation_id, \*\*params) -> ConversationItem +- client.conversations.items.list(conversation_id, \*\*params) -> SyncConversationCursorPage[ConversationItem] +- client.conversations.items.delete(item_id, \*, conversation_id) -> Conversation + +# Evals + +Types: + +```python +from openai.types import ( + EvalCustomDataSourceConfig, + EvalStoredCompletionsDataSourceConfig, + EvalCreateResponse, + EvalRetrieveResponse, + EvalUpdateResponse, + EvalListResponse, + EvalDeleteResponse, +) +``` + +Methods: + +- client.evals.create(\*\*params) -> EvalCreateResponse +- client.evals.retrieve(eval_id) -> EvalRetrieveResponse +- client.evals.update(eval_id, \*\*params) -> EvalUpdateResponse +- client.evals.list(\*\*params) -> SyncCursorPage[EvalListResponse] +- client.evals.delete(eval_id) -> EvalDeleteResponse + +## Runs + +Types: + +```python +from openai.types.evals import ( + CreateEvalCompletionsRunDataSource, + CreateEvalJSONLRunDataSource, + EvalAPIError, + RunCreateResponse, + RunRetrieveResponse, + RunListResponse, + RunDeleteResponse, + RunCancelResponse, +) +``` + +Methods: + +- client.evals.runs.create(eval_id, \*\*params) -> RunCreateResponse +- client.evals.runs.retrieve(run_id, \*, eval_id) -> RunRetrieveResponse +- client.evals.runs.list(eval_id, \*\*params) -> SyncCursorPage[RunListResponse] +- client.evals.runs.delete(run_id, \*, eval_id) -> RunDeleteResponse +- client.evals.runs.cancel(run_id, \*, eval_id) -> RunCancelResponse + +### OutputItems + +Types: + +```python +from openai.types.evals.runs import OutputItemRetrieveResponse, OutputItemListResponse +``` + +Methods: + +- client.evals.runs.output_items.retrieve(output_item_id, \*, eval_id, run_id) -> OutputItemRetrieveResponse +- client.evals.runs.output_items.list(run_id, \*, eval_id, \*\*params) -> SyncCursorPage[OutputItemListResponse] + +# Containers + +Types: + +```python +from openai.types import ContainerCreateResponse, ContainerRetrieveResponse, ContainerListResponse +``` + +Methods: + +- client.containers.create(\*\*params) -> ContainerCreateResponse +- client.containers.retrieve(container_id) -> ContainerRetrieveResponse +- client.containers.list(\*\*params) -> SyncCursorPage[ContainerListResponse] +- client.containers.delete(container_id) -> None + +## Files + +Types: + +```python +from openai.types.containers import FileCreateResponse, FileRetrieveResponse, FileListResponse +``` + +Methods: + +- client.containers.files.create(container_id, \*\*params) -> FileCreateResponse +- client.containers.files.retrieve(file_id, \*, container_id) -> FileRetrieveResponse +- client.containers.files.list(container_id, \*\*params) -> SyncCursorPage[FileListResponse] +- client.containers.files.delete(file_id, \*, container_id) -> None + +### Content + +Methods: + +- 
client.containers.files.content.retrieve(file_id, \*, container_id) -> HttpxBinaryResponseContent + +# Videos + +Types: + +```python +from openai.types import ( + Video, + VideoCreateError, + VideoModel, + VideoSeconds, + VideoSize, + VideoDeleteResponse, +) +``` + +Methods: + +- client.videos.create(\*\*params) -> Video +- client.videos.retrieve(video_id) -> Video +- client.videos.list(\*\*params) -> SyncConversationCursorPage[Video] +- client.videos.delete(video_id) -> VideoDeleteResponse +- client.videos.download_content(video_id, \*\*params) -> HttpxBinaryResponseContent +- client.videos.remix(video_id, \*\*params) -> Video diff --git a/bin/check-release-environment b/bin/check-release-environment index 2cc5ad6352..044ed525d1 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -7,7 +7,7 @@ if [ -z "${STAINLESS_API_KEY}" ]; then fi if [ -z "${PYPI_TOKEN}" ]; then - errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") + errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi lenErrors=${#errors[@]} diff --git a/examples/assistant.py b/examples/assistant.py deleted file mode 100644 index 0631494ecc..0000000000 --- a/examples/assistant.py +++ /dev/null @@ -1,38 +0,0 @@ - -import openai - -# gets API Key from environment variable OPENAI_API_KEY -client = openai.OpenAI() - -assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", -) - -thread = client.beta.threads.create() - -message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content="I need to solve the equation `3x + 11 = 14`. Can you help me?", -) - -run = client.beta.threads.runs.create_and_poll( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. The user has a premium account.", -) - -print("Run completed with status: " + run.status) - -if run.status == "completed": - messages = client.beta.threads.messages.list(thread_id=thread.id) - - print("messages: ") - for message in messages: - assert message.content[0].type == "text" - print({"role": message.role, "message": message.content[0].text.value}) - - client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream.py b/examples/assistant_stream.py deleted file mode 100644 index 0465d3930f..0000000000 --- a/examples/assistant_stream.py +++ /dev/null @@ -1,33 +0,0 @@ -import openai - -# gets API Key from environment variable OPENAI_API_KEY -client = openai.OpenAI() - -assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", -) - -thread = client.beta.threads.create() - -message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content="I need to solve the equation `3x + 11 = 14`. Can you help me?", -) - -print("starting run stream") - -stream = client.beta.threads.runs.create( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. 
The user has a premium account.", - stream=True, -) - -for event in stream: - print(event.model_dump_json(indent=2, exclude_unset=True)) - -client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream_helpers.py b/examples/assistant_stream_helpers.py deleted file mode 100644 index 7baec77c72..0000000000 --- a/examples/assistant_stream_helpers.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -from typing_extensions import override - -import openai -from openai import AssistantEventHandler -from openai.types.beta import AssistantStreamEvent -from openai.types.beta.threads import Text, TextDelta -from openai.types.beta.threads.runs import RunStep, RunStepDelta - - -class EventHandler(AssistantEventHandler): - @override - def on_event(self, event: AssistantStreamEvent) -> None: - if event.event == "thread.run.step.created": - details = event.data.step_details - if details.type == "tool_calls": - print("Generating code to interpret:\n\n```py") - elif event.event == "thread.message.created": - print("\nResponse:\n") - - @override - def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - print(delta.value, end="", flush=True) - - @override - def on_run_step_done(self, run_step: RunStep) -> None: - details = run_step.step_details - if details.type == "tool_calls": - for tool in details.tool_calls: - if tool.type == "code_interpreter": - print("\n```\nExecuting code...") - - @override - def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - details = delta.step_details - if details is not None and details.type == "tool_calls": - for tool in details.tool_calls or []: - if tool.type == "code_interpreter" and tool.code_interpreter and tool.code_interpreter.input: - print(tool.code_interpreter.input, end="", flush=True) - - -def main() -> None: - client = openai.OpenAI() - - assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", - ) - - try: - question = "I need to solve the equation `3x + 11 = 14`. Can you help me?" - - thread = client.beta.threads.create( - messages=[ - { - "role": "user", - "content": question, - }, - ] - ) - print(f"Question: {question}\n") - - with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. 
The user has a premium account.", - event_handler=EventHandler(), - ) as stream: - stream.until_done() - print() - finally: - client.beta.assistants.delete(assistant.id) - - -main() diff --git a/examples/async_demo.py b/examples/async_demo.py deleted file mode 100755 index 793b4e43fb..0000000000 --- a/examples/async_demo.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env -S poetry run python - -import asyncio - -from openai import AsyncOpenAI - -# gets API Key from environment variable OPENAI_API_KEY -client = AsyncOpenAI() - - -async def main() -> None: - stream = await client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="Say this is a test", - stream=True, - ) - async for completion in stream: - print(completion.choices[0].text, end="") - print() - - -asyncio.run(main()) diff --git a/examples/audio.py b/examples/audio.py deleted file mode 100755 index 85f47bfb06..0000000000 --- a/examples/audio.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env rye run python - -import time -from pathlib import Path - -from openai import OpenAI - -# gets OPENAI_API_KEY from your environment variables -openai = OpenAI() - -speech_file_path = Path(__file__).parent / "speech.mp3" - - -def main() -> None: - stream_to_speakers() - - # Create text-to-speech audio file - with openai.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", - ) as response: - response.stream_to_file(speech_file_path) - - # Create transcription from audio file - transcription = openai.audio.transcriptions.create( - model="whisper-1", - file=speech_file_path, - ) - print(transcription.text) - - # Create translation from audio file - translation = openai.audio.translations.create( - model="whisper-1", - file=speech_file_path, - ) - print(translation.text) - - -def stream_to_speakers() -> None: - import pyaudio - - player_stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1, rate=24000, output=True) - - start_time = time.time() - - with openai.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - response_format="pcm", # similar to WAV, but without a header chunk at the start. - input="""I see skies of blue and clouds of white - The bright blessed days, the dark sacred nights - And I think to myself - What a wonderful world""", - ) as response: - print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms") - for chunk in response.iter_bytes(chunk_size=1024): - player_stream.write(chunk) - - print(f"Done in {int((time.time() - start_time) * 1000)}ms.") - - -if __name__ == "__main__": - main() diff --git a/examples/azure.py b/examples/azure.py deleted file mode 100755 index 6936c4cb0e..0000000000 --- a/examples/azure.py +++ /dev/null @@ -1,43 +0,0 @@ -from openai import AzureOpenAI - -# may change in the future -# https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning -api_version = "2023-07-01-preview" - -# gets the API Key from environment variable AZURE_OPENAI_API_KEY -client = AzureOpenAI( - api_version=api_version, - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-endpoint.openai.azure.com", -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. 
gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) - - -deployment_client = AzureOpenAI( - api_version=api_version, - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-resource.azure.openai.com/", - # Navigate to the Azure OpenAI Studio to deploy a model. - azure_deployment="deployment-name", # e.g. gpt-35-instant -) - -completion = deployment_client.chat.completions.create( - model="", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) diff --git a/examples/azure_ad.py b/examples/azure_ad.py deleted file mode 100755 index 1b0d81863d..0000000000 --- a/examples/azure_ad.py +++ /dev/null @@ -1,30 +0,0 @@ -from azure.identity import DefaultAzureCredential, get_bearer_token_provider - -from openai import AzureOpenAI - -token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - - -# may change in the future -# https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning -api_version = "2023-07-01-preview" - -# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource -endpoint = "https://my-resource.openai.azure.com" - -client = AzureOpenAI( - api_version=api_version, - azure_endpoint=endpoint, - azure_ad_token_provider=token_provider, -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. 
gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) diff --git a/examples/demo.py b/examples/demo.py deleted file mode 100755 index ac1710f3e0..0000000000 --- a/examples/demo.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env -S poetry run python - -from openai import OpenAI - -# gets API Key from environment variable OPENAI_API_KEY -client = OpenAI() - -# Non-streaming: -print("----- standard request -----") -completion = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "Say this is a test", - }, - ], -) -print(completion.choices[0].message.content) - -# Streaming: -print("----- streaming request -----") -stream = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], - stream=True, -) -for chunk in stream: - if not chunk.choices: - continue - - print(chunk.choices[0].delta.content, end="") -print() - -# Response headers: -print("----- custom response headers test -----") -response = client.chat.completions.with_raw_response.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], -) -completion = response.parse() -print(response.request_id) -print(completion.choices[0].message.content) diff --git a/examples/module_client.py b/examples/module_client.py deleted file mode 100755 index 5f2fb79dcf..0000000000 --- a/examples/module_client.py +++ /dev/null @@ -1,25 +0,0 @@ -import openai - -# will default to `os.environ['OPENAI_API_KEY']` if not explicitly set -openai.api_key = "..." - -# all client options can be configured just like the `OpenAI` instantiation counterpart -openai.base_url = "https://..." -openai.default_headers = {"x-foo": "true"} - -# all API calls work in the exact same fashion as well -stream = openai.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], - stream=True, -) - -for chunk in stream: - print(chunk.choices[0].delta.content or "", end="", flush=True) - -print() diff --git a/examples/picture.py b/examples/picture.py deleted file mode 100644 index c27b52b0da..0000000000 --- a/examples/picture.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -from openai import OpenAI - -# gets OPENAI_API_KEY from your environment variables -openai = OpenAI() - -prompt = "An astronaut lounging in a tropical resort in space, pixel art" -model = "dall-e-3" - - -def main() -> None: - # Generate an image based on the prompt - response = openai.images.generate(prompt=prompt, model=model) - - # Prints response containing a URL link to image - print(response) - - -if __name__ == "__main__": - main() diff --git a/examples/streaming.py b/examples/streaming.py deleted file mode 100755 index 9a84891a83..0000000000 --- a/examples/streaming.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env -S poetry run python - -import asyncio - -from openai import OpenAI, AsyncOpenAI - -# This script assumes you have the OPENAI_API_KEY environment variable set to a valid OpenAI API key. 
-# -# You can run this script from the root directory like so: -# `python examples/streaming.py` - - -def sync_main() -> None: - client = OpenAI() - response = client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="1,2,3,", - max_tokens=5, - temperature=0, - stream=True, - ) - - # You can manually control iteration over the response - first = next(response) - print(f"got response data: {first.to_json()}") - - # Or you could automatically iterate through all of data. - # Note that the for loop will not exit until *all* of the data has been processed. - for data in response: - print(data.to_json()) - - -async def async_main() -> None: - client = AsyncOpenAI() - response = await client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="1,2,3,", - max_tokens=5, - temperature=0, - stream=True, - ) - - # You can manually control iteration over the response. - # In Python 3.10+ you can also use the `await anext(response)` builtin instead - first = await response.__anext__() - print(f"got response data: {first.to_json()}") - - # Or you could automatically iterate through all of data. - # Note that the for loop will not exit until *all* of the data has been processed. - async for data in response: - print(data.to_json()) - - -sync_main() - -asyncio.run(async_main()) diff --git a/helpers.md b/helpers.md deleted file mode 100644 index 3508b59a33..0000000000 --- a/helpers.md +++ /dev/null @@ -1,238 +0,0 @@ -# Streaming Helpers - -OpenAI supports streaming responses when interacting with the [Assistant](#assistant-streaming-api) APIs. - -## Assistant Streaming API - -OpenAI supports streaming responses from Assistants. The SDK provides convenience wrappers around the API -so you can subscribe to the types of events you are interested in as well as receive accumulated responses. - -More information can be found in the documentation: [Assistant Streaming](https://platform.openai.com/docs/assistants/overview?lang=python) - -#### An example of creating a run and subscribing to some events - -You can subscribe to events by creating an event handler class and overloading the relevant event handlers. - -```python -from typing_extensions import override -from openai import AssistantEventHandler, OpenAI -from openai.types.beta.threads import Text, TextDelta -from openai.types.beta.threads.runs import ToolCall, ToolCallDelta - -client = openai.OpenAI() - -# First, we create a EventHandler class to define -# how we want to handle the events in the response stream. - -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text: Text) -> None: - print(f"\nassistant > ", end="", flush=True) - - @override - def on_text_delta(self, delta: TextDelta, snapshot: Text): - print(delta.value, end="", flush=True) - - @override - def on_tool_call_created(self, tool_call: ToolCall): - print(f"\nassistant > {tool_call.type}\n", flush=True) - - @override - def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall): - if delta.type == "code_interpreter" and delta.code_interpreter: - if delta.code_interpreter.input: - print(delta.code_interpreter.input, end="", flush=True) - if delta.code_interpreter.outputs: - print(f"\n\noutput >", flush=True) - for output in delta.code_interpreter.outputs: - if output.type == "logs": - print(f"\n{output.logs}", flush=True) - -# Then, we use the `stream` SDK helper -# with the `EventHandler` class to create the Run -# and stream the response. 
- -with client.beta.threads.runs.stream( - thread_id="thread_id", - assistant_id="assistant_id", - event_handler=EventHandler(), -) as stream: - stream.until_done() -``` - -#### An example of iterating over events - -You can also iterate over all the streamed events. - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id -) as stream: - for event in stream: - # Print the text from text delta events - if event.event == "thread.message.delta" and event.data.delta.content: - print(event.data.delta.content[0].text) -``` - -#### An example of iterating over text - -You can also iterate over just the text deltas received - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id -) as stream: - for text in stream.text_deltas: - print(text) -``` - -### Creating Streams - -There are three helper methods for creating streams: - -```python -client.beta.threads.runs.stream() -``` - -This method can be used to start and stream the response to an existing run with an associated thread -that is already populated with messages. - -```python -client.beta.threads.create_and_run_stream() -``` - -This method can be used to add a message to a thread, start a run and then stream the response. - -```python -client.beta.threads.runs.submit_tool_outputs_stream() -``` - -This method can be used to submit a tool output to a run waiting on the output and start a stream. - -### Assistant Events - -The assistant API provides events you can subscribe to for the following events. - -```python -def on_event(self, event: AssistantStreamEvent) -``` - -This allows you to subscribe to all the possible raw events sent by the OpenAI streaming API. -In many cases it will be more convenient to subscribe to a more specific set of events for your use case. - -More information on the types of events can be found here: [Events](https://platform.openai.com/docs/api-reference/assistants-streaming/events) - -```python -def on_run_step_created(self, run_step: RunStep) -def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -def on_run_step_done(self, run_step: RunStep) -``` - -These events allow you to subscribe to the creation, delta and completion of a RunStep. - -For more information on how Runs and RunSteps work see the documentation [Runs and RunSteps](https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps) - -```python -def on_message_created(self, message: Message) -def on_message_delta(self, delta: MessageDelta, snapshot: Message) -def on_message_done(self, message: Message) -``` - -This allows you to subscribe to Message creation, delta and completion events. Messages can contain -different types of content that can be sent from a model (and events are available for specific content types). -For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. - -More information on messages can be found -on in the documentation page [Message](https://platform.openai.com/docs/api-reference/messages/object). - -```python -def on_text_created(self, text: Text) -def on_text_delta(self, delta: TextDelta, snapshot: Text) -def on_text_done(self, text: Text) -``` - -These events allow you to subscribe to the creation, delta and completion of a Text content (a specific type of message). -For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. 
- -```python -def on_image_file_done(self, image_file: ImageFile) -``` - -Image files are not sent incrementally so an event is provided for when a image file is available. - -```python -def on_tool_call_created(self, tool_call: ToolCall) -def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -def on_tool_call_done(self, tool_call: ToolCall) -``` - -These events allow you to subscribe to events for the creation, delta and completion of a ToolCall. - -More information on tools can be found here [Tools](https://platform.openai.com/docs/assistants/tools) - -```python -def on_end(self) -``` - -The last event send when a stream ends. - -```python -def on_timeout(self) -``` - -This event is triggered if the request times out. - -```python -def on_exception(self, exception: Exception) -``` - -This event is triggered if an exception occurs during streaming. - -### Assistant Methods - -The assistant streaming object also provides a few methods for convenience: - -```python -def current_event() -> AssistantStreamEvent | None -def current_run() -> Run | None -def current_message_snapshot() -> Message | None -def current_run_step_snapshot() -> RunStep | None -``` - -These methods are provided to allow you to access additional context from within event handlers. In many cases -the handlers should include all the information you need for processing, but if additional context is required it -can be accessed. - -Note: There is not always a relevant context in certain situations (these will be `None` in those cases). - -```python -def get_final_run(self) -> Run -def get_final_run_steps(self) -> List[RunStep] -def get_final_messages(self) -> List[Message] -``` - -These methods are provided for convenience to collect information at the end of a stream. Calling these events -will trigger consumption of the stream until completion and then return the relevant accumulated objects. - -# Polling Helpers - -When interacting with the API some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. -The SDK includes helper functions which will poll the status until it reaches a terminal state and then return the resulting object. -If an API method results in an action which could benefit from polling there will be a corresponding version of the -method ending in `_and_poll`. - -All methods also allow you to set the polling frequency, how often the API is checked for an update, via a function argument (`poll_interval_ms`). - -The polling methods are: - -```python -client.beta.threads.create_and_run_poll(...) -client.beta.threads.runs.create_and_poll(...) -client.beta.threads.runs.submit_tool_ouptputs_and_poll(...) -client.beta.vector_stores.files.upload_and_poll(...) -client.beta.vector_stores.files.create_and_poll(...) -client.beta.vector_stores.file_batches.create_and_poll(...) -client.beta.vector_stores.file_batches.upload_and_poll(...) -``` diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index a4517a002d..0000000000 --- a/mypy.ini +++ /dev/null @@ -1,47 +0,0 @@ -[mypy] -pretty = True -show_error_codes = True - -# Exclude _files.py because mypy isn't smart enough to apply -# the correct type narrowing and as this is an internal module -# it's fine to just use Pyright. 
-exclude = ^(src/openai/_files\.py|_dev/.*\.py)$ - -strict_equality = True -implicit_reexport = True -check_untyped_defs = True -no_implicit_optional = True - -warn_return_any = True -warn_unreachable = True -warn_unused_configs = True - -# Turn these options off as it could cause conflicts -# with the Pyright options. -warn_unused_ignores = False -warn_redundant_casts = False - -disallow_any_generics = True -disallow_untyped_defs = True -disallow_untyped_calls = True -disallow_subclassing_any = True -disallow_incomplete_defs = True -disallow_untyped_decorators = True -cache_fine_grained = True - -# By default, mypy reports an error if you assign a value to the result -# of a function call that doesn't return anything. We do this in our test -# cases: -# ``` -# result = ... -# assert result is None -# ``` -# Changing this codegen to make mypy happy would increase complexity -# and would not be worth it. -disable_error_code = func-returns-value - -# https://github.com/python/mypy/issues/12162 -[mypy.overrides] -module = "black.files.*" -ignore_errors = true -ignore_missing_imports = true diff --git a/pyproject.toml b/pyproject.toml index a33e167244..b54a3ce6db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,32 +1,32 @@ [project] name = "openai" -version = "1.30.1" +version = "2.12.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" authors = [ { name = "OpenAI", email = "support@openai.com" }, ] + dependencies = [ - "httpx>=0.23.0, <1", - "pydantic>=1.9.0, <3", - "typing-extensions>=4.7, <5", - "anyio>=3.5.0, <5", - "distro>=1.7.0, <2", - "sniffio", - "cached-property; python_version < '3.8'", - "tqdm > 4" + "httpx>=0.23.0, <1", + "pydantic>=1.9.0, <3", + "typing-extensions>=4.10, <5", + "anyio>=3.5.0, <5", + "distro>=1.7.0, <2", + "sniffio", ] -requires-python = ">= 3.7.1" + +requires-python = ">= 3.9" classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: MacOS", @@ -36,22 +36,20 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] -[project.optional-dependencies] -datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] - [project.urls] Homepage = "https://github.com/openai/openai-python" Repository = "https://github.com/openai/openai-python" -[project.scripts] -openai = "openai.cli:main" +[project.optional-dependencies] +aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.9"] +realtime = ["websockets >= 13, < 16"] [tool.rye] managed = true # version pins are in requirements-dev.lock dev-dependencies = [ - "pyright>=1.1.359", - "mypy", + "pyright==1.1.399", + "mypy==1.17", "respx", "pytest", "pytest-asyncio", @@ -60,11 +58,9 @@ dev-dependencies = [ "nox", "dirty-equals>=0.6.0", "importlib-metadata>=6.7.0", - "inline-snapshot >=0.7.0", - "azure-identity >=1.14.1", - "types-tqdm > 4", - "types-pyaudio > 0", - "trio >=0.22.2" + "rich>=13.7.1", + "pytest-xdist>=3.6.1", + "griffe>=1", ] [tool.rye.scripts] @@ -72,18 +68,21 @@ format = { chain = [ "format:ruff", "format:docs", "fix:ruff", + # run 
formatting again to fix any inconsistencies when imports are stripped + "format:ruff", ]} -"format:black" = "black ." "format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" "format:ruff" = "ruff format" -"format:isort" = "isort ." "lint" = { chain = [ "check:ruff", "typecheck", + "check:importable", ]} -"check:ruff" = "ruff ." -"fix:ruff" = "ruff --fix ." +"check:ruff" = "ruff check ." +"fix:ruff" = "ruff check --fix ." + +"check:importable" = "python -c 'import openai'" typecheck = { chain = [ "typecheck:pyright", @@ -94,7 +93,7 @@ typecheck = { chain = [ "typecheck:mypy" = "mypy ." [build-system] -requires = ["hatchling", "hatch-fancy-pypi-readme"] +requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -105,6 +104,21 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/openai"] +[tool.hatch.build.targets.sdist] +# Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) +include = [ + "/*.toml", + "/*.json", + "/*.lock", + "/*.md", + "/mypy.ini", + "/noxfile.py", + "bin/*", + "examples/*", + "src/*", + "tests/*", +] + [tool.hatch.metadata.hooks.fancy-pypi-readme] content-type = "text/markdown" @@ -116,15 +130,12 @@ path = "README.md" pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' replacement = '[\1](https://github.com/openai/openai-python/tree/main/\g<2>)' -[tool.black] -line-length = 120 -target-version = ["py37"] - [tool.pytest.ini_options] testpaths = ["tests"] -addopts = "--tb=short" +addopts = "--tb=short -n auto" xfail_strict = true asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "session" filterwarnings = [ "error" ] @@ -134,24 +145,82 @@ filterwarnings = [ # there are a couple of flags that are still disabled by # default in strict mode as they are experimental and niche. typeCheckingMode = "strict" -pythonVersion = "3.7" +pythonVersion = "3.9" exclude = [ "_dev", ".venv", ".nox", + ".git", ] reportImplicitOverride = true +reportOverlappingOverload = false reportImportCycles = false reportPrivateUsage = false +[tool.mypy] +pretty = true +show_error_codes = true + +# Exclude _files.py because mypy isn't smart enough to apply +# the correct type narrowing and as this is an internal module +# it's fine to just use Pyright. +# +# We also exclude our `tests` as mypy doesn't always infer +# types correctly and Pyright will still catch any type errors. +exclude = ['src/openai/_files.py', '_dev/.*.py', 'tests/.*'] + +strict_equality = true +implicit_reexport = true +check_untyped_defs = true +no_implicit_optional = true + +warn_return_any = true +warn_unreachable = true +warn_unused_configs = true + +# Turn these options off as it could cause conflicts +# with the Pyright options. +warn_unused_ignores = false +warn_redundant_casts = false + +disallow_any_generics = true +disallow_untyped_defs = true +disallow_untyped_calls = true +disallow_subclassing_any = true +disallow_incomplete_defs = true +disallow_untyped_decorators = true +cache_fine_grained = true + +# By default, mypy reports an error if you assign a value to the result +# of a function call that doesn't return anything. We do this in our test +# cases: +# ``` +# result = ... +# assert result is None +# ``` +# Changing this codegen to make mypy happy would increase complexity +# and would not be worth it. 
+disable_error_code = "func-returns-value,overload-cannot-match" + +# https://github.com/python/mypy/issues/12162 +[[tool.mypy.overrides]] +module = "black.files.*" +ignore_errors = true +ignore_missing_imports = true + [tool.ruff] line-length = 120 output-format = "grouped" -target-version = "py37" +target-version = "py38" + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] select = [ # isort "I", @@ -159,6 +228,8 @@ select = [ "B", # remove unused imports "F401", + # check for missing future annotations + "FA102", # bare except statements "E722", # unused arguments @@ -167,7 +238,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] @@ -180,10 +251,8 @@ unfixable = [ "T201", "T203", ] -ignore-init-module-imports = true -[tool.ruff.format] -docstring-code-format = true +extend-safe-fixes = ["FA102"] [tool.ruff.lint.flake8-tidy-imports.banned-api] "functools.lru_cache".msg = "This function does not retain type information for the wrapped function's arguments; The `lru_cache` function from `_utils` should be used instead" @@ -195,7 +264,7 @@ combine-as-imports = true extra-standard-library = ["typing_extensions"] known-first-party = ["openai", "tests"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "bin/**.py" = ["T201", "T203"] "scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] diff --git a/requirements-dev.lock b/requirements-dev.lock index 6a4e12022a..e518b9494d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,173 +6,149 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false +# universal: false -e file:. -annotated-types==0.6.0 +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.13.2 + # via httpx-aiohttp + # via openai +aiosignal==1.4.0 + # via aiohttp +annotated-types==0.7.0 # via pydantic -anyio==4.1.0 +anyio==4.12.0 # via httpx # via openai -argcomplete==3.1.2 +argcomplete==3.6.3 # via nox -asttokens==2.4.1 - # via inline-snapshot -attrs==23.1.0 - # via outcome - # via pytest - # via trio -azure-core==1.30.1 - # via azure-identity -azure-identity==1.15.0 -black==24.4.2 - # via inline-snapshot -certifi==2023.7.22 +async-timeout==5.0.1 + # via aiohttp +attrs==25.4.0 + # via aiohttp + # via nox +backports-asyncio-runner==1.2.0 + # via pytest-asyncio +certifi==2025.11.12 # via httpcore # via httpx - # via requests -cffi==1.16.0 - # via cryptography -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via black - # via inline-snapshot -colorlog==6.7.0 +colorama==0.4.6 + # via griffe +colorlog==6.10.1 + # via nox +dependency-groups==1.3.1 # via nox -cryptography==42.0.7 - # via azure-identity - # via msal - # via pyjwt -dirty-equals==0.6.0 -distlib==0.3.7 +dirty-equals==0.11 +distlib==0.4.0 # via virtualenv -distro==1.8.0 +distro==1.9.0 # via openai -exceptiongroup==1.1.3 +exceptiongroup==1.3.1 # via anyio - # via trio -executing==2.0.1 - # via inline-snapshot -filelock==3.12.4 + # via pytest +execnet==2.1.2 + # via pytest-xdist +filelock==3.19.1 # via virtualenv -h11==0.14.0 +frozenlist==1.8.0 + # via aiohttp + # via aiosignal +griffe==1.14.0 +h11==0.16.0 # via httpcore -httpcore==1.0.2 +httpcore==1.0.9 # via httpx -httpx==0.25.2 +httpx==0.28.1 + # via httpx-aiohttp # via openai # via respx -idna==3.4 +httpx-aiohttp==0.1.9 + # via openai +humanize==4.13.0 + # via nox +idna==3.11 # via anyio # via httpx - # via requests - # via trio -importlib-metadata==7.0.0 -iniconfig==2.0.0 + # via yarl 
+importlib-metadata==8.7.0 +iniconfig==2.1.0 # via pytest -inline-snapshot==0.7.0 -msal==1.28.0 - # via azure-identity - # via msal-extensions -msal-extensions==1.1.0 - # via azure-identity -mypy==1.7.1 -mypy-extensions==1.0.0 - # via black +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +multidict==6.7.0 + # via aiohttp + # via yarl +mypy==1.17.0 +mypy-extensions==1.1.0 # via mypy -nodeenv==1.8.0 +nodeenv==1.9.1 # via pyright -nox==2023.4.22 -numpy==1.26.3 - # via openai - # via pandas - # via pandas-stubs -outcome==1.3.0.post0 - # via trio -packaging==23.2 - # via black - # via msal-extensions +nox==2025.11.12 +packaging==25.0 + # via dependency-groups # via nox # via pytest -pandas==2.1.4 - # via openai -pandas-stubs==2.1.4.231227 - # via openai pathspec==0.12.1 - # via black -platformdirs==3.11.0 - # via black + # via mypy +platformdirs==4.4.0 # via virtualenv -pluggy==1.3.0 +pluggy==1.6.0 # via pytest -portalocker==2.8.2 - # via msal-extensions -py==1.11.0 - # via pytest -pycparser==2.22 - # via cffi -pydantic==2.7.1 +propcache==0.4.1 + # via aiohttp + # via yarl +pydantic==2.12.5 # via openai -pydantic-core==2.18.2 +pydantic-core==2.41.5 # via pydantic -pyjwt==2.8.0 - # via msal -pyright==1.1.359 -pytest==7.1.1 +pygments==2.19.2 + # via pytest + # via rich +pyright==1.1.399 +pytest==8.4.2 # via pytest-asyncio -pytest-asyncio==0.21.1 -python-dateutil==2.8.2 - # via pandas + # via pytest-xdist +pytest-asyncio==1.2.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 # via time-machine -pytz==2023.3.post1 - # via dirty-equals - # via pandas -requests==2.31.0 - # via azure-core - # via msal -respx==0.20.2 -ruff==0.1.9 -setuptools==68.2.2 - # via nodeenv -six==1.16.0 - # via asttokens - # via azure-core +respx==0.22.0 +rich==14.2.0 +ruff==0.14.7 +six==1.17.0 # via python-dateutil -sniffio==1.3.0 - # via anyio - # via httpx +sniffio==1.3.1 # via openai - # via trio -sortedcontainers==2.4.0 - # via trio -time-machine==2.9.0 -toml==0.10.2 - # via inline-snapshot -tomli==2.0.1 - # via black +time-machine==2.19.0 +tomli==2.3.0 + # via dependency-groups # via mypy + # via nox # via pytest -tqdm==4.66.1 - # via openai -trio==0.22.2 -types-pyaudio==0.2.16.20240106 -types-pytz==2024.1.0.20240417 - # via pandas-stubs -types-toml==0.10.8.20240310 - # via inline-snapshot -types-tqdm==4.66.0.2 -typing-extensions==4.8.0 - # via azure-core - # via black +typing-extensions==4.15.0 + # via aiosignal + # via anyio + # via exceptiongroup + # via multidict # via mypy # via openai # via pydantic # via pydantic-core -tzdata==2024.1 - # via pandas -urllib3==2.2.1 - # via requests -virtualenv==20.24.5 + # via pyright + # via pytest-asyncio + # via typing-inspection + # via virtualenv +typing-inspection==0.4.2 + # via pydantic +virtualenv==20.35.4 # via nox -zipp==3.17.0 +websockets==15.0.1 + # via openai +yarl==1.22.0 + # via aiohttp +zipp==3.23.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 47cf8a40e9..aeefc07d8a 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,58 +6,73 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false +# universal: false -e file:. 
-annotated-types==0.6.0 +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.13.2 + # via httpx-aiohttp + # via openai +aiosignal==1.4.0 + # via aiohttp +annotated-types==0.7.0 # via pydantic -anyio==4.1.0 +anyio==4.12.0 # via httpx # via openai -certifi==2023.7.22 +async-timeout==5.0.1 + # via aiohttp +attrs==25.4.0 + # via aiohttp +certifi==2025.11.12 # via httpcore # via httpx -distro==1.8.0 +distro==1.9.0 # via openai -exceptiongroup==1.1.3 +exceptiongroup==1.3.1 # via anyio -h11==0.14.0 +frozenlist==1.8.0 + # via aiohttp + # via aiosignal +h11==0.16.0 # via httpcore -httpcore==1.0.2 - # via httpx -httpx==0.25.2 - # via openai -idna==3.4 - # via anyio +httpcore==1.0.9 # via httpx -numpy==1.26.4 - # via openai - # via pandas - # via pandas-stubs -pandas==2.2.2 - # via openai -pandas-stubs==2.2.1.240316 +httpx==0.28.1 + # via httpx-aiohttp # via openai -pydantic==2.7.1 +httpx-aiohttp==0.1.9 # via openai -pydantic-core==2.18.2 - # via pydantic -python-dateutil==2.9.0.post0 - # via pandas -pytz==2024.1 - # via pandas -six==1.16.0 - # via python-dateutil -sniffio==1.3.0 +idna==3.11 # via anyio # via httpx + # via yarl +multidict==6.7.0 + # via aiohttp + # via yarl +propcache==0.4.1 + # via aiohttp + # via yarl +pydantic==2.12.5 # via openai -tqdm==4.66.1 +pydantic-core==2.41.5 + # via pydantic +sniffio==1.3.1 # via openai -types-pytz==2024.1.0.20240417 - # via pandas-stubs -typing-extensions==4.8.0 +typing-extensions==4.15.0 + # via aiosignal + # via anyio + # via exceptiongroup + # via multidict # via openai # via pydantic # via pydantic-core -tzdata==2024.1 - # via pandas + # via typing-inspection +typing-inspection==0.4.2 + # via pydantic +websockets==15.0.1 + # via openai +yarl==1.22.0 + # via aiohttp diff --git a/scripts/bootstrap b/scripts/bootstrap index 29df07e77b..b430fee36d 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,10 +4,18 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then brew bundle check >/dev/null 2>&1 || { - echo "==> Installing Homebrew dependencies…" - brew bundle + echo -n "==> Install Homebrew dependencies? (y/N): " + read -r response + case "$response" in + [yY][eE][sS]|[yY]) + brew bundle + ;; + *) + ;; + esac + echo } fi @@ -16,4 +24,4 @@ echo "==> Installing Python dependencies…" # experimental uv support makes installations significantly faster rye config --set-bool behavior.use-uv=true -rye sync +rye sync --all-features diff --git a/scripts/detect-breaking-changes b/scripts/detect-breaking-changes new file mode 100755 index 0000000000..833872ef3a --- /dev/null +++ b/scripts/detect-breaking-changes @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." + +echo "==> Detecting breaking changes" + +TEST_PATHS=( + tests/api_resources + tests/test_client.py + tests/test_response.py + tests/test_legacy_response.py +) + +for PATHSPEC in "${TEST_PATHS[@]}"; do + # Try to check out previous versions of the test files + # with the current SDK. + git checkout "$1" -- "${PATHSPEC}" 2>/dev/null || true +done + +# Instead of running the tests, use the linter to check if an +# older test is no longer compatible with the latest SDK. 
+./scripts/lint diff --git a/scripts/detect-breaking-changes.py b/scripts/detect-breaking-changes.py new file mode 100644 index 0000000000..3a30f3db2f --- /dev/null +++ b/scripts/detect-breaking-changes.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import sys +from typing import Iterator +from pathlib import Path + +import rich +import griffe +from rich.text import Text +from rich.style import Style + + +def public_members(obj: griffe.Object | griffe.Alias) -> dict[str, griffe.Object | griffe.Alias]: + if isinstance(obj, griffe.Alias): + # ignore imports for now, they're technically part of the public API + # but we don't have good preventative measures in place to prevent + # changing them + return {} + + return {name: value for name, value in obj.all_members.items() if not name.startswith("_")} + + +def find_breaking_changes( + new_obj: griffe.Object | griffe.Alias, + old_obj: griffe.Object | griffe.Alias, + *, + path: list[str], +) -> Iterator[Text | str]: + new_members = public_members(new_obj) + old_members = public_members(old_obj) + + for name, old_member in old_members.items(): + if isinstance(old_member, griffe.Alias) and len(path) > 2: + # ignore imports in `/types/` for now, they're technically part of the public API + # but we don't have good preventative measures in place to prevent changing them + continue + + new_member = new_members.get(name) + if new_member is None: + cls_name = old_member.__class__.__name__ + yield Text(f"({cls_name})", style=Style(color="rgb(119, 119, 119)")) + yield from [" " for _ in range(10 - len(cls_name))] + yield f" {'.'.join(path)}.{name}" + yield "\n" + continue + + yield from find_breaking_changes(new_member, old_member, path=[*path, name]) + + +def main() -> None: + try: + against_ref = sys.argv[1] + except IndexError as err: + raise RuntimeError("You must specify a base ref to run breaking change detection against") from err + + package = griffe.load( + "openai", + search_paths=[Path(__file__).parent.parent.joinpath("src")], + ) + old_package = griffe.load_git( + "openai", + ref=against_ref, + search_paths=["src"], + ) + assert isinstance(package, griffe.Module) + assert isinstance(old_package, griffe.Module) + + output = list(find_breaking_changes(package, old_package, path=["openai"])) + if output: + rich.print(Text("Breaking changes detected!", style=Style(color="rgb(165, 79, 87)"))) + rich.print() + + for text in output: + rich.print(text, end="") + + sys.exit(1) + + +main() diff --git a/scripts/lint b/scripts/lint index 64495ee345..55bc1dd711 100755 --- a/scripts/lint +++ b/scripts/lint @@ -9,4 +9,3 @@ rye run lint echo "==> Making sure it imports" rye run python -c 'import openai' - diff --git a/scripts/mock b/scripts/mock index fe89a1d084..0b28f6ea23 100755 --- a/scripts/mock +++ b/scripts/mock @@ -21,7 +21,7 @@ echo "==> Starting mock server with URL ${URL}" # Run prism mock on the given spec if [ "$1" == "--daemon" ]; then - npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" &> .prism.log & + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log & # Wait for server to come online echo -n "Waiting for server" @@ -37,5 +37,5 @@ if [ "$1" == "--daemon" ]; then echo else - npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" fi diff --git a/scripts/test b/scripts/test index b3ace9013b..dbeda2d217 100755 --- a/scripts/test +++ b/scripts/test @@ -43,7 +43,7 @@ elif ! 
prism_is_running ; then echo -e "To run the server, pass in the path or url of your OpenAPI" echo -e "spec to the prism command:" echo - echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" + echo -e " \$ ${YELLOW}npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock path/to/your.openapi.yml${NC}" echo exit 1 @@ -52,5 +52,10 @@ else echo fi +export DEFER_PYDANTIC_BUILD=false + echo "==> Running tests" rye run pytest "$@" + +echo "==> Running Pydantic v1 tests" +rye run nox -s test-pydantic-v1 -- "$@" diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94f0f..0cf2bd2fd9 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh new file mode 100755 index 0000000000..cd522975fc --- /dev/null +++ b/scripts/utils/upload-artifact.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -exuo pipefail + +FILENAME=$(basename dist/*.whl) + +RESPONSE=$(curl -X POST "$URL?filename=$FILENAME" \ + -H "Authorization: Bearer $AUTH" \ + -H "Content-Type: application/json") + +SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url') + +if [[ "$SIGNED_URL" == "null" ]]; then + echo -e "\033[31mFailed to get signed URL.\033[0m" + exit 1 +fi + +UPLOAD_RESPONSE=$(curl -v -X PUT \ + -H "Content-Type: binary/octet-stream" \ + --data-binary "@dist/$FILENAME" "$SIGNED_URL" 2>&1) + +if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then + echo -e "\033[32mUploaded build to Stainless storage.\033[0m" + echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/openai-python/$SHA/$FILENAME'\033[0m" +else + echo -e "\033[31mFailed to upload artifact.\033[0m" + exit 1 +fi diff --git a/src/openai/__init__.py b/src/openai/__init__.py index 0e87ae9259..d1744014ba 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -2,11 +2,11 @@ from __future__ import annotations -import os as _os +import typing as _t from typing_extensions import override from . 
import types -from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes +from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes, omit, not_given from ._utils import file_from_path from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions from ._models import BaseModel @@ -29,7 +29,7 @@ UnprocessableEntityError, APIResponseValidationError, ) -from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient +from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging __all__ = [ @@ -41,6 +41,9 @@ "ProxiesTypes", "NotGiven", "NOT_GIVEN", + "not_given", + "Omit", + "omit", "OpenAIError", "APIError", "APIStatusError", @@ -70,16 +73,11 @@ "DEFAULT_CONNECTION_LIMITS", "DefaultHttpxClient", "DefaultAsyncHttpxClient", + "DefaultAioHttpClient", ] -from .lib import azure as _azure -from .version import VERSION as VERSION -from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI -from .lib._old_api import * -from .lib.streaming import ( - AssistantEventHandler as AssistantEventHandler, - AsyncAssistantEventHandler as AsyncAssistantEventHandler, -) +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources _setup_logging() @@ -98,7 +96,6 @@ # ------ Module level client ------ import typing as _t -import typing_extensions as _te import httpx as _httpx @@ -110,6 +107,8 @@ project: str | None = None +webhook_secret: str | None = None + base_url: str | _httpx.URL | None = None timeout: float | Timeout | None = DEFAULT_TIMEOUT @@ -122,18 +121,6 @@ http_client: _httpx.Client | None = None -_ApiType = _te.Literal["openai", "azure"] - -api_type: _ApiType | None = _t.cast(_ApiType, _os.environ.get("OPENAI_API_TYPE")) - -api_version: str | None = _os.environ.get("OPENAI_API_VERSION") - -azure_endpoint: str | None = _os.environ.get("AZURE_OPENAI_ENDPOINT") - -azure_ad_token: str | None = _os.environ.get("AZURE_OPENAI_AD_TOKEN") - -azure_ad_token_provider: _azure.AzureADTokenProvider | None = None - class _ModuleClient(OpenAI): # Note: we have to use type: ignores here as overriding class members @@ -172,6 +159,17 @@ def project(self, value: str | None) -> None: # type: ignore project = value + @property # type: ignore + @override + def webhook_secret(self) -> str | None: + return webhook_secret + + @webhook_secret.setter # type: ignore + def webhook_secret(self, value: str | None) -> None: # type: ignore + global webhook_secret + + webhook_secret = value + @property @override def base_url(self) -> _httpx.URL: @@ -240,33 +238,6 @@ def _client(self, value: _httpx.Client) -> None: # type: ignore http_client = value -class _AzureModuleClient(_ModuleClient, AzureOpenAI): # type: ignore - ... 
- - -class _AmbiguousModuleClientUsageError(OpenAIError): - def __init__(self) -> None: - super().__init__( - "Ambiguous use of module client; please set `openai.api_type` or the `OPENAI_API_TYPE` environment variable to `openai` or `azure`" - ) - - -def _has_openai_credentials() -> bool: - return _os.environ.get("OPENAI_API_KEY") is not None - - -def _has_azure_credentials() -> bool: - return azure_endpoint is not None or _os.environ.get("AZURE_OPENAI_API_KEY") is not None - - -def _has_azure_ad_credentials() -> bool: - return ( - _os.environ.get("AZURE_OPENAI_AD_TOKEN") is not None - or azure_ad_token is not None - or azure_ad_token_provider is not None - ) - - _client: OpenAI | None = None @@ -274,56 +245,11 @@ def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction] global _client if _client is None: - global api_type, azure_endpoint, azure_ad_token, api_version - - if azure_endpoint is None: - azure_endpoint = _os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_ad_token is None: - azure_ad_token = _os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_version is None: - api_version = _os.environ.get("OPENAI_API_VERSION") - - if api_type is None: - has_openai = _has_openai_credentials() - has_azure = _has_azure_credentials() - has_azure_ad = _has_azure_ad_credentials() - - if has_openai and (has_azure or has_azure_ad): - raise _AmbiguousModuleClientUsageError() - - if (azure_ad_token is not None or azure_ad_token_provider is not None) and _os.environ.get( - "AZURE_OPENAI_API_KEY" - ) is not None: - raise _AmbiguousModuleClientUsageError() - - if has_azure or has_azure_ad: - api_type = "azure" - else: - api_type = "openai" - - if api_type == "azure": - _client = _AzureModuleClient( # type: ignore - api_version=api_version, - azure_endpoint=azure_endpoint, - api_key=api_key, - azure_ad_token=azure_ad_token, - azure_ad_token_provider=azure_ad_token_provider, - organization=organization, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - ) - return _client - _client = _ModuleClient( api_key=api_key, organization=organization, project=project, + webhook_secret=webhook_secret, base_url=base_url, timeout=timeout, max_retries=max_retries, @@ -346,12 +272,21 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction] beta as beta, chat as chat, audio as audio, + evals as evals, files as files, images as images, models as models, + videos as videos, batches as batches, + uploads as uploads, + realtime as realtime, + webhooks as webhooks, + responses as responses, + containers as containers, embeddings as embeddings, completions as completions, fine_tuning as fine_tuning, moderations as moderations, + conversations as conversations, + vector_stores as vector_stores, ) diff --git a/src/openai/__main__.py b/src/openai/__main__.py deleted file mode 100644 index 4e28416e10..0000000000 --- a/src/openai/__main__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .cli import main - -main() diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 5d5d25fca9..27d3aa975d 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys import json import time import uuid @@ -8,7 +9,6 @@ import inspect import logging import platform -import warnings import email.utils from types import TracebackType from random import random @@ -35,14 +35,13 @@ import httpx import distro import pydantic -from httpx import 
URL, Limits +from httpx import URL from pydantic import PrivateAttr from . import _exceptions from ._qs import Querystring from ._files import to_httpx_files, async_to_httpx_files from ._types import ( - NOT_GIVEN, Body, Omit, Query, @@ -50,18 +49,17 @@ Timeout, NotGiven, ResponseT, - Transport, AnyMapping, PostParser, - ProxiesTypes, RequestFiles, HttpxSendArgs, - AsyncTransport, RequestOptions, + HttpxRequestFiles, ModelBuilderProtocol, + not_given, ) -from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping -from ._compat import model_copy, model_dump +from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping +from ._compat import PYDANTIC_V1, model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( APIResponse, @@ -101,7 +99,11 @@ _AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any]) if TYPE_CHECKING: - from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + from httpx._config import ( + DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage] + ) + + HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG else: try: from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT @@ -118,32 +120,48 @@ class PageInfo: url: URL | NotGiven params: Query | NotGiven + json: Body | NotGiven @overload def __init__( self, *, url: URL, - ) -> None: - ... + ) -> None: ... @overload def __init__( self, *, params: Query, - ) -> None: - ... + ) -> None: ... + + @overload + def __init__( + self, + *, + json: Body, + ) -> None: ... def __init__( self, *, - url: URL | NotGiven = NOT_GIVEN, - params: Query | NotGiven = NOT_GIVEN, + url: URL | NotGiven = not_given, + json: Body | NotGiven = not_given, + params: Query | NotGiven = not_given, ) -> None: self.url = url + self.json = json self.params = params + @override + def __repr__(self) -> str: + if self.url: + return f"{self.__class__.__name__}(url={self.url})" + if self.json: + return f"{self.__class__.__name__}(json={self.json})" + return f"{self.__class__.__name__}(params={self.params})" + class BasePage(GenericModel, Generic[_T]): """ @@ -166,8 +184,7 @@ def has_next_page(self) -> bool: return False return self.next_page_info() is not None - def next_page_info(self) -> Optional[PageInfo]: - ... + def next_page_info(self) -> Optional[PageInfo]: ... def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] ... 
@@ -191,6 +208,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: options.url = str(url) return options + if not isinstance(info.json, NotGiven): + if not is_mapping(info.json): + raise TypeError("Pagination is only supported with mappings") + + if not options.json_data: + options.json_data = {**info.json} + else: + if not is_mapping(options.json_data): + raise TypeError("Pagination is only supported with mappings") + + options.json_data = {**options.json_data, **info.json} + return options + raise ValueError("Unexpected PageInfo state") @@ -203,6 +233,9 @@ def _set_private_attributes( model: Type[_T], options: FinalRequestOptions, ) -> None: + if (not PYDANTIC_V1) and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -288,6 +321,9 @@ def _set_private_attributes( client: AsyncAPIClient, options: FinalRequestOptions, ) -> None: + if (not PYDANTIC_V1) and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -327,9 +363,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): _base_url: URL max_retries: int timeout: Union[float, Timeout, None] - _limits: httpx.Limits - _proxies: ProxiesTypes | None - _transport: Transport | AsyncTransport | None _strict_response_validation: bool _idempotency_header: str | None _default_stream_cls: type[_DefaultStreamT] | None = None @@ -342,9 +375,6 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None = DEFAULT_TIMEOUT, - limits: httpx.Limits, - transport: Transport | AsyncTransport | None, - proxies: ProxiesTypes | None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: @@ -352,13 +382,11 @@ def __init__( self._base_url = self._enforce_trailing_slash(URL(base_url)) self.max_retries = max_retries self.timeout = timeout - self._limits = limits - self._proxies = proxies - self._transport = transport self._custom_headers = custom_headers or {} self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation self._idempotency_header = None + self._platform: Platform | None = None if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] raise TypeError( @@ -401,14 +429,7 @@ def _make_status_error( ) -> _exceptions.APIStatusError: raise NotImplementedError() - def _remaining_retries( - self, - remaining_retries: Optional[int], - options: FinalRequestOptions, - ) -> int: - return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries) - - def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: + def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers: custom_headers = options.headers or {} headers_dict = _merge_mappings(self.default_headers, custom_headers) self._validate_headers(headers_dict, custom_headers) @@ -417,8 +438,20 @@ def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: headers = httpx.Headers(headers_dict) idempotency_header = self._idempotency_header - if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: - headers[idempotency_header] = options.idempotency_key or self._idempotency_key() + if idempotency_header and options.idempotency_key and idempotency_header not in headers: 
+ headers[idempotency_header] = options.idempotency_key + + # Don't set these headers if they were already set or removed by the caller. We check + # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: + headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers @@ -441,6 +474,8 @@ def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder: def _build_request( self, options: FinalRequestOptions, + *, + retries_taken: int = 0, ) -> httpx.Request: if log.isEnabledFor(logging.DEBUG): log.debug("Request options: %s", model_dump(options, exclude_unset=True)) @@ -456,9 +491,10 @@ def _build_request( else: raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") - headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + headers = self._build_headers(options, retries_taken=retries_taken) + params = _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") + files = options.files # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with @@ -472,7 +508,7 @@ def _build_request( headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json - # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding if json_data: if not is_dict(json_data): raise TypeError( @@ -480,19 +516,43 @@ def _build_request( ) kwargs["data"] = self._serialize_multipartform(json_data) + # httpx determines whether or not to send a "multipart/form-data" + # request based on the truthiness of the "files" argument. + # This gets around that issue by generating a dict value that + # evaluates to true. 
+ # + # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186 + if not files: + files = cast(HttpxRequestFiles, ForceMultipartDict()) + + prepared_url = self._prepare_url(options.url) + if "_" in prepared_url.host: + # work around https://github.com/encode/httpx/discussions/2880 + kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} + + is_body_allowed = options.method.lower() != "get" + + if is_body_allowed: + if isinstance(json_data, bytes): + kwargs["content"] = json_data + else: + kwargs["json"] = json_data if is_given(json_data) else None + kwargs["files"] = files + else: + headers.pop("Content-Type", None) + kwargs.pop("data", None) + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout, method=options.method, - url=self._prepare_url(options.url), + url=prepared_url, # the `Query` type that we use is incompatible with qs' # `Params` type as it needs to be typed as `Mapping[str, object]` # so that passing a `TypedDict` doesn't cause an error. # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data, - files=options.files, **kwargs, ) @@ -536,7 +596,7 @@ def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalReques # we internally support defining a temporary header to override the # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response` # see _response.py for implementation details - override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN) + override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, not_given) if is_given(override_cast_to): options.headers = headers return cast(Type[ResponseT], override_cast_to) @@ -593,6 +653,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 @@ -617,7 +683,10 @@ def base_url(self, url: URL | str) -> None: self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url)) def platform_headers(self) -> Dict[str, str]: - return platform_headers(self._version) + # the actual implementation is in a separate `lru_cache` decorated + # function because adding `lru_cache` to methods will leak memory + # https://github.com/python/cpython/issues/88476 + return platform_headers(self._version, platform=self._platform) def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None: """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. @@ -666,7 +735,8 @@ def _calculate_retry_timeout( if retry_after is not None and 0 < retry_after <= 60: return retry_after - nb_retries = max_retries - remaining_retries + # Also cap retry count to 1000 to avoid any potential overflows with `pow` + nb_retries = min(max_retries - remaining_retries, 1000) # Apply exponential backoff, but not more than the max. 
sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) @@ -737,6 +807,9 @@ def __init__(self, **kwargs: Any) -> None: class SyncHttpxClientWrapper(DefaultHttpxClient): def __del__(self) -> None: + if self.is_closed: + return + try: self.close() except Exception: @@ -753,44 +826,12 @@ def __init__( version: str, base_url: str | URL, max_retries: int = DEFAULT_MAX_RETRIES, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: Transport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, + timeout: float | Timeout | None | NotGiven = not_given, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -811,12 +852,9 @@ def __init__( super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -826,10 +864,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, - follow_redirects=True, ) def is_closed(self) -> bool: @@ -859,9 +893,9 @@ def __exit__( def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options def _prepare_request( self, @@ -879,185 +913,164 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[True], stream_cls: Type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: Type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... 
def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - return self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - remaining_retries=remaining_retries, - ) - - def _request( - self, - *, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - remaining_retries: int | None, - stream: bool, - stream_cls: type[_StreamT] | None, ) -> ResponseT | _StreamT: cast_to = self._maybe_override_cast_to(cast_to, options) - self._prepare_options(options) - - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) - self._prepare_request(request) - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - log.debug("Sending HTTP Request: %s %s", request.method, request.url) + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - try: - response = self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) - - if retries > 0: - return self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = self._prepare_options(options) - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + self._prepare_request(request) - if retries > 0: - return self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err + if options.follow_redirects is not None: + kwargs["follow_redirects"] = options.follow_redirects - log.debug( - 'HTTP Response: %s %s "%i %s" %s', - request.method, - request.url, - response.status_code, - response.reason_phrase, - response.headers, - ) - log.debug("request_id: %s", response.headers.get("x-request-id")) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if retries > 0 and self._should_retry(err.response): - err.response.close() - return self._retry_request( - options, - cast_to, - retries, - err.response.headers, - stream=stream, - stream_cls=stream_cls, + response = None + try: + response = self._client.send( + request, + stream=stream or 
self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) + log.debug("request_id: %s", response.headers.get("x-request-id")) - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. - if not err.response.is_closed: - err.response.read() + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + err.response.close() + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None + break + + assert response is not None, "could not resolve response (should never happen)" return self._process_response( cast_to=cast_to, options=options, response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) - def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: httpx.Headers | None, - *, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: - remaining = remaining_retries - 1 - if remaining == 1: + def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken + if remaining_retries == 1: log.debug("1 retry left") else: - log.debug("%i retries left", remaining) + log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) - # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a - # different thread if necessary. 
time.sleep(timeout) - return self._request( - options=options, - cast_to=cast_to, - remaining_retries=remaining, - stream=stream, - stream_cls=stream_cls, - ) - def _process_response( self, *, @@ -1066,6 +1079,7 @@ def _process_response( response: httpx.Response, stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, ) -> ResponseT: if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": return cast( @@ -1077,12 +1091,20 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) origin = get_origin(cast_to) or cast_to - if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if ( + inspect.isclass(origin) + and issubclass(origin, BaseAPIResponse) + # we only want to actually return the custom BaseAPIResponse class if we're + # returning the raw response, or if we're not streaming SSE, as if we're streaming + # SSE then `cast_to` doesn't actively reflect the type we need to parse into + and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER))) + ): if not issubclass(origin, APIResponse): raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}") @@ -1096,6 +1118,7 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1109,6 +1132,7 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ) if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): return cast(ResponseT, api_response) @@ -1141,8 +1165,7 @@ def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def get( @@ -1153,8 +1176,7 @@ def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def get( @@ -1165,8 +1187,7 @@ def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... def get( self, @@ -1192,8 +1213,7 @@ def post( options: RequestOptions = {}, files: RequestFiles | None = None, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def post( @@ -1206,8 +1226,7 @@ def post( files: RequestFiles | None = None, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def post( @@ -1220,8 +1239,7 @@ def post( files: RequestFiles | None = None, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... 
def post( self, @@ -1245,9 +1263,12 @@ def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + ) return self.request(cast_to, opts) def put( @@ -1297,6 +1318,24 @@ def __init__(self, **kwargs: Any) -> None: super().__init__(**kwargs) +try: + import httpx_aiohttp +except ImportError: + + class _DefaultAioHttpClient(httpx.AsyncClient): + def __init__(self, **_kwargs: Any) -> None: + raise RuntimeError("To use the aiohttp client you must have installed the package with the `aiohttp` extra") +else: + + class _DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient): # type: ignore + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + + super().__init__(**kwargs) + + if TYPE_CHECKING: DefaultAsyncHttpxClient = httpx.AsyncClient """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK @@ -1305,12 +1344,19 @@ def __init__(self, **kwargs: Any) -> None: This is useful because overriding the `http_client` with your own instance of `httpx.AsyncClient` will result in httpx's defaults being used, not ours. """ + + DefaultAioHttpClient = httpx.AsyncClient + """An alias to `httpx.AsyncClient` that changes the default HTTP transport to `aiohttp`.""" else: DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient + DefaultAioHttpClient = _DefaultAioHttpClient class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): def __del__(self) -> None: + if self.is_closed: + return + try: # TODO(someday): support non asyncio runtimes here asyncio.get_running_loop().create_task(self.aclose()) @@ -1329,43 +1375,11 @@ def __init__( base_url: str | URL, _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, + timeout: float | Timeout | None | NotGiven = not_given, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -1387,11 +1401,8 @@ def __init__( super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -1401,10 +1412,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, - follow_redirects=True, ) def is_closed(self) -> bool: @@ -1431,9 +1438,9 @@ async def __aexit__( async def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options async def _prepare_request( self, @@ -1453,9 +1460,7 @@ async def request( options: FinalRequestOptions, *, stream: Literal[False] = False, - remaining_retries: Optional[int] = None, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def request( @@ -1465,9 +1470,7 @@ async def request( *, stream: Literal[True], stream_cls: type[_AsyncStreamT], - remaining_retries: Optional[int] = None, - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def request( @@ -1477,9 +1480,7 @@ async def request( *, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... 
async def request( self, @@ -1488,138 +1489,138 @@ async def request( *, stream: bool = False, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, ) -> ResponseT | _AsyncStreamT: - return await self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - remaining_retries=remaining_retries, - ) + if self._platform is None: + # `get_platform` can make blocking IO calls so we + # execute it earlier while we are in an async context + self._platform = await asyncify(get_platform)() - async def _request( - self, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - *, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - remaining_retries: int | None, - ) -> ResponseT | _AsyncStreamT: cast_to = self._maybe_override_cast_to(cast_to, options) - await self._prepare_options(options) - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) - await self._prepare_request(request) + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - try: - response = await self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) - - if retries > 0: - return await self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = await self._prepare_options(options) - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + await self._prepare_request(request) - if retries > 0: - return await self._retry_request( - options, - cast_to, - retries, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err + if options.follow_redirects is not None: + kwargs["follow_redirects"] = options.follow_redirects - log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase - ) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if retries > 0 and self._should_retry(err.response): - await err.response.aclose() - return await self._retry_request( - options, - cast_to, - retries, - err.response.headers, - stream=stream, - stream_cls=stream_cls, + 
response = None + try: + response = await self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) + log.debug("request_id: %s", response.headers.get("x-request-id")) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + await err.response.aclose() + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + await err.response.aread() - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. 
- if not err.response.is_closed: - await err.response.aread() + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + break + assert response is not None, "could not resolve response (should never happen)" return await self._process_response( cast_to=cast_to, options=options, response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) - async def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: httpx.Headers | None, - *, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - ) -> ResponseT | _AsyncStreamT: - remaining = remaining_retries - 1 - if remaining == 1: + async def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken + if remaining_retries == 1: log.debug("1 retry left") else: - log.debug("%i retries left", remaining) + log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) await anyio.sleep(timeout) - return await self._request( - options=options, - cast_to=cast_to, - remaining_retries=remaining, - stream=stream, - stream_cls=stream_cls, - ) - async def _process_response( self, *, @@ -1628,6 +1629,7 @@ async def _process_response( response: httpx.Response, stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, ) -> ResponseT: if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": return cast( @@ -1639,12 +1641,20 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) origin = get_origin(cast_to) or cast_to - if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if ( + inspect.isclass(origin) + and issubclass(origin, BaseAPIResponse) + # we only want to actually return the custom BaseAPIResponse class if we're + # returning the raw response, or if we're not streaming SSE, as if we're streaming + # SSE then `cast_to` doesn't actively reflect the type we need to parse into + and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER))) + ): if not issubclass(origin, AsyncAPIResponse): raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}") @@ -1658,6 +1668,7 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1671,6 +1682,7 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ) if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): return cast(ResponseT, api_response) @@ -1693,8 +1705,7 @@ async def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def get( @@ -1705,8 +1716,7 @@ async def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... 
@overload async def get( @@ -1717,8 +1727,7 @@ async def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... async def get( self, @@ -1742,8 +1751,7 @@ async def post( files: RequestFiles | None = None, options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def post( @@ -1756,8 +1764,7 @@ async def post( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def post( @@ -1770,8 +1777,7 @@ async def post( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... async def post( self, @@ -1795,9 +1801,12 @@ async def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + ) return await self.request(cast_to, opts) async def put( @@ -1846,8 +1855,8 @@ def make_request_options( extra_query: Query | None = None, extra_body: Body | None = None, idempotency_key: str | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - post_parser: PostParser | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + post_parser: PostParser | NotGiven = not_given, ) -> RequestOptions: """Create a dict of type RequestOptions without keys of NotGiven values.""" options: RequestOptions = {} @@ -1876,6 +1885,11 @@ def make_request_options( return options +class ForceMultipartDict(Dict[str, None]): + def __bool__(self) -> bool: + return True + + class OtherPlatform: def __init__(self, name: str) -> None: self.name = name @@ -1943,11 +1957,11 @@ def get_platform() -> Platform: @lru_cache(maxsize=None) -def platform_headers(version: str) -> Dict[str, str]: +def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]: return { "X-Stainless-Lang": "python", "X-Stainless-Package-Version": version, - "X-Stainless-OS": str(get_platform()), + "X-Stainless-OS": str(platform or get_platform()), "X-Stainless-Arch": str(get_architecture()), "X-Stainless-Runtime": get_python_runtime(), "X-Stainless-Runtime-Version": get_python_version(), @@ -1982,7 +1996,6 @@ def get_python_version() -> str: def get_architecture() -> Arch: try: - python_bitness, _ = platform.architecture() machine = platform.machine().lower() except Exception: return "unknown" @@ -1998,7 +2011,7 @@ def get_architecture() -> Arch: return "x64" # TODO: untested - if python_bitness == "32bit": + if sys.maxsize <= 2**32: return "x32" if machine: diff --git a/src/openai/_client.py b/src/openai/_client.py index 8f3060c6f6..bcc82f5b0b 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -3,27 +3,28 @@ from __future__ import annotations import os -from typing import Any, Union, Mapping +from typing import TYPE_CHECKING, Any, Mapping from typing_extensions import Self, override import httpx -from . import resources, _exceptions +from . 
import _exceptions from ._qs import Querystring from ._types import ( - NOT_GIVEN, Omit, Timeout, NotGiven, Transport, ProxiesTypes, RequestOptions, + not_given, ) from ._utils import ( is_given, is_mapping, get_async_library, ) +from ._compat import cached_property from ._version import __version__ from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import OpenAIError, APIStatusError @@ -33,38 +34,66 @@ AsyncAPIClient, ) -__all__ = [ - "Timeout", - "Transport", - "ProxiesTypes", - "RequestOptions", - "resources", - "OpenAI", - "AsyncOpenAI", - "Client", - "AsyncClient", -] +if TYPE_CHECKING: + from .resources import ( + beta, + chat, + audio, + evals, + files, + images, + models, + videos, + batches, + uploads, + realtime, + responses, + containers, + embeddings, + completions, + fine_tuning, + moderations, + conversations, + vector_stores, + ) + from .resources.files import Files, AsyncFiles + from .resources.images import Images, AsyncImages + from .resources.models import Models, AsyncModels + from .resources.videos import Videos, AsyncVideos + from .resources.batches import Batches, AsyncBatches + from .resources.webhooks import Webhooks, AsyncWebhooks + from .resources.beta.beta import Beta, AsyncBeta + from .resources.chat.chat import Chat, AsyncChat + from .resources.embeddings import Embeddings, AsyncEmbeddings + from .resources.audio.audio import Audio, AsyncAudio + from .resources.completions import Completions, AsyncCompletions + from .resources.evals.evals import Evals, AsyncEvals + from .resources.moderations import Moderations, AsyncModerations + from .resources.uploads.uploads import Uploads, AsyncUploads + from .resources.realtime.realtime import Realtime, AsyncRealtime + from .resources.responses.responses import Responses, AsyncResponses + from .resources.containers.containers import Containers, AsyncContainers + from .resources.fine_tuning.fine_tuning import FineTuning, AsyncFineTuning + from .resources.conversations.conversations import Conversations, AsyncConversations + from .resources.vector_stores.vector_stores import VectorStores, AsyncVectorStores + +__all__ = ["Timeout", "Transport", "ProxiesTypes", "RequestOptions", "OpenAI", "AsyncOpenAI", "Client", "AsyncClient"] class OpenAI(SyncAPIClient): - completions: resources.Completions - chat: resources.Chat - embeddings: resources.Embeddings - files: resources.Files - images: resources.Images - audio: resources.Audio - moderations: resources.Moderations - models: resources.Models - fine_tuning: resources.FineTuning - beta: resources.Beta - batches: resources.Batches - with_raw_response: OpenAIWithRawResponse - with_streaming_response: OpenAIWithStreamedResponse - # client options api_key: str organization: str | None project: str | None + webhook_secret: str | None + + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. 
For example: 'http://example.com' becomes + 'wss://example.com' + """ def __init__( self, @@ -72,8 +101,10 @@ def __init__( api_key: str | None = None, organization: str | None = None, project: str | None = None, + webhook_secret: str | None = None, base_url: str | httpx.URL | None = None, - timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + websocket_base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = not_given, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, @@ -91,12 +122,13 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new synchronous openai client instance. + """Construct a new synchronous OpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` - `project` from `OPENAI_PROJECT_ID` + - `webhook_secret` from `OPENAI_WEBHOOK_SECRET` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -114,6 +146,12 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + if webhook_secret is None: + webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET") + self.webhook_secret = webhook_secret + + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -132,24 +170,138 @@ def __init__( self._default_stream_cls = Stream - self.completions = resources.Completions(self) - self.chat = resources.Chat(self) - self.embeddings = resources.Embeddings(self) - self.files = resources.Files(self) - self.images = resources.Images(self) - self.audio = resources.Audio(self) - self.moderations = resources.Moderations(self) - self.models = resources.Models(self) - self.fine_tuning = resources.FineTuning(self) - self.beta = resources.Beta(self) - self.batches = resources.Batches(self) - self.with_raw_response = OpenAIWithRawResponse(self) - self.with_streaming_response = OpenAIWithStreamedResponse(self) + @cached_property + def completions(self) -> Completions: + from .resources.completions import Completions + + return Completions(self) + + @cached_property + def chat(self) -> Chat: + from .resources.chat import Chat + + return Chat(self) + + @cached_property + def embeddings(self) -> Embeddings: + from .resources.embeddings import Embeddings + + return Embeddings(self) + + @cached_property + def files(self) -> Files: + from .resources.files import Files + + return Files(self) + + @cached_property + def images(self) -> Images: + from .resources.images import Images + + return Images(self) + + @cached_property + def audio(self) -> Audio: + from .resources.audio import Audio + + return Audio(self) + + @cached_property + def moderations(self) -> Moderations: + from .resources.moderations import Moderations + + return Moderations(self) + + @cached_property + def models(self) -> Models: + from .resources.models import Models + + return Models(self) + + @cached_property + def fine_tuning(self) -> FineTuning: + from .resources.fine_tuning import FineTuning + + return FineTuning(self) + + @cached_property + def vector_stores(self) -> VectorStores: + from .resources.vector_stores import VectorStores + + return VectorStores(self) + + @cached_property + def webhooks(self) -> Webhooks: + from 
.resources.webhooks import Webhooks + + return Webhooks(self) + + @cached_property + def beta(self) -> Beta: + from .resources.beta import Beta + + return Beta(self) + + @cached_property + def batches(self) -> Batches: + from .resources.batches import Batches + + return Batches(self) + + @cached_property + def uploads(self) -> Uploads: + from .resources.uploads import Uploads + + return Uploads(self) + + @cached_property + def responses(self) -> Responses: + from .resources.responses import Responses + + return Responses(self) + + @cached_property + def realtime(self) -> Realtime: + from .resources.realtime import Realtime + + return Realtime(self) + + @cached_property + def conversations(self) -> Conversations: + from .resources.conversations import Conversations + + return Conversations(self) + + @cached_property + def evals(self) -> Evals: + from .resources.evals import Evals + + return Evals(self) + + @cached_property + def containers(self) -> Containers: + from .resources.containers import Containers + + return Containers(self) + + @cached_property + def videos(self) -> Videos: + from .resources.videos import Videos + + return Videos(self) + + @cached_property + def with_raw_response(self) -> OpenAIWithRawResponse: + return OpenAIWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> OpenAIWithStreamedResponse: + return OpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -174,10 +326,12 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + webhook_secret: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | Timeout | None | NotGiven = not_given, http_client: httpx.Client | None = None, - max_retries: int | NotGiven = NOT_GIVEN, + max_retries: int | NotGiven = not_given, default_headers: Mapping[str, str] | None = None, set_default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, @@ -210,6 +364,8 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + webhook_secret=webhook_secret or self.webhook_secret, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -259,24 +415,19 @@ def _make_status_error( class AsyncOpenAI(AsyncAPIClient): - completions: resources.AsyncCompletions - chat: resources.AsyncChat - embeddings: resources.AsyncEmbeddings - files: resources.AsyncFiles - images: resources.AsyncImages - audio: resources.AsyncAudio - moderations: resources.AsyncModerations - models: resources.AsyncModels - fine_tuning: resources.AsyncFineTuning - beta: resources.AsyncBeta - batches: resources.AsyncBatches - with_raw_response: AsyncOpenAIWithRawResponse - with_streaming_response: AsyncOpenAIWithStreamedResponse - # client options api_key: str organization: str | None project: str | None + webhook_secret: str | None + + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. 
For example: 'http://example.com' becomes + 'wss://example.com' + """ def __init__( self, @@ -284,8 +435,10 @@ def __init__( api_key: str | None = None, organization: str | None = None, project: str | None = None, + webhook_secret: str | None = None, base_url: str | httpx.URL | None = None, - timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + websocket_base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = not_given, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, @@ -303,12 +456,13 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new async openai client instance. + """Construct a new async AsyncOpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` - `organization` from `OPENAI_ORG_ID` - `project` from `OPENAI_PROJECT_ID` + - `webhook_secret` from `OPENAI_WEBHOOK_SECRET` """ if api_key is None: api_key = os.environ.get("OPENAI_API_KEY") @@ -326,6 +480,12 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + if webhook_secret is None: + webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET") + self.webhook_secret = webhook_secret + + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -344,24 +504,138 @@ def __init__( self._default_stream_cls = AsyncStream - self.completions = resources.AsyncCompletions(self) - self.chat = resources.AsyncChat(self) - self.embeddings = resources.AsyncEmbeddings(self) - self.files = resources.AsyncFiles(self) - self.images = resources.AsyncImages(self) - self.audio = resources.AsyncAudio(self) - self.moderations = resources.AsyncModerations(self) - self.models = resources.AsyncModels(self) - self.fine_tuning = resources.AsyncFineTuning(self) - self.beta = resources.AsyncBeta(self) - self.batches = resources.AsyncBatches(self) - self.with_raw_response = AsyncOpenAIWithRawResponse(self) - self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self) + @cached_property + def completions(self) -> AsyncCompletions: + from .resources.completions import AsyncCompletions + + return AsyncCompletions(self) + + @cached_property + def chat(self) -> AsyncChat: + from .resources.chat import AsyncChat + + return AsyncChat(self) + + @cached_property + def embeddings(self) -> AsyncEmbeddings: + from .resources.embeddings import AsyncEmbeddings + + return AsyncEmbeddings(self) + + @cached_property + def files(self) -> AsyncFiles: + from .resources.files import AsyncFiles + + return AsyncFiles(self) + + @cached_property + def images(self) -> AsyncImages: + from .resources.images import AsyncImages + + return AsyncImages(self) + + @cached_property + def audio(self) -> AsyncAudio: + from .resources.audio import AsyncAudio + + return AsyncAudio(self) + + @cached_property + def moderations(self) -> AsyncModerations: + from .resources.moderations import AsyncModerations + + return AsyncModerations(self) + + @cached_property + def models(self) -> AsyncModels: + from .resources.models import AsyncModels + + return AsyncModels(self) + + @cached_property + def fine_tuning(self) -> AsyncFineTuning: + from .resources.fine_tuning import AsyncFineTuning + + return AsyncFineTuning(self) + + 
@cached_property + def vector_stores(self) -> AsyncVectorStores: + from .resources.vector_stores import AsyncVectorStores + + return AsyncVectorStores(self) + + @cached_property + def webhooks(self) -> AsyncWebhooks: + from .resources.webhooks import AsyncWebhooks + + return AsyncWebhooks(self) + + @cached_property + def beta(self) -> AsyncBeta: + from .resources.beta import AsyncBeta + + return AsyncBeta(self) + + @cached_property + def batches(self) -> AsyncBatches: + from .resources.batches import AsyncBatches + + return AsyncBatches(self) + + @cached_property + def uploads(self) -> AsyncUploads: + from .resources.uploads import AsyncUploads + + return AsyncUploads(self) + + @cached_property + def responses(self) -> AsyncResponses: + from .resources.responses import AsyncResponses + + return AsyncResponses(self) + + @cached_property + def realtime(self) -> AsyncRealtime: + from .resources.realtime import AsyncRealtime + + return AsyncRealtime(self) + + @cached_property + def conversations(self) -> AsyncConversations: + from .resources.conversations import AsyncConversations + + return AsyncConversations(self) + + @cached_property + def evals(self) -> AsyncEvals: + from .resources.evals import AsyncEvals + + return AsyncEvals(self) + + @cached_property + def containers(self) -> AsyncContainers: + from .resources.containers import AsyncContainers + + return AsyncContainers(self) + + @cached_property + def videos(self) -> AsyncVideos: + from .resources.videos import AsyncVideos + + return AsyncVideos(self) + + @cached_property + def with_raw_response(self) -> AsyncOpenAIWithRawResponse: + return AsyncOpenAIWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncOpenAIWithStreamedResponse: + return AsyncOpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -386,10 +660,12 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + webhook_secret: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | Timeout | None | NotGiven = not_given, http_client: httpx.AsyncClient | None = None, - max_retries: int | NotGiven = NOT_GIVEN, + max_retries: int | NotGiven = not_given, default_headers: Mapping[str, str] | None = None, set_default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, @@ -422,6 +698,8 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + webhook_secret=webhook_secret or self.webhook_secret, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -471,63 +749,487 @@ def _make_status_error( class OpenAIWithRawResponse: + _client: OpenAI + def __init__(self, client: OpenAI) -> None: - self.completions = resources.CompletionsWithRawResponse(client.completions) - self.chat = resources.ChatWithRawResponse(client.chat) - self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) - self.files = resources.FilesWithRawResponse(client.files) - self.images = resources.ImagesWithRawResponse(client.images) - self.audio = resources.AudioWithRawResponse(client.audio) - 
self.moderations = resources.ModerationsWithRawResponse(client.moderations) - self.models = resources.ModelsWithRawResponse(client.models) - self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning) - self.beta = resources.BetaWithRawResponse(client.beta) - self.batches = resources.BatchesWithRawResponse(client.batches) + self._client = client + + @cached_property + def completions(self) -> completions.CompletionsWithRawResponse: + from .resources.completions import CompletionsWithRawResponse + + return CompletionsWithRawResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.ChatWithRawResponse: + from .resources.chat import ChatWithRawResponse + + return ChatWithRawResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.EmbeddingsWithRawResponse: + from .resources.embeddings import EmbeddingsWithRawResponse + + return EmbeddingsWithRawResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.FilesWithRawResponse: + from .resources.files import FilesWithRawResponse + + return FilesWithRawResponse(self._client.files) + + @cached_property + def images(self) -> images.ImagesWithRawResponse: + from .resources.images import ImagesWithRawResponse + + return ImagesWithRawResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AudioWithRawResponse: + from .resources.audio import AudioWithRawResponse + + return AudioWithRawResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.ModerationsWithRawResponse: + from .resources.moderations import ModerationsWithRawResponse + + return ModerationsWithRawResponse(self._client.moderations) + + @cached_property + def models(self) -> models.ModelsWithRawResponse: + from .resources.models import ModelsWithRawResponse + + return ModelsWithRawResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.FineTuningWithRawResponse: + from .resources.fine_tuning import FineTuningWithRawResponse + + return FineTuningWithRawResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.VectorStoresWithRawResponse: + from .resources.vector_stores import VectorStoresWithRawResponse + + return VectorStoresWithRawResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.BetaWithRawResponse: + from .resources.beta import BetaWithRawResponse + + return BetaWithRawResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.BatchesWithRawResponse: + from .resources.batches import BatchesWithRawResponse + + return BatchesWithRawResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.UploadsWithRawResponse: + from .resources.uploads import UploadsWithRawResponse + + return UploadsWithRawResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.ResponsesWithRawResponse: + from .resources.responses import ResponsesWithRawResponse + + return ResponsesWithRawResponse(self._client.responses) + + @cached_property + def realtime(self) -> realtime.RealtimeWithRawResponse: + from .resources.realtime import RealtimeWithRawResponse + + return RealtimeWithRawResponse(self._client.realtime) + + @cached_property + def conversations(self) -> conversations.ConversationsWithRawResponse: + from .resources.conversations import ConversationsWithRawResponse + + return ConversationsWithRawResponse(self._client.conversations) + + @cached_property + def evals(self) -> 
evals.EvalsWithRawResponse: + from .resources.evals import EvalsWithRawResponse + + return EvalsWithRawResponse(self._client.evals) + + @cached_property + def containers(self) -> containers.ContainersWithRawResponse: + from .resources.containers import ContainersWithRawResponse + + return ContainersWithRawResponse(self._client.containers) + + @cached_property + def videos(self) -> videos.VideosWithRawResponse: + from .resources.videos import VideosWithRawResponse + + return VideosWithRawResponse(self._client.videos) class AsyncOpenAIWithRawResponse: + _client: AsyncOpenAI + def __init__(self, client: AsyncOpenAI) -> None: - self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) - self.chat = resources.AsyncChatWithRawResponse(client.chat) - self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) - self.files = resources.AsyncFilesWithRawResponse(client.files) - self.images = resources.AsyncImagesWithRawResponse(client.images) - self.audio = resources.AsyncAudioWithRawResponse(client.audio) - self.moderations = resources.AsyncModerationsWithRawResponse(client.moderations) - self.models = resources.AsyncModelsWithRawResponse(client.models) - self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning) - self.beta = resources.AsyncBetaWithRawResponse(client.beta) - self.batches = resources.AsyncBatchesWithRawResponse(client.batches) + self._client = client + + @cached_property + def completions(self) -> completions.AsyncCompletionsWithRawResponse: + from .resources.completions import AsyncCompletionsWithRawResponse + + return AsyncCompletionsWithRawResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.AsyncChatWithRawResponse: + from .resources.chat import AsyncChatWithRawResponse + + return AsyncChatWithRawResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.AsyncEmbeddingsWithRawResponse: + from .resources.embeddings import AsyncEmbeddingsWithRawResponse + + return AsyncEmbeddingsWithRawResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.AsyncFilesWithRawResponse: + from .resources.files import AsyncFilesWithRawResponse + + return AsyncFilesWithRawResponse(self._client.files) + + @cached_property + def images(self) -> images.AsyncImagesWithRawResponse: + from .resources.images import AsyncImagesWithRawResponse + + return AsyncImagesWithRawResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AsyncAudioWithRawResponse: + from .resources.audio import AsyncAudioWithRawResponse + + return AsyncAudioWithRawResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.AsyncModerationsWithRawResponse: + from .resources.moderations import AsyncModerationsWithRawResponse + + return AsyncModerationsWithRawResponse(self._client.moderations) + + @cached_property + def models(self) -> models.AsyncModelsWithRawResponse: + from .resources.models import AsyncModelsWithRawResponse + + return AsyncModelsWithRawResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.AsyncFineTuningWithRawResponse: + from .resources.fine_tuning import AsyncFineTuningWithRawResponse + + return AsyncFineTuningWithRawResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.AsyncVectorStoresWithRawResponse: + from .resources.vector_stores import AsyncVectorStoresWithRawResponse + + return AsyncVectorStoresWithRawResponse(self._client.vector_stores) + + 
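# The *WithRawResponse / *WithStreamedResponse wrappers above follow the same idea: instead
# of constructing every per-resource wrapper eagerly in __init__, they keep only a reference
# to the client and build each wrapper on demand. A rough, self-contained sketch with
# simplified stand-in classes (not the SDK's generated ones):
from functools import cached_property


class DemoResource:
    pass


class DemoResourceWithRawResponse:
    def __init__(self, resource: DemoResource) -> None:
        self._resource = resource


class DemoClient:
    @cached_property
    def demo(self) -> DemoResource:
        return DemoResource()

    @cached_property
    def with_raw_response(self) -> "DemoClientWithRawResponse":
        return DemoClientWithRawResponse(self)


class DemoClientWithRawResponse:
    def __init__(self, client: DemoClient) -> None:
        self._client = client  # only the client is stored eagerly

    @cached_property
    def demo(self) -> DemoResourceWithRawResponse:
        # The per-resource wrapper is created the first time it is accessed.
        return DemoResourceWithRawResponse(self._client.demo)


wrapped = DemoClient().with_raw_response
assert isinstance(wrapped.demo, DemoResourceWithRawResponse)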
@cached_property + def beta(self) -> beta.AsyncBetaWithRawResponse: + from .resources.beta import AsyncBetaWithRawResponse + + return AsyncBetaWithRawResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.AsyncBatchesWithRawResponse: + from .resources.batches import AsyncBatchesWithRawResponse + + return AsyncBatchesWithRawResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.AsyncUploadsWithRawResponse: + from .resources.uploads import AsyncUploadsWithRawResponse + + return AsyncUploadsWithRawResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.AsyncResponsesWithRawResponse: + from .resources.responses import AsyncResponsesWithRawResponse + + return AsyncResponsesWithRawResponse(self._client.responses) + + @cached_property + def realtime(self) -> realtime.AsyncRealtimeWithRawResponse: + from .resources.realtime import AsyncRealtimeWithRawResponse + + return AsyncRealtimeWithRawResponse(self._client.realtime) + + @cached_property + def conversations(self) -> conversations.AsyncConversationsWithRawResponse: + from .resources.conversations import AsyncConversationsWithRawResponse + + return AsyncConversationsWithRawResponse(self._client.conversations) + + @cached_property + def evals(self) -> evals.AsyncEvalsWithRawResponse: + from .resources.evals import AsyncEvalsWithRawResponse + + return AsyncEvalsWithRawResponse(self._client.evals) + + @cached_property + def containers(self) -> containers.AsyncContainersWithRawResponse: + from .resources.containers import AsyncContainersWithRawResponse + + return AsyncContainersWithRawResponse(self._client.containers) + + @cached_property + def videos(self) -> videos.AsyncVideosWithRawResponse: + from .resources.videos import AsyncVideosWithRawResponse + + return AsyncVideosWithRawResponse(self._client.videos) class OpenAIWithStreamedResponse: + _client: OpenAI + def __init__(self, client: OpenAI) -> None: - self.completions = resources.CompletionsWithStreamingResponse(client.completions) - self.chat = resources.ChatWithStreamingResponse(client.chat) - self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings) - self.files = resources.FilesWithStreamingResponse(client.files) - self.images = resources.ImagesWithStreamingResponse(client.images) - self.audio = resources.AudioWithStreamingResponse(client.audio) - self.moderations = resources.ModerationsWithStreamingResponse(client.moderations) - self.models = resources.ModelsWithStreamingResponse(client.models) - self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning) - self.beta = resources.BetaWithStreamingResponse(client.beta) - self.batches = resources.BatchesWithStreamingResponse(client.batches) + self._client = client + + @cached_property + def completions(self) -> completions.CompletionsWithStreamingResponse: + from .resources.completions import CompletionsWithStreamingResponse + + return CompletionsWithStreamingResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.ChatWithStreamingResponse: + from .resources.chat import ChatWithStreamingResponse + + return ChatWithStreamingResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.EmbeddingsWithStreamingResponse: + from .resources.embeddings import EmbeddingsWithStreamingResponse + + return EmbeddingsWithStreamingResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.FilesWithStreamingResponse: + from .resources.files import 
FilesWithStreamingResponse + + return FilesWithStreamingResponse(self._client.files) + + @cached_property + def images(self) -> images.ImagesWithStreamingResponse: + from .resources.images import ImagesWithStreamingResponse + + return ImagesWithStreamingResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AudioWithStreamingResponse: + from .resources.audio import AudioWithStreamingResponse + + return AudioWithStreamingResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.ModerationsWithStreamingResponse: + from .resources.moderations import ModerationsWithStreamingResponse + + return ModerationsWithStreamingResponse(self._client.moderations) + + @cached_property + def models(self) -> models.ModelsWithStreamingResponse: + from .resources.models import ModelsWithStreamingResponse + + return ModelsWithStreamingResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.FineTuningWithStreamingResponse: + from .resources.fine_tuning import FineTuningWithStreamingResponse + + return FineTuningWithStreamingResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.VectorStoresWithStreamingResponse: + from .resources.vector_stores import VectorStoresWithStreamingResponse + + return VectorStoresWithStreamingResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.BetaWithStreamingResponse: + from .resources.beta import BetaWithStreamingResponse + + return BetaWithStreamingResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.BatchesWithStreamingResponse: + from .resources.batches import BatchesWithStreamingResponse + + return BatchesWithStreamingResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.UploadsWithStreamingResponse: + from .resources.uploads import UploadsWithStreamingResponse + + return UploadsWithStreamingResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.ResponsesWithStreamingResponse: + from .resources.responses import ResponsesWithStreamingResponse + + return ResponsesWithStreamingResponse(self._client.responses) + + @cached_property + def realtime(self) -> realtime.RealtimeWithStreamingResponse: + from .resources.realtime import RealtimeWithStreamingResponse + + return RealtimeWithStreamingResponse(self._client.realtime) + + @cached_property + def conversations(self) -> conversations.ConversationsWithStreamingResponse: + from .resources.conversations import ConversationsWithStreamingResponse + + return ConversationsWithStreamingResponse(self._client.conversations) + + @cached_property + def evals(self) -> evals.EvalsWithStreamingResponse: + from .resources.evals import EvalsWithStreamingResponse + + return EvalsWithStreamingResponse(self._client.evals) + + @cached_property + def containers(self) -> containers.ContainersWithStreamingResponse: + from .resources.containers import ContainersWithStreamingResponse + + return ContainersWithStreamingResponse(self._client.containers) + + @cached_property + def videos(self) -> videos.VideosWithStreamingResponse: + from .resources.videos import VideosWithStreamingResponse + + return VideosWithStreamingResponse(self._client.videos) class AsyncOpenAIWithStreamedResponse: + _client: AsyncOpenAI + def __init__(self, client: AsyncOpenAI) -> None: - self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions) - self.chat = resources.AsyncChatWithStreamingResponse(client.chat) - 
self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings) - self.files = resources.AsyncFilesWithStreamingResponse(client.files) - self.images = resources.AsyncImagesWithStreamingResponse(client.images) - self.audio = resources.AsyncAudioWithStreamingResponse(client.audio) - self.moderations = resources.AsyncModerationsWithStreamingResponse(client.moderations) - self.models = resources.AsyncModelsWithStreamingResponse(client.models) - self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning) - self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) - self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches) + self._client = client + + @cached_property + def completions(self) -> completions.AsyncCompletionsWithStreamingResponse: + from .resources.completions import AsyncCompletionsWithStreamingResponse + + return AsyncCompletionsWithStreamingResponse(self._client.completions) + + @cached_property + def chat(self) -> chat.AsyncChatWithStreamingResponse: + from .resources.chat import AsyncChatWithStreamingResponse + + return AsyncChatWithStreamingResponse(self._client.chat) + + @cached_property + def embeddings(self) -> embeddings.AsyncEmbeddingsWithStreamingResponse: + from .resources.embeddings import AsyncEmbeddingsWithStreamingResponse + + return AsyncEmbeddingsWithStreamingResponse(self._client.embeddings) + + @cached_property + def files(self) -> files.AsyncFilesWithStreamingResponse: + from .resources.files import AsyncFilesWithStreamingResponse + + return AsyncFilesWithStreamingResponse(self._client.files) + + @cached_property + def images(self) -> images.AsyncImagesWithStreamingResponse: + from .resources.images import AsyncImagesWithStreamingResponse + + return AsyncImagesWithStreamingResponse(self._client.images) + + @cached_property + def audio(self) -> audio.AsyncAudioWithStreamingResponse: + from .resources.audio import AsyncAudioWithStreamingResponse + + return AsyncAudioWithStreamingResponse(self._client.audio) + + @cached_property + def moderations(self) -> moderations.AsyncModerationsWithStreamingResponse: + from .resources.moderations import AsyncModerationsWithStreamingResponse + + return AsyncModerationsWithStreamingResponse(self._client.moderations) + + @cached_property + def models(self) -> models.AsyncModelsWithStreamingResponse: + from .resources.models import AsyncModelsWithStreamingResponse + + return AsyncModelsWithStreamingResponse(self._client.models) + + @cached_property + def fine_tuning(self) -> fine_tuning.AsyncFineTuningWithStreamingResponse: + from .resources.fine_tuning import AsyncFineTuningWithStreamingResponse + + return AsyncFineTuningWithStreamingResponse(self._client.fine_tuning) + + @cached_property + def vector_stores(self) -> vector_stores.AsyncVectorStoresWithStreamingResponse: + from .resources.vector_stores import AsyncVectorStoresWithStreamingResponse + + return AsyncVectorStoresWithStreamingResponse(self._client.vector_stores) + + @cached_property + def beta(self) -> beta.AsyncBetaWithStreamingResponse: + from .resources.beta import AsyncBetaWithStreamingResponse + + return AsyncBetaWithStreamingResponse(self._client.beta) + + @cached_property + def batches(self) -> batches.AsyncBatchesWithStreamingResponse: + from .resources.batches import AsyncBatchesWithStreamingResponse + + return AsyncBatchesWithStreamingResponse(self._client.batches) + + @cached_property + def uploads(self) -> uploads.AsyncUploadsWithStreamingResponse: + from .resources.uploads import 
AsyncUploadsWithStreamingResponse + + return AsyncUploadsWithStreamingResponse(self._client.uploads) + + @cached_property + def responses(self) -> responses.AsyncResponsesWithStreamingResponse: + from .resources.responses import AsyncResponsesWithStreamingResponse + + return AsyncResponsesWithStreamingResponse(self._client.responses) + + @cached_property + def realtime(self) -> realtime.AsyncRealtimeWithStreamingResponse: + from .resources.realtime import AsyncRealtimeWithStreamingResponse + + return AsyncRealtimeWithStreamingResponse(self._client.realtime) + + @cached_property + def conversations(self) -> conversations.AsyncConversationsWithStreamingResponse: + from .resources.conversations import AsyncConversationsWithStreamingResponse + + return AsyncConversationsWithStreamingResponse(self._client.conversations) + + @cached_property + def evals(self) -> evals.AsyncEvalsWithStreamingResponse: + from .resources.evals import AsyncEvalsWithStreamingResponse + + return AsyncEvalsWithStreamingResponse(self._client.evals) + + @cached_property + def containers(self) -> containers.AsyncContainersWithStreamingResponse: + from .resources.containers import AsyncContainersWithStreamingResponse + + return AsyncContainersWithStreamingResponse(self._client.containers) + + @cached_property + def videos(self) -> videos.AsyncVideosWithStreamingResponse: + from .resources.videos import AsyncVideosWithStreamingResponse + + return AsyncVideosWithStreamingResponse(self._client.videos) Client = OpenAI diff --git a/src/openai/_compat.py b/src/openai/_compat.py index 74c7639b4c..bdef67f042 100644 --- a/src/openai/_compat.py +++ b/src/openai/_compat.py @@ -2,24 +2,23 @@ from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload from datetime import date, datetime -from typing_extensions import Self +from typing_extensions import Self, Literal import pydantic from pydantic.fields import FieldInfo -from ._types import StrBytesIntFloat +from ._types import IncEx, StrBytesIntFloat _T = TypeVar("_T") _ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) -# --------------- Pydantic v2 compatibility --------------- +# --------------- Pydantic v2, v3 compatibility --------------- # Pyright incorrectly reports some of our functions as overriding a method when they don't # pyright: reportIncompatibleMethodOverride=false -PYDANTIC_V2 = pydantic.VERSION.startswith("2.") +PYDANTIC_V1 = pydantic.VERSION.startswith("1.") -# v1 re-exports if TYPE_CHECKING: def parse_date(value: date | StrBytesIntFloat) -> date: # noqa: ARG001 @@ -44,106 +43,116 @@ def is_typeddict(type_: type[Any]) -> bool: # noqa: ARG001 ... 
else: - if PYDANTIC_V2: - from pydantic.v1.typing import ( + # v1 re-exports + if PYDANTIC_V1: + from pydantic.typing import ( get_args as get_args, is_union as is_union, get_origin as get_origin, is_typeddict as is_typeddict, is_literal_type as is_literal_type, ) - from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime + from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime else: - from pydantic.typing import ( + from ._utils import ( get_args as get_args, is_union as is_union, get_origin as get_origin, + parse_date as parse_date, is_typeddict as is_typeddict, + parse_datetime as parse_datetime, is_literal_type as is_literal_type, ) - from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # refactored config if TYPE_CHECKING: from pydantic import ConfigDict as ConfigDict else: - if PYDANTIC_V2: - from pydantic import ConfigDict - else: + if PYDANTIC_V1: # TODO: provide an error message here? ConfigDict = None + else: + from pydantic import ConfigDict as ConfigDict # renamed methods / properties def parse_obj(model: type[_ModelT], value: object) -> _ModelT: - if PYDANTIC_V2: - return model.model_validate(value) - else: + if PYDANTIC_V1: return cast(_ModelT, model.parse_obj(value)) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + else: + return model.model_validate(value) def field_is_required(field: FieldInfo) -> bool: - if PYDANTIC_V2: - return field.is_required() - return field.required # type: ignore + if PYDANTIC_V1: + return field.required # type: ignore + return field.is_required() def field_get_default(field: FieldInfo) -> Any: value = field.get_default() - if PYDANTIC_V2: - from pydantic_core import PydanticUndefined - - if value == PydanticUndefined: - return None + if PYDANTIC_V1: return value + from pydantic_core import PydanticUndefined + + if value == PydanticUndefined: + return None return value def field_outer_type(field: FieldInfo) -> Any: - if PYDANTIC_V2: - return field.annotation - return field.outer_type_ # type: ignore + if PYDANTIC_V1: + return field.outer_type_ # type: ignore + return field.annotation def get_model_config(model: type[pydantic.BaseModel]) -> Any: - if PYDANTIC_V2: - return model.model_config - return model.__config__ # type: ignore + if PYDANTIC_V1: + return model.__config__ # type: ignore + return model.model_config def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: - if PYDANTIC_V2: - return model.model_fields - return model.__fields__ # type: ignore + if PYDANTIC_V1: + return model.__fields__ # type: ignore + return model.model_fields -def model_copy(model: _ModelT) -> _ModelT: - if PYDANTIC_V2: - return model.model_copy() - return model.copy() # type: ignore +def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT: + if PYDANTIC_V1: + return model.copy(deep=deep) # type: ignore + return model.model_copy(deep=deep) def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: - if PYDANTIC_V2: - return model.model_dump_json(indent=indent) - return model.json(indent=indent) # type: ignore + if PYDANTIC_V1: + return model.json(indent=indent) # type: ignore + return model.model_dump_json(indent=indent) def model_dump( model: pydantic.BaseModel, *, + exclude: IncEx | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, + warnings: bool = True, + mode: Literal["json", "python"] = "python", ) -> dict[str, Any]: - if PYDANTIC_V2: + if (not 
PYDANTIC_V1) or hasattr(model, "model_dump"): return model.model_dump( + mode=mode, + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, + # warnings are not supported in Pydantic v1 + warnings=True if PYDANTIC_V1 else warnings, ) return cast( "dict[str, Any]", model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, ), @@ -151,30 +160,26 @@ def model_dump( def model_parse(model: type[_ModelT], data: Any) -> _ModelT: - if PYDANTIC_V2: - return model.model_validate(data) - return model.parse_obj(data) # pyright: ignore[reportDeprecated] + if PYDANTIC_V1: + return model.parse_obj(data) # pyright: ignore[reportDeprecated] + return model.model_validate(data) # generic models if TYPE_CHECKING: - class GenericModel(pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... else: - if PYDANTIC_V2: + if PYDANTIC_V1: + import pydantic.generics + + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ... + else: # there no longer needs to be a distinction in v2 but # we still have to create our own subclass to avoid # inconsistent MRO ordering errors - class GenericModel(pydantic.BaseModel): - ... - - else: - import pydantic.generics - - class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... # cached properties @@ -193,30 +198,22 @@ class typed_cached_property(Generic[_T]): func: Callable[[Any], _T] attrname: str | None - def __init__(self, func: Callable[[Any], _T]) -> None: - ... + def __init__(self, func: Callable[[Any], _T]) -> None: ... @overload - def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: - ... + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ... @overload - def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: - ... + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ... def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: raise NotImplementedError() - def __set_name__(self, owner: type[Any], name: str) -> None: - ... + def __set_name__(self, owner: type[Any], name: str) -> None: ... # __set__ is not defined at runtime, but @cached_property is designed to be settable - def __set__(self, instance: object, value: _T) -> None: - ... + def __set__(self, instance: object, value: _T) -> None: ... 
else: - try: - from functools import cached_property as cached_property - except ImportError: - from cached_property import cached_property as cached_property + from functools import cached_property as cached_property typed_cached_property = cached_property diff --git a/src/openai/_constants.py b/src/openai/_constants.py index 3f82bed037..7029dc72b0 100644 --- a/src/openai/_constants.py +++ b/src/openai/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 10 minutes -DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=600, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100) diff --git a/src/openai/_extras/__init__.py b/src/openai/_extras/__init__.py deleted file mode 100644 index 864dac4171..0000000000 --- a/src/openai/_extras/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .numpy_proxy import numpy as numpy, has_numpy as has_numpy -from .pandas_proxy import pandas as pandas diff --git a/src/openai/_extras/_common.py b/src/openai/_extras/_common.py deleted file mode 100644 index 6e71720e64..0000000000 --- a/src/openai/_extras/_common.py +++ /dev/null @@ -1,21 +0,0 @@ -from .._exceptions import OpenAIError - -INSTRUCTIONS = """ - -OpenAI error: - - missing `{library}` - -This feature requires additional dependencies: - - $ pip install openai[{extra}] - -""" - - -def format_instructions(*, library: str, extra: str) -> str: - return INSTRUCTIONS.format(library=library, extra=extra) - - -class MissingDependencyError(OpenAIError): - pass diff --git a/src/openai/_extras/numpy_proxy.py b/src/openai/_extras/numpy_proxy.py deleted file mode 100644 index 27880bf132..0000000000 --- a/src/openai/_extras/numpy_proxy.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from ._common import MissingDependencyError, format_instructions - -if TYPE_CHECKING: - import numpy as numpy - - -NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="datalib") - - -class NumpyProxy(LazyProxy[Any]): - @override - def __load__(self) -> Any: - try: - import numpy - except ImportError as err: - raise MissingDependencyError(NUMPY_INSTRUCTIONS) from err - - return numpy - - -if not TYPE_CHECKING: - numpy = NumpyProxy() - - -def has_numpy() -> bool: - try: - import numpy # noqa: F401 # pyright: ignore[reportUnusedImport] - except ImportError: - return False - - return True diff --git a/src/openai/_extras/pandas_proxy.py b/src/openai/_extras/pandas_proxy.py deleted file mode 100644 index 686377bade..0000000000 --- a/src/openai/_extras/pandas_proxy.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from ._common import MissingDependencyError, format_instructions - -if TYPE_CHECKING: - import pandas as pandas - - -PANDAS_INSTRUCTIONS = format_instructions(library="pandas", extra="datalib") - - -class PandasProxy(LazyProxy[Any]): - @override - def __load__(self) -> Any: - try: - import pandas - except ImportError as err: - raise MissingDependencyError(PANDAS_INSTRUCTIONS) from err - - return pandas - - -if not TYPE_CHECKING: - pandas = PandasProxy() diff --git a/src/openai/_files.py b/src/openai/_files.py index ad7b668b4b..7b23ca084a 100644 --- a/src/openai/_files.py +++ 
b/src/openai/_files.py @@ -39,13 +39,11 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None: @overload -def to_httpx_files(files: None) -> None: - ... +def to_httpx_files(files: None) -> None: ... @overload -def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: @@ -71,25 +69,23 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes: return file if is_tuple_t(file): - return (file[0], _read_file_content(file[1]), *file[2:]) + return (file[0], read_file_content(file[1]), *file[2:]) raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple") -def _read_file_content(file: FileContent) -> HttpxFileContent: +def read_file_content(file: FileContent) -> HttpxFileContent: if isinstance(file, os.PathLike): return pathlib.Path(file).read_bytes() return file @overload -async def async_to_httpx_files(files: None) -> None: - ... +async def async_to_httpx_files(files: None) -> None: ... @overload -async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: @@ -115,12 +111,12 @@ async def _async_transform_file(file: FileTypes) -> HttpxFileTypes: return file if is_tuple_t(file): - return (file[0], await _async_read_file_content(file[1]), *file[2:]) + return (file[0], await async_read_file_content(file[1]), *file[2:]) raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple") -async def _async_read_file_content(file: FileContent) -> HttpxFileContent: +async def async_read_file_content(file: FileContent) -> HttpxFileContent: if isinstance(file, os.PathLike): return await anyio.Path(file).read_bytes() diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py index 1de906b167..bce9182750 100644 --- a/src/openai/_legacy_response.py +++ b/src/openai/_legacy_response.py @@ -5,7 +5,18 @@ import logging import datetime import functools -from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast, overload +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin import anyio @@ -13,7 +24,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -53,6 +64,9 @@ class LegacyAPIResponse(Generic[R]): http_response: httpx.Response + retries_taken: int + """The number of retries made. 
If no retries happened this will be `0`""" + def __init__( self, *, @@ -62,6 +76,7 @@ def __init__( stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, options: FinalRequestOptions, + retries_taken: int = 0, ) -> None: self._cast_to = cast_to self._client = client @@ -70,18 +85,17 @@ def __init__( self._stream_cls = stream_cls self._options = options self.http_response = raw + self.retries_taken = retries_taken @property def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - def parse(self, *, to: type[_T]) -> _T: - ... + def parse(self, *, to: type[_T]) -> _T: ... @overload - def parse(self) -> R: - ... + def parse(self) -> R: ... def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. @@ -178,9 +192,17 @@ def elapsed(self) -> datetime.timedelta: return self.http_response.elapsed def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to if self._stream: if to: @@ -216,18 +238,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - cast_to = to if to is not None else self._cast_to - - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) @@ -241,7 +257,8 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == float: return cast(R, float(response.text)) - origin = get_origin(cast_to) or cast_to + if cast_to == bool: + return cast(R, response.text.lower() == "true") if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent): return cast(R, cast_to(response)) # type: ignore @@ -249,7 +266,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if origin == LegacyAPIResponse: raise RuntimeError("Unexpected state - cast_to is `APIResponse`") - if inspect.isclass(origin) and issubclass(origin, httpx.Response): + if inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) and issubclass(origin, httpx.Response): # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response # and pass that class to our request functions. We cannot change the variance to be either # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct @@ -259,7 +278,13 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): raise TypeError("Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`") if ( @@ -276,7 +301,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 content_type, *_ = response.headers.get("content-type", "*").split(";") - if content_type != "application/json": + if not content_type.endswith("json"): if is_basemodel(cast_to): try: data = response.json() diff --git a/src/openai/_models.py b/src/openai/_models.py index 75c68cc730..ca9500b2aa 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -2,14 +2,17 @@ import os import inspect -from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast +import weakref +from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast from datetime import date, datetime from typing_extensions import ( + List, Unpack, Literal, ClassVar, Protocol, Required, + ParamSpec, TypedDict, TypeGuard, final, @@ -18,7 +21,6 @@ ) import pydantic -import pydantic.generics from pydantic.fields import FieldInfo from ._types import ( @@ -36,6 +38,7 @@ PropertyInfo, is_list, is_given, + json_safe, lru_cache, is_mapping, parse_date, @@ -44,10 +47,11 @@ strip_not_given, extract_type_arg, is_annotated_type, + is_type_alias_type, strip_annotated_type, ) from ._compat import ( - PYDANTIC_V2, + PYDANTIC_V1, ConfigDict, GenericModel as BaseGenericModel, get_args, @@ -62,11 +66,14 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: - from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema + from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +P = ParamSpec("P") @runtime_checkable @@ -75,11 +82,7 @@ class _ConfigProtocol(Protocol): class BaseModel(pydantic.BaseModel): - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict( - extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) - ) - else: + if PYDANTIC_V1: @property @override @@ -89,6 +92,10 @@ def model_fields_set(self) -> set[str]: class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] extra: Any = pydantic.Extra.allow # type: ignore + else: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) + ) def to_dict( self, @@ -166,21 +173,21 @@ def to_json( @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. 
@classmethod @override - def construct( - cls: Type[ModelT], + def construct( # pyright: ignore[reportIncompatibleMethodOverride] + __cls: Type[ModelT], _fields_set: set[str] | None = None, **values: object, ) -> ModelT: - m = cls.__new__(cls) + m = __cls.__new__(__cls) fields_values: dict[str, object] = {} - config = get_model_config(cls) + config = get_model_config(__cls) populate_by_name = ( config.allow_population_by_field_name if isinstance(config, _ConfigProtocol) @@ -190,7 +197,7 @@ def construct( if _fields_set is None: _fields_set = set() - model_fields = get_model_fields(cls) + model_fields = get_model_fields(__cls) for name, field in model_fields.items(): key = field.alias if key is None or (key not in values and populate_by_name): @@ -202,28 +209,32 @@ def construct( else: fields_values[name] = field_get_default(field) + extra_field_type = _get_extra_fields_type(__cls) + _extra = {} for key, value in values.items(): if key not in model_fields: - if PYDANTIC_V2: - _extra[key] = value - else: + parsed = construct_type(value=value, type_=extra_field_type) if extra_field_type is not None else value + + if PYDANTIC_V1: _fields_set.add(key) - fields_values[key] = value + fields_values[key] = parsed + else: + _extra[key] = parsed object.__setattr__(m, "__dict__", fields_values) - if PYDANTIC_V2: - # these properties are copied from Pydantic's `model_construct()` method - object.__setattr__(m, "__pydantic_private__", None) - object.__setattr__(m, "__pydantic_extra__", _extra) - object.__setattr__(m, "__pydantic_fields_set__", _fields_set) - else: + if PYDANTIC_V1: # init_private_attributes() does not exist in v2 m._init_private_attributes() # type: ignore # copied from Pydantic v1's `construct()` method object.__setattr__(m, "__fields_set__", _fields_set) + else: + # these properties are copied from Pydantic's `model_construct()` method + object.__setattr__(m, "__pydantic_private__", None) + object.__setattr__(m, "__pydantic_extra__", _extra) + object.__setattr__(m, "__pydantic_fields_set__", _fields_set) return m @@ -233,7 +244,7 @@ def construct( # although not in practice model_construct = construct - if not PYDANTIC_V2: + if PYDANTIC_V1: # we define aliases for some of the new pydantic v2 methods so # that we can just document these methods without having to specify # a specific pydantic version as some users may not know which @@ -244,15 +255,17 @@ def model_dump( self, *, mode: Literal["json", "python"] | str = "python", - include: IncEx = None, - exclude: IncEx = None, - by_alias: bool = False, + include: IncEx | None = None, + exclude: IncEx | None = None, + context: Any | None = None, + by_alias: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, + exclude_computed_fields: bool = False, round_trip: bool = False, warnings: bool | Literal["none", "warn", "error"] = True, - context: dict[str, Any] | None = None, + fallback: Callable[[Any], Any] | None = None, serialize_as_any: bool = False, ) -> dict[str, Any]: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump @@ -261,22 +274,30 @@ def model_dump( Args: mode: The mode in which `to_python` should run. - If mode is 'json', the dictionary will only contain JSON serializable types. - If mode is 'python', the dictionary may contain any Python objects. - include: A list of fields to include in the output. - exclude: A list of fields to exclude from the output. 
+ If mode is 'json', the output will only contain JSON serializable types. + If mode is 'python', the output may contain non-JSON-serializable Python objects. + include: A set of fields to include in the output. + exclude: A set of fields to exclude from the output. + context: Additional context to pass to the serializer. by_alias: Whether to use the field's alias in the dictionary key if defined. - exclude_unset: Whether to exclude fields that are unset or None from the output. - exclude_defaults: Whether to exclude fields that are set to their default value from the output. - exclude_none: Whether to exclude fields that have a value of `None` from the output. - round_trip: Whether to enable serialization and deserialization round-trip support. - warnings: Whether to log warnings when invalid fields are encountered. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value. + exclude_none: Whether to exclude fields that have a value of `None`. + exclude_computed_fields: Whether to exclude computed fields. + While this can be useful for round-tripping, it is usually recommended to use the dedicated + `round_trip` parameter instead. + round_trip: If True, dumped values should be valid as input for non-idempotent types such as Json[T]. + warnings: How to handle serialization errors. False/"none" ignores them, True/"warn" logs errors, + "error" raises a [`PydanticSerializationError`][pydantic_core.PydanticSerializationError]. + fallback: A function to call when an unknown value is encountered. If not provided, + a [`PydanticSerializationError`][pydantic_core.PydanticSerializationError] error is raised. + serialize_as_any: Whether to serialize fields with duck-typing serialization behavior. Returns: A dictionary representation of the model. 
""" - if mode != "python": - raise ValueError("mode is only supported in Pydantic v2") + if mode not in {"json", "python"}: + raise ValueError("mode must be either 'json' or 'python'") if round_trip != False: raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: @@ -285,29 +306,38 @@ def model_dump( raise ValueError("context is only supported in Pydantic v2") if serialize_as_any != False: raise ValueError("serialize_as_any is only supported in Pydantic v2") - return super().dict( # pyright: ignore[reportDeprecated] + if fallback is not None: + raise ValueError("fallback is only supported in Pydantic v2") + if exclude_computed_fields != False: + raise ValueError("exclude_computed_fields is only supported in Pydantic v2") + dumped = super().dict( # pyright: ignore[reportDeprecated] include=include, exclude=exclude, - by_alias=by_alias, + by_alias=by_alias if by_alias is not None else False, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, exclude_none=exclude_none, ) + return cast("dict[str, Any]", json_safe(dumped)) if mode == "json" else dumped + @override def model_dump_json( self, *, indent: int | None = None, - include: IncEx = None, - exclude: IncEx = None, - by_alias: bool = False, + ensure_ascii: bool = False, + include: IncEx | None = None, + exclude: IncEx | None = None, + context: Any | None = None, + by_alias: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, + exclude_computed_fields: bool = False, round_trip: bool = False, warnings: bool | Literal["none", "warn", "error"] = True, - context: dict[str, Any] | None = None, + fallback: Callable[[Any], Any] | None = None, serialize_as_any: bool = False, ) -> str: """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json @@ -336,11 +366,17 @@ def model_dump_json( raise ValueError("context is only supported in Pydantic v2") if serialize_as_any != False: raise ValueError("serialize_as_any is only supported in Pydantic v2") + if fallback is not None: + raise ValueError("fallback is only supported in Pydantic v2") + if ensure_ascii != False: + raise ValueError("ensure_ascii is only supported in Pydantic v2") + if exclude_computed_fields != False: + raise ValueError("exclude_computed_fields is only supported in Pydantic v2") return super().json( # type: ignore[reportDeprecated] indent=indent, include=include, exclude=exclude, - by_alias=by_alias, + by_alias=by_alias if by_alias is not None else False, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, exclude_none=exclude_none, @@ -351,15 +387,32 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: if value is None: return field_get_default(field) - if PYDANTIC_V2: - type_ = field.annotation - else: + if PYDANTIC_V1: type_ = cast(type, field.outer_type_) # type: ignore + else: + type_ = field.annotation # type: ignore if type_ is None: raise RuntimeError(f"Unexpected field type is None for {key}") - return construct_type(value=value, type_=type_) + return construct_type(value=value, type_=type_, metadata=getattr(field, "metadata", None)) + + +def _get_extra_fields_type(cls: type[pydantic.BaseModel]) -> type | None: + if PYDANTIC_V1: + # TODO + return None + + schema = cls.__pydantic_core_schema__ + if schema["type"] == "model": + fields = schema["schema"] + if fields["type"] == "model-fields": + extras = fields.get("extras_schema") + if extras and "cls" in extras: + # mypy can't narrow the type 
+ return extras["cls"] # type: ignore[no-any-return] + + return None def is_basemodel(type_: type) -> bool: @@ -376,21 +429,65 @@ def is_basemodel(type_: type) -> bool: def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: origin = get_origin(type_) or type_ + if not inspect.isclass(origin): + return False return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) -def construct_type(*, value: object, type_: object) -> object: +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. + + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + +def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: + """Loose coercion to the expected type with construction of nested values. + + Note: the returned value from this function is not guaranteed to match the + given type. + """ + return cast(_T, construct_type(value=value, type_=type_)) + + +def construct_type(*, value: object, type_: object, metadata: Optional[List[Any]] = None) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. """ + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) + if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] + type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(type_): - meta: tuple[Any, ...] = get_args(type_)[1:] + if metadata is not None and len(metadata) > 0: + meta: tuple[Any, ...] 
= tuple(metadata) + elif is_annotated_type(type_): + meta = get_args(type_)[1:] type_ = extract_type_arg(type_, 0) else: meta = tuple() @@ -402,7 +499,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass @@ -444,7 +541,11 @@ def construct_type(*, value: object, type_: object) -> object: _, items_type = get_args(type_) # Dict[_, items_type] return {key: construct_type(value=item, type_=items_type) for key, item in value.items()} - if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)): + if ( + not is_literal_type(type_) + and inspect.isclass(origin) + and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)) + ): if is_list(value): return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value] @@ -490,6 +591,9 @@ class CachedDiscriminatorType(Protocol): __discriminator__: DiscriminatorDetails +DISCRIMINATOR_CACHE: weakref.WeakKeyDictionary[type, DiscriminatorDetails] = weakref.WeakKeyDictionary() + + class DiscriminatorDetails: field_name: str """The name of the discriminator field in the variant class, e.g. @@ -532,8 +636,9 @@ def __init__( def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None: - if isinstance(union, CachedDiscriminatorType): - return union.__discriminator__ + cached = DISCRIMINATOR_CACHE.get(union) + if cached is not None: + return cached discriminator_field_name: str | None = None @@ -551,30 +656,30 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, for variant in get_args(union): variant = strip_annotated_type(variant) if is_basemodel_type(variant): - if PYDANTIC_V2: - field = _extract_field_schema_pv2(variant, discriminator_field_name) - if not field: + if PYDANTIC_V1: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: continue # Note: if one variant defines an alias then they all should - discriminator_alias = field.get("serialization_alias") - - field_schema = field["schema"] + discriminator_alias = field_info.alias - if field_schema["type"] == "literal": - for entry in cast("LiteralSchema", field_schema)["expected"]: + if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation): + for entry in get_args(annotation): if isinstance(entry, str): mapping[entry] = variant else: - field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] - if not field_info: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: continue # Note: if one variant defines an alias then they all should - discriminator_alias = field_info.alias + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] - if field_info.annotation and is_literal_type(field_info.annotation): - for entry in get_args(field_info.annotation): + if field_schema["type"] == "literal": + for entry in cast("LiteralSchema", field_schema)["expected"]: if isinstance(entry, str): mapping[entry] = variant @@ -586,21 +691,24 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, 
discriminator_field=discriminator_field_name, discriminator_alias=discriminator_alias, ) - cast(CachedDiscriminatorType, union).__discriminator__ = details + DISCRIMINATOR_CACHE.setdefault(union, details) return details def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: schema = model.__pydantic_core_schema__ + if schema["type"] == "definitions": + schema = schema["schema"] + if schema["type"] != "model": return None + schema = cast("ModelSchema", schema) fields_schema = schema["schema"] if fields_schema["type"] != "model-fields": return None fields_schema = cast("ModelFieldsSchema", fields_schema) - field = fields_schema["fields"].get(field_name) if not field: return None @@ -616,7 +724,15 @@ def validate_type(*, type_: type[_T], value: object) -> _T: return cast(_T, _validate_non_model_type(type_=type_, value=value)) -# our use of subclasssing here causes weirdness for type checkers, +def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: + """Add a pydantic config for the given type. + + Note: this is a no-op on Pydantic v1. + """ + setattr(typ, "__pydantic_config__", config) # noqa: B010 + + +# our use of subclassing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: GenericModel = BaseModel @@ -626,7 +742,7 @@ class GenericModel(BaseGenericModel, BaseModel): pass -if PYDANTIC_V2: +if not PYDANTIC_V1: from pydantic import TypeAdapter as _TypeAdapter _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter)) @@ -673,6 +789,7 @@ class FinalRequestOptionsInput(TypedDict, total=False): idempotency_key: str json_data: Body extra_json: AnyMapping + follow_redirects: bool @final @@ -686,18 +803,19 @@ class FinalRequestOptions(pydantic.BaseModel): files: Union[HttpxRequestFiles, None] = None idempotency_key: Union[str, None] = None post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven() + follow_redirects: Union[bool, None] = None # It should be noted that we cannot use `json` here as that would override # a BaseModel method in an incompatible fashion. json_data: Union[Body, None] = None extra_json: Union[AnyMapping, None] = None - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) - else: + if PYDANTIC_V1: class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] arbitrary_types_allowed: bool = True + else: + model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) def get_max_retries(self, max_retries: int) -> int: if isinstance(self.max_retries, NotGiven): @@ -730,9 +848,9 @@ def construct( # type: ignore key: strip_not_given(value) for key, value in values.items() } - if PYDANTIC_V2: - return super().model_construct(_fields_set, **kwargs) - return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs)) # pyright: ignore[reportDeprecated] + if PYDANTIC_V1: + return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs)) # pyright: ignore[reportDeprecated] + return super().model_construct(_fields_set, **kwargs) if not TYPE_CHECKING: # type checkers incorrectly complain about this assignment diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py index 6f7356eb3c..d0d721887b 100644 --- a/src/openai/_module_client.py +++ b/src/openai/_module_client.py @@ -1,85 +1,173 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from __future__ import annotations + +from typing import TYPE_CHECKING from typing_extensions import override -from . import resources, _load_client +if TYPE_CHECKING: + from .resources.files import Files + from .resources.images import Images + from .resources.models import Models + from .resources.videos import Videos + from .resources.batches import Batches + from .resources.webhooks import Webhooks + from .resources.beta.beta import Beta + from .resources.chat.chat import Chat + from .resources.embeddings import Embeddings + from .resources.audio.audio import Audio + from .resources.completions import Completions + from .resources.evals.evals import Evals + from .resources.moderations import Moderations + from .resources.uploads.uploads import Uploads + from .resources.realtime.realtime import Realtime + from .resources.responses.responses import Responses + from .resources.containers.containers import Containers + from .resources.fine_tuning.fine_tuning import FineTuning + from .resources.conversations.conversations import Conversations + from .resources.vector_stores.vector_stores import VectorStores + +from . import _load_client from ._utils import LazyProxy -class ChatProxy(LazyProxy[resources.Chat]): +class ChatProxy(LazyProxy["Chat"]): @override - def __load__(self) -> resources.Chat: + def __load__(self) -> Chat: return _load_client().chat -class BetaProxy(LazyProxy[resources.Beta]): +class BetaProxy(LazyProxy["Beta"]): @override - def __load__(self) -> resources.Beta: + def __load__(self) -> Beta: return _load_client().beta -class FilesProxy(LazyProxy[resources.Files]): +class FilesProxy(LazyProxy["Files"]): @override - def __load__(self) -> resources.Files: + def __load__(self) -> Files: return _load_client().files -class AudioProxy(LazyProxy[resources.Audio]): +class AudioProxy(LazyProxy["Audio"]): @override - def __load__(self) -> resources.Audio: + def __load__(self) -> Audio: return _load_client().audio -class ImagesProxy(LazyProxy[resources.Images]): +class EvalsProxy(LazyProxy["Evals"]): + @override + def __load__(self) -> Evals: + return _load_client().evals + + +class ImagesProxy(LazyProxy["Images"]): @override - def __load__(self) -> resources.Images: + def __load__(self) -> Images: return _load_client().images -class ModelsProxy(LazyProxy[resources.Models]): +class ModelsProxy(LazyProxy["Models"]): @override - def __load__(self) -> resources.Models: + def __load__(self) -> Models: return _load_client().models -class BatchesProxy(LazyProxy[resources.Batches]): +class VideosProxy(LazyProxy["Videos"]): + @override + def __load__(self) -> Videos: + return _load_client().videos + + +class BatchesProxy(LazyProxy["Batches"]): @override - def __load__(self) -> resources.Batches: + def __load__(self) -> Batches: return _load_client().batches -class EmbeddingsProxy(LazyProxy[resources.Embeddings]): +class UploadsProxy(LazyProxy["Uploads"]): + @override + def __load__(self) -> Uploads: + return _load_client().uploads + + +class WebhooksProxy(LazyProxy["Webhooks"]): + @override + def __load__(self) -> Webhooks: + return _load_client().webhooks + + +class RealtimeProxy(LazyProxy["Realtime"]): @override - def __load__(self) -> resources.Embeddings: + def __load__(self) -> Realtime: + return _load_client().realtime + + +class ResponsesProxy(LazyProxy["Responses"]): + @override + def __load__(self) -> Responses: + return _load_client().responses + + +class EmbeddingsProxy(LazyProxy["Embeddings"]): + @override + def __load__(self) -> Embeddings: return _load_client().embeddings 
-class CompletionsProxy(LazyProxy[resources.Completions]): +class ContainersProxy(LazyProxy["Containers"]): @override - def __load__(self) -> resources.Completions: + def __load__(self) -> Containers: + return _load_client().containers + + +class CompletionsProxy(LazyProxy["Completions"]): + @override + def __load__(self) -> Completions: return _load_client().completions -class ModerationsProxy(LazyProxy[resources.Moderations]): +class ModerationsProxy(LazyProxy["Moderations"]): @override - def __load__(self) -> resources.Moderations: + def __load__(self) -> Moderations: return _load_client().moderations -class FineTuningProxy(LazyProxy[resources.FineTuning]): +class FineTuningProxy(LazyProxy["FineTuning"]): @override - def __load__(self) -> resources.FineTuning: + def __load__(self) -> FineTuning: return _load_client().fine_tuning -chat: resources.Chat = ChatProxy().__as_proxied__() -beta: resources.Beta = BetaProxy().__as_proxied__() -files: resources.Files = FilesProxy().__as_proxied__() -audio: resources.Audio = AudioProxy().__as_proxied__() -images: resources.Images = ImagesProxy().__as_proxied__() -models: resources.Models = ModelsProxy().__as_proxied__() -batches: resources.Batches = BatchesProxy().__as_proxied__() -embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__() -completions: resources.Completions = CompletionsProxy().__as_proxied__() -moderations: resources.Moderations = ModerationsProxy().__as_proxied__() -fine_tuning: resources.FineTuning = FineTuningProxy().__as_proxied__() +class VectorStoresProxy(LazyProxy["VectorStores"]): + @override + def __load__(self) -> VectorStores: + return _load_client().vector_stores + + +class ConversationsProxy(LazyProxy["Conversations"]): + @override + def __load__(self) -> Conversations: + return _load_client().conversations + + +chat: Chat = ChatProxy().__as_proxied__() +beta: Beta = BetaProxy().__as_proxied__() +files: Files = FilesProxy().__as_proxied__() +audio: Audio = AudioProxy().__as_proxied__() +evals: Evals = EvalsProxy().__as_proxied__() +images: Images = ImagesProxy().__as_proxied__() +models: Models = ModelsProxy().__as_proxied__() +videos: Videos = VideosProxy().__as_proxied__() +batches: Batches = BatchesProxy().__as_proxied__() +uploads: Uploads = UploadsProxy().__as_proxied__() +webhooks: Webhooks = WebhooksProxy().__as_proxied__() +realtime: Realtime = RealtimeProxy().__as_proxied__() +responses: Responses = ResponsesProxy().__as_proxied__() +embeddings: Embeddings = EmbeddingsProxy().__as_proxied__() +containers: Containers = ContainersProxy().__as_proxied__() +completions: Completions = CompletionsProxy().__as_proxied__() +moderations: Moderations = ModerationsProxy().__as_proxied__() +fine_tuning: FineTuning = FineTuningProxy().__as_proxied__() +vector_stores: VectorStores = VectorStoresProxy().__as_proxied__() +conversations: Conversations = ConversationsProxy().__as_proxied__() diff --git a/src/openai/_qs.py b/src/openai/_qs.py index 274320ca5e..ada6fd3f72 100644 --- a/src/openai/_qs.py +++ b/src/openai/_qs.py @@ -4,7 +4,7 @@ from urllib.parse import parse_qs, urlencode from typing_extensions import Literal, get_args -from ._types import NOT_GIVEN, NotGiven, NotGivenOr +from ._types import NotGiven, not_given from ._utils import flatten _T = TypeVar("_T") @@ -41,8 +41,8 @@ def stringify( self, params: Params, *, - array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, - nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + array_format: ArrayFormat | NotGiven = not_given, + nested_format: NestedFormat | 
NotGiven = not_given, ) -> str: return urlencode( self.stringify_items( @@ -56,8 +56,8 @@ def stringify_items( self, params: Params, *, - array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, - nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + array_format: ArrayFormat | NotGiven = not_given, + nested_format: NestedFormat | NotGiven = not_given, ) -> list[tuple[str, str]]: opts = Options( qs=self, @@ -143,8 +143,8 @@ def __init__( self, qs: Querystring = _qs, *, - array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN, - nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN, + array_format: ArrayFormat | NotGiven = not_given, + nested_format: NestedFormat | NotGiven = not_given, ) -> None: self.array_format = qs.array_format if isinstance(array_format, NotGiven) else array_format self.nested_format = qs.nested_format if isinstance(nested_format, NotGiven) else nested_format diff --git a/src/openai/_response.py b/src/openai/_response.py index 4ba2ae681c..e3ccdb5353 100644 --- a/src/openai/_response.py +++ b/src/openai/_response.py @@ -25,7 +25,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -55,6 +55,9 @@ class BaseAPIResponse(Generic[R]): http_response: httpx.Response + retries_taken: int + """The number of retries made. If no retries happened this will be `0`""" + def __init__( self, *, @@ -64,6 +67,7 @@ def __init__( stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, options: FinalRequestOptions, + retries_taken: int = 0, ) -> None: self._cast_to = cast_to self._client = client @@ -72,6 +76,7 @@ def __init__( self._stream_cls = stream_cls self._options = options self.http_response = raw + self.retries_taken = retries_taken @property def headers(self) -> httpx.Headers: @@ -121,9 +126,17 @@ def __repr__(self) -> str: ) def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to if self._is_sse_stream: if to: @@ -159,18 +172,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - cast_to = to if to is not None else self._cast_to - - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) @@ -187,7 +194,8 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == float: return cast(R, float(response.text)) - origin = get_origin(cast_to) or cast_to + if cast_to == bool: + return cast(R, response.text.lower() == "true") # handle the legacy binary response case if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent": @@ -206,7 +214,13 @@ def _parse(self, *, to: 
type[_T] | None = None) -> R | _T: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") if ( @@ -223,7 +237,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 content_type, *_ = response.headers.get("content-type", "*").split(";") - if content_type != "application/json": + if not content_type.endswith("json"): if is_basemodel(cast_to): try: data = response.json() @@ -263,12 +277,10 @@ def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - def parse(self, *, to: type[_T]) -> _T: - ... + def parse(self, *, to: type[_T]) -> _T: ... @overload - def parse(self) -> R: - ... + def parse(self) -> R: ... def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. @@ -371,12 +383,10 @@ def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - async def parse(self, *, to: type[_T]) -> _T: - ... + async def parse(self, *, to: type[_T]) -> _T: ... @overload - async def parse(self) -> R: - ... + async def parse(self) -> R: ... async def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index 0fda992cff..5261956160 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -55,11 +55,11 @@ def __stream__(self) -> Iterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - for sse in iterator: - if sse.data.startswith("[DONE]"): - break + try: + for sse in iterator: + if sse.data.startswith("[DONE]"): + break - if sse.event is None: data = sse.json() if is_mapping(data) and data.get("error"): message = None @@ -76,29 +76,9 @@ def __stream__(self) -> Iterator[_T]: ) yield process_data(data=data, cast_to=cast_to, response=response) - - else: - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) - - # Ensure the entire stream is consumed - for _sse in iterator: - ... 
+ finally: + # Ensure the response is closed even if the consumer doesn't read all data + response.close() def __enter__(self) -> Self: return self @@ -157,11 +137,11 @@ async def __stream__(self) -> AsyncIterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - async for sse in iterator: - if sse.data.startswith("[DONE]"): - break + try: + async for sse in iterator: + if sse.data.startswith("[DONE]"): + break - if sse.event is None: data = sse.json() if is_mapping(data) and data.get("error"): message = None @@ -178,29 +158,9 @@ async def __stream__(self) -> AsyncIterator[_T]: ) yield process_data(data=data, cast_to=cast_to, response=response) - - else: - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) - - # Ensure the entire stream is consumed - async for _sse in iterator: - ... + finally: + # Ensure the response is closed even if the consumer doesn't read all data + await response.aclose() async def __aenter__(self) -> Self: return self diff --git a/src/openai/_types.py b/src/openai/_types.py index de9b1dd48b..95c0ec17b9 100644 --- a/src/openai/_types.py +++ b/src/openai/_types.py @@ -13,10 +13,21 @@ Mapping, TypeVar, Callable, + Iterator, Optional, Sequence, ) -from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable +from typing_extensions import ( + Set, + Literal, + Protocol, + TypeAlias, + TypedDict, + SupportsIndex, + overload, + override, + runtime_checkable, +) import httpx import pydantic @@ -101,24 +112,27 @@ class RequestOptions(TypedDict, total=False): params: Query extra_json: AnyMapping idempotency_key: str + follow_redirects: bool # Sentinel class used until PEP 0661 is accepted class NotGiven: """ - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different behavior). + For parameters with a meaningful None value, we need to distinguish between + the user explicitly passing None, and the user not passing the parameter at + all. + + User code shouldn't need to use not_given directly. For example: ```py - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... + def create(timeout: Timeout | None | NotGiven = not_given): ... - get(timeout=1) # 1s timeout - get(timeout=None) # No timeout - get() # Default timeout behavior, which may not be statically known at the method definition. + create(timeout=1) # 1s timeout + create(timeout=None) # No timeout + create() # Default timeout behavior ``` """ @@ -130,13 +144,14 @@ def __repr__(self) -> str: return "NOT_GIVEN" -NotGivenOr = Union[_T, NotGiven] +not_given = NotGiven() +# for backwards compatibility: NOT_GIVEN = NotGiven() class Omit: - """In certain situations you need to be able to represent a case where a default value has - to be explicitly removed and `None` is not an appropriate substitute, for example: + """ + To explicitly omit something from being sent in a request, use `omit`. 
```py # as the default `Content-Type` header is `application/json` that will be sent @@ -146,8 +161,8 @@ class Omit: # to look something like: 'multipart/form-data; boundary=0d8382fcf5f8c3be01ca2e11002d2983' client.post(..., headers={"Content-Type": "multipart/form-data"}) - # instead you can remove the default `application/json` header by passing Omit - client.post(..., headers={"Content-Type": Omit()}) + # instead you can remove the default `application/json` header by passing omit + client.post(..., headers={"Content-Type": omit}) ``` """ @@ -155,6 +170,9 @@ def __bool__(self) -> Literal[False]: return False +omit = Omit() + + @runtime_checkable class ModelBuilderProtocol(Protocol): @classmethod @@ -163,16 +181,14 @@ def build( *, response: Response, data: object, - ) -> _T: - ... + ) -> _T: ... Headers = Mapping[str, Union[str, Omit]] class HeadersLikeProtocol(Protocol): - def get(self, __key: str) -> str | None: - ... + def get(self, __key: str) -> str | None: ... HeadersLike = Union[Headers, HeadersLikeProtocol] @@ -197,8 +213,8 @@ def get(self, __key: str) -> str | None: StrBytesIntFloat = Union[str, bytes, int, float] # Note: copied from Pydantic -# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 -IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" +# https://github.com/pydantic/pydantic/blob/6f31f8f68ef011f84357330186f603ff295312fd/pydantic/main.py#L79 +IncEx: TypeAlias = Union[Set[int], Set[str], Mapping[int, Union["IncEx", bool]], Mapping[str, Union["IncEx", bool]]] PostParser = Callable[[Any], Any] @@ -220,3 +236,28 @@ class _GenericAlias(Protocol): class HttpxSendArgs(TypedDict, total=False): auth: httpx.Auth + follow_redirects: bool + + +_T_co = TypeVar("_T_co", covariant=True) + + +if TYPE_CHECKING: + # This works because str.__contains__ does not accept object (either in typeshed or at runtime) + # https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285 + # + # Note: index() and count() methods are intentionally omitted to allow pyright to properly + # infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr. + class SequenceNotStr(Protocol[_T_co]): + @overload + def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... + @overload + def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ... + def __contains__(self, value: object, /) -> bool: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[_T_co]: ... + def __reversed__(self) -> Iterator[_T_co]: ... 
+else: + # just point this to a normal `Sequence` at runtime to avoid having to special case + # deserializing our custom sequence type + SequenceNotStr = Sequence diff --git a/src/openai/_utils/__init__.py b/src/openai/_utils/__init__.py index 31b5b22799..dc64e29a1c 100644 --- a/src/openai/_utils/__init__.py +++ b/src/openai/_utils/__init__.py @@ -6,10 +6,10 @@ is_list as is_list, is_given as is_given, is_tuple as is_tuple, + json_safe as json_safe, lru_cache as lru_cache, is_mapping as is_mapping, is_tuple_t as is_tuple_t, - parse_date as parse_date, is_iterable as is_iterable, is_sequence as is_sequence, coerce_float as coerce_float, @@ -22,7 +22,6 @@ coerce_boolean as coerce_boolean, coerce_integer as coerce_integer, file_from_path as file_from_path, - parse_datetime as parse_datetime, strip_not_given as strip_not_given, deepcopy_minimal as deepcopy_minimal, get_async_library as get_async_library, @@ -31,13 +30,22 @@ maybe_coerce_boolean as maybe_coerce_boolean, maybe_coerce_integer as maybe_coerce_integer, ) +from ._compat import ( + get_args as get_args, + is_union as is_union, + get_origin as get_origin, + is_typeddict as is_typeddict, + is_literal_type as is_literal_type, +) from ._typing import ( is_list_type as is_list_type, is_union_type as is_union_type, extract_type_arg as extract_type_arg, is_iterable_type as is_iterable_type, is_required_type as is_required_type, + is_sequence_type as is_sequence_type, is_annotated_type as is_annotated_type, + is_type_alias_type as is_type_alias_type, strip_annotated_type as strip_annotated_type, extract_type_var_from_base as extract_type_var_from_base, ) @@ -49,3 +57,8 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) +from ._reflection import ( + function_has_argument as function_has_argument, + assert_signatures_in_sync as assert_signatures_in_sync, +) +from ._datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime diff --git a/src/openai/_utils/_compat.py b/src/openai/_utils/_compat.py new file mode 100644 index 0000000000..dd703233c5 --- /dev/null +++ b/src/openai/_utils/_compat.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import sys +import typing_extensions +from typing import Any, Type, Union, Literal, Optional +from datetime import date, datetime +from typing_extensions import get_args as _get_args, get_origin as _get_origin + +from .._types import StrBytesIntFloat +from ._datetime_parse import parse_date as _parse_date, parse_datetime as _parse_datetime + +_LITERAL_TYPES = {Literal, typing_extensions.Literal} + + +def get_args(tp: type[Any]) -> tuple[Any, ...]: + return _get_args(tp) + + +def get_origin(tp: type[Any]) -> type[Any] | None: + return _get_origin(tp) + + +def is_union(tp: Optional[Type[Any]]) -> bool: + if sys.version_info < (3, 10): + return tp is Union # type: ignore[comparison-overlap] + else: + import types + + return tp is Union or tp is types.UnionType + + +def is_typeddict(tp: Type[Any]) -> bool: + return typing_extensions.is_typeddict(tp) + + +def is_literal_type(tp: Type[Any]) -> bool: + return get_origin(tp) in _LITERAL_TYPES + + +def parse_date(value: Union[date, StrBytesIntFloat]) -> date: + return _parse_date(value) + + +def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime: + return _parse_datetime(value) diff --git a/src/openai/_utils/_datetime_parse.py b/src/openai/_utils/_datetime_parse.py new file mode 100644 index 0000000000..7cb9d9e668 --- /dev/null +++ b/src/openai/_utils/_datetime_parse.py @@ -0,0 
+1,136 @@
+"""
+This file contains code from https://github.com/pydantic/pydantic/blob/main/pydantic/v1/datetime_parse.py
+without the Pydantic v1 specific errors.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Dict, Union, Optional
+from datetime import date, datetime, timezone, timedelta
+
+from .._types import StrBytesIntFloat
+
+date_expr = r"(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
+time_expr = (
+ r"(?P<hour>\d{1,2}):(?P<minute>\d{1,2})"
+ r"(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6})\d{0,6})?)?"
+ r"(?P<tzinfo>Z|[+-]\d{2}(?::?\d{2})?)?$"
+)
+
+date_re = re.compile(f"{date_expr}$")
+datetime_re = re.compile(f"{date_expr}[T ]{time_expr}")
+
+
+EPOCH = datetime(1970, 1, 1)
+# if greater than this, the number is in ms, if less than or equal it's in seconds
+# (in seconds this is 11th October 2603, in ms it's 20th August 1970)
+MS_WATERSHED = int(2e10)
+# slightly more than datetime.max in ns - (datetime.max - EPOCH).total_seconds() * 1e9
+MAX_NUMBER = int(3e20)
+
+
+def _get_numeric(value: StrBytesIntFloat, native_expected_type: str) -> Union[None, int, float]:
+ if isinstance(value, (int, float)):
+ return value
+ try:
+ return float(value)
+ except ValueError:
+ return None
+ except TypeError:
+ raise TypeError(f"invalid type; expected {native_expected_type}, string, bytes, int or float") from None
+
+
+def _from_unix_seconds(seconds: Union[int, float]) -> datetime:
+ if seconds > MAX_NUMBER:
+ return datetime.max
+ elif seconds < -MAX_NUMBER:
+ return datetime.min
+
+ while abs(seconds) > MS_WATERSHED:
+ seconds /= 1000
+ dt = EPOCH + timedelta(seconds=seconds)
+ return dt.replace(tzinfo=timezone.utc)
+
+
+def _parse_timezone(value: Optional[str]) -> Union[None, int, timezone]:
+ if value == "Z":
+ return timezone.utc
+ elif value is not None:
+ offset_mins = int(value[-2:]) if len(value) > 3 else 0
+ offset = 60 * int(value[1:3]) + offset_mins
+ if value[0] == "-":
+ offset = -offset
+ return timezone(timedelta(minutes=offset))
+ else:
+ return None
+
+
+def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
+ """
+ Parse a datetime/int/float/string and return a datetime.datetime.
+
+ This function supports time zone offsets. When the input contains one,
+ the output uses a timezone with a fixed offset from UTC.
+
+ Raise ValueError if the input is well formatted but not a valid datetime.
+ Raise ValueError if the input isn't well formatted.
+ """
+ if isinstance(value, datetime):
+ return value
+
+ number = _get_numeric(value, "datetime")
+ if number is not None:
+ return _from_unix_seconds(number)
+
+ if isinstance(value, bytes):
+ value = value.decode()
+
+ assert not isinstance(value, (float, int))
+
+ match = datetime_re.match(value)
+ if match is None:
+ raise ValueError("invalid datetime format")
+
+ kw = match.groupdict()
+ if kw["microsecond"]:
+ kw["microsecond"] = kw["microsecond"].ljust(6, "0")
+
+ tzinfo = _parse_timezone(kw.pop("tzinfo"))
+ kw_: Dict[str, Union[None, int, timezone]] = {k: int(v) for k, v in kw.items() if v is not None}
+ kw_["tzinfo"] = tzinfo
+
+ return datetime(**kw_) # type: ignore
+
+
+def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
+ """
+ Parse a date/int/float/string and return a datetime.date.
+
+ Raise ValueError if the input is well formatted but not a valid date.
+ Raise ValueError if the input isn't well formatted.
+ """ + if isinstance(value, date): + if isinstance(value, datetime): + return value.date() + else: + return value + + number = _get_numeric(value, "date") + if number is not None: + return _from_unix_seconds(number).date() + + if isinstance(value, bytes): + value = value.decode() + + assert not isinstance(value, (float, int)) + match = date_re.match(value) + if match is None: + raise ValueError("invalid date format") + + kw = {k: int(v) for k, v in match.groupdict().items()} + + try: + return date(**kw) + except ValueError: + raise ValueError("invalid date format") from None diff --git a/src/openai/_utils/_proxy.py b/src/openai/_utils/_proxy.py index c46a62a698..0f239a33c6 100644 --- a/src/openai/_utils/_proxy.py +++ b/src/openai/_utils/_proxy.py @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override def __class__(self) -> type: # pyright: ignore - proxied = self.__get_proxied__() + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) if issubclass(type(proxied), LazyProxy): return type(proxied) return proxied.__class__ @@ -59,5 +62,4 @@ def __as_proxied__(self) -> T: return cast(T, self) @abstractmethod - def __load__(self) -> T: - ... + def __load__(self) -> T: ... diff --git a/src/openai/_utils/_reflection.py b/src/openai/_utils/_reflection.py new file mode 100644 index 0000000000..89aa712ac4 --- /dev/null +++ b/src/openai/_utils/_reflection.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import inspect +from typing import Any, Callable + + +def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: + """Returns whether or not the given function has a specific parameter""" + sig = inspect.signature(func) + return arg_name in sig.parameters + + +def assert_signatures_in_sync( + source_func: Callable[..., Any], + check_func: Callable[..., Any], + *, + exclude_params: set[str] = set(), +) -> None: + """Ensure that the signature of the second function matches the first.""" + + check_sig = inspect.signature(check_func) + source_sig = inspect.signature(source_func) + + errors: list[str] = [] + + for name, source_param in source_sig.parameters.items(): + if name in exclude_params: + continue + + custom_param = check_sig.parameters.get(name) + if not custom_param: + errors.append(f"the `{name}` param is missing") + continue + + if custom_param.annotation != source_param.annotation: + errors.append( + f"types for the `{name}` param are do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}" + ) + continue + + if errors: + raise AssertionError(f"{len(errors)} errors encountered when comparing signatures:\n\n" + "\n\n".join(errors)) diff --git a/src/openai/_utils/_resources_proxy.py b/src/openai/_utils/_resources_proxy.py new file mode 100644 index 0000000000..e5b9ec7a37 --- /dev/null +++ b/src/openai/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `openai.resources` module. 
+ + This is used so that we can lazily import `openai.resources` only when + needed *and* so that users can just import `openai` and reference `openai.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("openai.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/openai/_utils/_sync.py b/src/openai/_utils/_sync.py index 595924e5b1..f6027c183d 100644 --- a/src/openai/_utils/_sync.py +++ b/src/openai/_utils/_sync.py @@ -1,54 +1,49 @@ from __future__ import annotations +import asyncio import functools from typing import TypeVar, Callable, Awaitable from typing_extensions import ParamSpec import anyio +import sniffio import anyio.to_thread T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") -# copied from `asyncer`, https://github.com/tiangolo/asyncer -def asyncify( - function: Callable[T_ParamSpec, T_Retval], - *, - cancellable: bool = False, - limiter: anyio.CapacityLimiter | None = None, -) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: +async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs +) -> T_Retval: + if sniffio.current_async_library() == "asyncio": + return await asyncio.to_thread(func, *args, **kwargs) + + return await anyio.to_thread.run_sync( + functools.partial(func, *args, **kwargs), + ) + + +# inspired by `asyncer`, https://github.com/tiangolo/asyncer +def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: """ Take a blocking function and create an async one that receives the same - positional and keyword arguments, and that when called, calls the original function - in a worker thread using `anyio.to_thread.run_sync()`. Internally, - `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports - keyword arguments additional to positional arguments and it adds better support for - autocompletion and inline errors for the arguments of the function called and the - return value. - - If the `cancellable` option is enabled and the task waiting for its completion is - cancelled, the thread will still run its course but its return value (or any raised - exception) will be ignored. + positional and keyword arguments. - Use it like this: + Usage: - ```Python - def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: - # Do work - return "Some result" + ```python + def blocking_func(arg1, arg2, kwarg1=None): + # blocking code + return result - result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b") - print(result) + result = asyncify(blocking_function)(arg1, arg2, kwarg1=value1) ``` ## Arguments `function`: a blocking regular callable (e.g. 
a function) - `cancellable`: `True` to allow cancellation of the operation - `limiter`: capacity limiter to use to limit the total amount of threads running - (if omitted, the default limiter is used) ## Return @@ -58,7 +53,6 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: """ async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: - partial_f = functools.partial(function, *args, **kwargs) - return await anyio.to_thread.run_sync(partial_f, cancellable=cancellable, limiter=limiter) + return await to_thread(function, *args, **kwargs) return wrapper diff --git a/src/openai/_utils/_transform.py b/src/openai/_utils/_transform.py index 47e262a515..5207549207 100644 --- a/src/openai/_utils/_transform.py +++ b/src/openai/_utils/_transform.py @@ -5,27 +5,31 @@ import pathlib from typing import Any, Mapping, TypeVar, cast from datetime import date, datetime -from typing_extensions import Literal, get_args, override, get_type_hints +from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints import anyio import pydantic from ._utils import ( is_list, + is_given, + lru_cache, is_mapping, is_iterable, + is_sequence, ) from .._files import is_base64_file_input +from ._compat import get_origin, is_typeddict from ._typing import ( is_list_type, is_union_type, extract_type_arg, is_iterable_type, is_required_type, + is_sequence_type, is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict _T = TypeVar("_T") @@ -108,6 +112,7 @@ class Params(TypedDict, total=False): return cast(_T, transformed) +@lru_cache(maxsize=8096) def _get_annotated_type(type_: type) -> type | None: """If the given type is an `Annotated` type then it is returned, if not `None` is returned. @@ -126,7 +131,7 @@ def _get_annotated_type(type_: type) -> type | None: def _maybe_transform_key(key: str, type_: type) -> str: """Transform the given `data` based on the annotations provided in `type_`. - Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata. """ annotated_type = _get_annotated_type(type_) if annotated_type is None: @@ -142,6 +147,10 @@ def _maybe_transform_key(key: str, type_: type) -> str: return key +def _no_transform_needed(annotation: type) -> bool: + return annotation == float or annotation == int + + def _transform_recursive( data: object, *, @@ -160,20 +169,43 @@ def _transform_recursive( Defaults to the same value as the `annotation` argument. """ + from .._compat import model_dump + if inner_type is None: inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) # Iterable[T] or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + # Sequence[T] + or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str)) ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. 
+ if isinstance(data, dict): + return cast(object, data) + inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. + # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] if is_union_type(stripped_type): @@ -186,7 +218,7 @@ def _transform_recursive( return data if isinstance(data, pydantic.BaseModel): - return model_dump(data, exclude_unset=True) + return model_dump(data, exclude_unset=True, mode="json") annotated_type = _get_annotated_type(annotation) if annotated_type is None: @@ -235,6 +267,11 @@ def _transform_typeddict( result: dict[str, object] = {} annotations = get_type_hints(expected_type, include_extras=True) for key, value in data.items(): + if not is_given(value): + # we don't need to include omitted values here as they'll + # be stripped out before the request is sent anyway + continue + type_ = annotations.get(key) if type_ is None: # we do not have a type annotation for this field, leave it as is @@ -298,20 +335,43 @@ async def _async_transform_recursive( Defaults to the same value as the `annotation` argument. """ + from .._compat import model_dump + if inner_type is None: inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) # Iterable[T] or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + # Sequence[T] + or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str)) ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. + if isinstance(data, dict): + return cast(object, data) + inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. 
+ # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] if is_union_type(stripped_type): @@ -324,7 +384,7 @@ async def _async_transform_recursive( return data if isinstance(data, pydantic.BaseModel): - return model_dump(data, exclude_unset=True) + return model_dump(data, exclude_unset=True, mode="json") annotated_type = _get_annotated_type(annotation) if annotated_type is None: @@ -373,6 +433,11 @@ async def _async_transform_typeddict( result: dict[str, object] = {} annotations = get_type_hints(expected_type, include_extras=True) for key, value in data.items(): + if not is_given(value): + # we don't need to include omitted values here as they'll + # be stripped out before the request is sent anyway + continue + type_ = annotations.get(key) if type_ is None: # we do not have a type annotation for this field, leave it as is @@ -380,3 +445,13 @@ async def _async_transform_typeddict( else: result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) return result + + +@lru_cache(maxsize=8096) +def get_type_hints( + obj: Any, + globalns: dict[str, Any] | None = None, + localns: Mapping[str, Any] | None = None, + include_extras: bool = False, +) -> dict[str, Any]: + return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras) diff --git a/src/openai/_utils/_typing.py b/src/openai/_utils/_typing.py index c036991f04..193109f3ad 100644 --- a/src/openai/_utils/_typing.py +++ b/src/openai/_utils/_typing.py @@ -1,11 +1,21 @@ from __future__ import annotations +import sys +import typing +import typing_extensions from typing import Any, TypeVar, Iterable, cast from collections import abc as _c_abc -from typing_extensions import Required, Annotated, get_args, get_origin - +from typing_extensions import ( + TypeIs, + Required, + Annotated, + get_args, + get_origin, +) + +from ._utils import lru_cache from .._types import InheritsGeneric -from .._compat import is_union as _is_union +from ._compat import is_union as _is_union def is_annotated_type(typ: type) -> bool: @@ -16,6 +26,11 @@ def is_list_type(typ: type) -> bool: return (get_origin(typ) or typ) == list +def is_sequence_type(typ: type) -> bool: + origin = get_origin(typ) or typ + return origin == typing_extensions.Sequence or origin == typing.Sequence or origin == _c_abc.Sequence + + def is_iterable_type(typ: type) -> bool: """If the given type is `typing.Iterable[T]`""" origin = get_origin(typ) or typ @@ -36,7 +51,28 @@ def is_typevar(typ: type) -> bool: return type(typ) == TypeVar # type: ignore +_TYPE_ALIAS_TYPES: tuple[type[typing_extensions.TypeAliasType], ...] = (typing_extensions.TypeAliasType,) +if sys.version_info >= (3, 12): + _TYPE_ALIAS_TYPES = (*_TYPE_ALIAS_TYPES, typing.TypeAliasType) + + +def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]: + """Return whether the provided argument is an instance of `TypeAliasType`. + + ```python + type Int = int + is_type_alias_type(Int) + # > True + Str = TypeAliasType("Str", str) + is_type_alias_type(Str) + # > True + ``` + """ + return isinstance(tp, _TYPE_ALIAS_TYPES) + + # Extracts T from Annotated[T, ...] 
or from Required[Annotated[T, ...]] +@lru_cache(maxsize=8096) def strip_annotated_type(typ: type) -> type: if is_required_type(typ) or is_annotated_type(typ): return strip_annotated_type(cast(type, get_args(typ)[0])) @@ -79,7 +115,7 @@ class MyResponse(Foo[_T]): ``` """ cls = cast(object, get_origin(typ) or typ) - if cls in generic_bases: + if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains] # we're given the class directly return extract_type_arg(typ, index) diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index 17904ce60d..eec7f4a1f2 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -16,12 +16,12 @@ overload, ) from pathlib import Path +from datetime import date, datetime from typing_extensions import TypeGuard import sniffio -from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike -from .._compat import parse_date as parse_date, parse_datetime as parse_datetime +from .._types import Omit, NotGiven, FileTypes, HeadersLike _T = TypeVar("_T") _TupleT = TypeVar("_TupleT", bound=Tuple[object, ...]) @@ -63,7 +63,7 @@ def _extract_items( try: key = path[index] except IndexError: - if isinstance(obj, NotGiven): + if not is_given(obj): # no value was provided - we can safely ignore return [] @@ -71,8 +71,16 @@ def _extract_items( from .._files import assert_is_file_content # We have exhausted the path, return the entry we found. - assert_is_file_content(obj, key=flattened_key) assert flattened_key is not None + + if is_list(obj): + files: list[tuple[str, FileTypes]] = [] + for entry in obj: + assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "") + files.append((flattened_key + "[]", cast(FileTypes, entry))) + return files + + assert_is_file_content(obj, key=flattened_key) return [(flattened_key, cast(FileTypes, obj))] index += 1 @@ -118,14 +126,14 @@ def _extract_items( return [] -def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]: - return not isinstance(obj, NotGiven) +def is_given(obj: _T | NotGiven | Omit) -> TypeGuard[_T]: + return not isinstance(obj, NotGiven) and not isinstance(obj, Omit) # Type safe methods for narrowing types with TypeVars. # The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], # however this cause Pyright to rightfully report errors. As we know we don't -# care about the contained types we can safely use `object` in it's place. +# care about the contained types we can safely use `object` in its place. # # There are two separate functions defined, `is_*` and `is_*_t` for different use cases. # `is_*` is for when you're dealing with an unknown input @@ -211,20 +219,17 @@ def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: Example usage: ```py @overload - def foo(*, a: str) -> str: - ... + def foo(*, a: str) -> str: ... @overload - def foo(*, b: bool) -> str: - ... + def foo(*, b: bool) -> str: ... # This enforces the same constraints that a static type checker would # i.e. that either a or b must be passed to the function @required_args(["a"], ["b"]) - def foo(*, a: str | None = None, b: bool | None = None) -> str: - ... + def foo(*, a: str | None = None, b: bool | None = None) -> str: ... ``` """ @@ -286,18 +291,15 @@ def wrapper(*args: object, **kwargs: object) -> object: @overload -def strip_not_given(obj: None) -> None: - ... +def strip_not_given(obj: None) -> None: ... @overload -def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: - ... 
+def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ... @overload -def strip_not_given(obj: object) -> object: - ... +def strip_not_given(obj: object) -> object: ... def strip_not_given(obj: object | None) -> object: @@ -369,13 +371,13 @@ def file_from_path(path: str) -> FileTypes: def get_required_header(headers: HeadersLike, header: str) -> str: lower_header = header.lower() - if isinstance(headers, Mapping): - headers = cast(Headers, headers) - for k, v in headers.items(): + if is_mapping_t(headers): + # mypy doesn't understand the type narrowing here + for k, v in headers.items(): # type: ignore if k.lower() == lower_header and isinstance(v, str): return v - """ to deal with the case where the header looks like Stainless-Event-Id """ + # to deal with the case where the header looks like Stainless-Event-Id intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize()) for normalized_header in [header, lower_header, header.upper(), intercaps_header]: @@ -401,3 +403,19 @@ def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]: maxsize=maxsize, ) return cast(Any, wrapper) # type: ignore[no-any-return] + + +def json_safe(data: object) -> object: + """Translates a mapping / sequence recursively in the same fashion + as `pydantic` v2's `model_dump(mode="json")`. + """ + if is_mapping(data): + return {json_safe(key): json_safe(value) for key, value in data.items()} + + if is_iterable(data) and not isinstance(data, (str, bytes, bytearray)): + return [json_safe(item) for item in data] + + if isinstance(data, (datetime, date)): + return data.isoformat() + + return data diff --git a/src/openai/_version.py b/src/openai/_version.py index 83411041ae..9d853d6512 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.30.1" # x-release-please-version +__version__ = "2.12.0" # x-release-please-version diff --git a/src/openai/cli/__init__.py b/src/openai/cli/__init__.py deleted file mode 100644 index d453d5e179..0000000000 --- a/src/openai/cli/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._cli import main as main diff --git a/src/openai/cli/_api/__init__.py b/src/openai/cli/_api/__init__.py deleted file mode 100644 index 56a0260a6d..0000000000 --- a/src/openai/cli/_api/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._main import register_commands as register_commands diff --git a/src/openai/cli/_api/_main.py b/src/openai/cli/_api/_main.py deleted file mode 100644 index fe5a5e6fc0..0000000000 --- a/src/openai/cli/_api/_main.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -from argparse import ArgumentParser - -from . 
import chat, audio, files, image, models, completions - - -def register_commands(parser: ArgumentParser) -> None: - subparsers = parser.add_subparsers(help="All API subcommands") - - chat.register(subparsers) - image.register(subparsers) - audio.register(subparsers) - files.register(subparsers) - models.register(subparsers) - completions.register(subparsers) diff --git a/src/openai/cli/_api/audio.py b/src/openai/cli/_api/audio.py deleted file mode 100644 index 90d21b9932..0000000000 --- a/src/openai/cli/_api/audio.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Optional, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from ..._types import NOT_GIVEN -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - # transcriptions - sub = subparser.add_parser("audio.transcriptions.create") - - # Required - sub.add_argument("-m", "--model", type=str, default="whisper-1") - sub.add_argument("-f", "--file", type=str, required=True) - # Optional - sub.add_argument("--response-format", type=str) - sub.add_argument("--language", type=str) - sub.add_argument("-t", "--temperature", type=float) - sub.add_argument("--prompt", type=str) - sub.set_defaults(func=CLIAudio.transcribe, args_model=CLITranscribeArgs) - - # translations - sub = subparser.add_parser("audio.translations.create") - - # Required - sub.add_argument("-f", "--file", type=str, required=True) - # Optional - sub.add_argument("-m", "--model", type=str, default="whisper-1") - sub.add_argument("--response-format", type=str) - # TODO: doesn't seem to be supported by the API - # sub.add_argument("--language", type=str) - sub.add_argument("-t", "--temperature", type=float) - sub.add_argument("--prompt", type=str) - sub.set_defaults(func=CLIAudio.translate, args_model=CLITranslationArgs) - - -class CLITranscribeArgs(BaseModel): - model: str - file: str - response_format: Optional[str] = None - language: Optional[str] = None - temperature: Optional[float] = None - prompt: Optional[str] = None - - -class CLITranslationArgs(BaseModel): - model: str - file: str - response_format: Optional[str] = None - language: Optional[str] = None - temperature: Optional[float] = None - prompt: Optional[str] = None - - -class CLIAudio: - @staticmethod - def transcribe(args: CLITranscribeArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - model = get_client().audio.transcriptions.create( - file=(args.file, buffer_reader), - model=args.model, - language=args.language or NOT_GIVEN, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - response_format=cast(Any, args.response_format), - ) - print_model(model) - - @staticmethod - def translate(args: CLITranslationArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - model = get_client().audio.translations.create( - file=(args.file, buffer_reader), - model=args.model, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for 
forwards-compat - response_format=cast(Any, args.response_format), - ) - print_model(model) diff --git a/src/openai/cli/_api/chat/__init__.py b/src/openai/cli/_api/chat/__init__.py deleted file mode 100644 index 87d971630a..0000000000 --- a/src/openai/cli/_api/chat/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from . import completions - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - completions.register(subparser) diff --git a/src/openai/cli/_api/chat/completions.py b/src/openai/cli/_api/chat/completions.py deleted file mode 100644 index c299741fe0..0000000000 --- a/src/openai/cli/_api/chat/completions.py +++ /dev/null @@ -1,156 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING, List, Optional, cast -from argparse import ArgumentParser -from typing_extensions import Literal, NamedTuple - -from ..._utils import get_client -from ..._models import BaseModel -from ...._streaming import Stream -from ....types.chat import ( - ChatCompletionRole, - ChatCompletionChunk, - CompletionCreateParams, -) -from ....types.chat.completion_create_params import ( - CompletionCreateParamsStreaming, - CompletionCreateParamsNonStreaming, -) - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("chat.completions.create") - - sub._action_groups.pop() - req = sub.add_argument_group("required arguments") - opt = sub.add_argument_group("optional arguments") - - req.add_argument( - "-g", - "--message", - action="append", - nargs=2, - metavar=("ROLE", "CONTENT"), - help="A message in `{role} {content}` format. Use this argument multiple times to add multiple messages.", - required=True, - ) - req.add_argument( - "-m", - "--model", - help="The model to use.", - required=True, - ) - - opt.add_argument( - "-n", - "--n", - help="How many completions to generate for the conversation.", - type=int, - ) - opt.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate.", type=int) - opt.add_argument( - "-t", - "--temperature", - help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - -Mutually exclusive with `top_p`.""", - type=float, - ) - opt.add_argument( - "-P", - "--top_p", - help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. 
- - Mutually exclusive with `temperature`.""", - type=float, - ) - opt.add_argument( - "--stop", - help="A stop sequence at which to stop generating tokens for the message.", - ) - opt.add_argument("--stream", help="Stream messages as they're ready.", action="store_true") - sub.set_defaults(func=CLIChatCompletion.create, args_model=CLIChatCompletionCreateArgs) - - -class CLIMessage(NamedTuple): - role: ChatCompletionRole - content: str - - -class CLIChatCompletionCreateArgs(BaseModel): - message: List[CLIMessage] - model: str - n: Optional[int] = None - max_tokens: Optional[int] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - stop: Optional[str] = None - stream: bool = False - - -class CLIChatCompletion: - @staticmethod - def create(args: CLIChatCompletionCreateArgs) -> None: - params: CompletionCreateParams = { - "model": args.model, - "messages": [ - {"role": cast(Literal["user"], message.role), "content": message.content} for message in args.message - ], - "n": args.n, - "temperature": args.temperature, - "top_p": args.top_p, - "stop": args.stop, - # type checkers are not good at inferring union types so we have to set stream afterwards - "stream": False, - } - if args.stream: - params["stream"] = args.stream # type: ignore - if args.max_tokens is not None: - params["max_tokens"] = args.max_tokens - - if args.stream: - return CLIChatCompletion._stream_create(cast(CompletionCreateParamsStreaming, params)) - - return CLIChatCompletion._create(cast(CompletionCreateParamsNonStreaming, params)) - - @staticmethod - def _create(params: CompletionCreateParamsNonStreaming) -> None: - completion = get_client().chat.completions.create(**params) - should_print_header = len(completion.choices) > 1 - for choice in completion.choices: - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - content = choice.message.content if choice.message.content is not None else "None" - sys.stdout.write(content) - - if should_print_header or not content.endswith("\n"): - sys.stdout.write("\n") - - sys.stdout.flush() - - @staticmethod - def _stream_create(params: CompletionCreateParamsStreaming) -> None: - # cast is required for mypy - stream = cast( # pyright: ignore[reportUnnecessaryCast] - Stream[ChatCompletionChunk], get_client().chat.completions.create(**params) - ) - for chunk in stream: - should_print_header = len(chunk.choices) > 1 - for choice in chunk.choices: - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - content = choice.delta.content or "" - sys.stdout.write(content) - - if should_print_header: - sys.stdout.write("\n") - - sys.stdout.flush() - - sys.stdout.write("\n") diff --git a/src/openai/cli/_api/completions.py b/src/openai/cli/_api/completions.py deleted file mode 100644 index cbdb35bf3a..0000000000 --- a/src/openai/cli/_api/completions.py +++ /dev/null @@ -1,173 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING, Optional, cast -from argparse import ArgumentParser -from functools import partial - -from openai.types.completion import Completion - -from .._utils import get_client -from ..._types import NOT_GIVEN, NotGivenOr -from ..._utils import is_given -from .._errors import CLIError -from .._models import BaseModel -from ..._streaming import Stream - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = 
subparser.add_parser("completions.create") - - # Required - sub.add_argument( - "-m", - "--model", - help="The model to use", - required=True, - ) - - # Optional - sub.add_argument("-p", "--prompt", help="An optional prompt to complete from") - sub.add_argument("--stream", help="Stream tokens as they're ready.", action="store_true") - sub.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate", type=int) - sub.add_argument( - "-t", - "--temperature", - help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - -Mutually exclusive with `top_p`.""", - type=float, - ) - sub.add_argument( - "-P", - "--top_p", - help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. - - Mutually exclusive with `temperature`.""", - type=float, - ) - sub.add_argument( - "-n", - "--n", - help="How many sub-completions to generate for each prompt.", - type=int, - ) - sub.add_argument( - "--logprobs", - help="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. If `logprobs` is 0, only the chosen tokens will have logprobs returned.", - type=int, - ) - sub.add_argument( - "--best_of", - help="Generates `best_of` completions server-side and returns the 'best' (the one with the highest log probability per token). Results cannot be streamed.", - type=int, - ) - sub.add_argument( - "--echo", - help="Echo back the prompt in addition to the completion", - action="store_true", - ) - sub.add_argument( - "--frequency_penalty", - help="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - type=float, - ) - sub.add_argument( - "--presence_penalty", - help="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - type=float, - ) - sub.add_argument("--suffix", help="The suffix that comes after a completion of inserted text.") - sub.add_argument("--stop", help="A stop sequence at which to stop generating tokens.") - sub.add_argument( - "--user", - help="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", - ) - # TODO: add support for logit_bias - sub.set_defaults(func=CLICompletions.create, args_model=CLICompletionCreateArgs) - - -class CLICompletionCreateArgs(BaseModel): - model: str - stream: bool = False - - prompt: Optional[str] = None - n: NotGivenOr[int] = NOT_GIVEN - stop: NotGivenOr[str] = NOT_GIVEN - user: NotGivenOr[str] = NOT_GIVEN - echo: NotGivenOr[bool] = NOT_GIVEN - suffix: NotGivenOr[str] = NOT_GIVEN - best_of: NotGivenOr[int] = NOT_GIVEN - top_p: NotGivenOr[float] = NOT_GIVEN - logprobs: NotGivenOr[int] = NOT_GIVEN - max_tokens: NotGivenOr[int] = NOT_GIVEN - temperature: NotGivenOr[float] = NOT_GIVEN - presence_penalty: NotGivenOr[float] = NOT_GIVEN - frequency_penalty: NotGivenOr[float] = NOT_GIVEN - - -class CLICompletions: - @staticmethod - def create(args: CLICompletionCreateArgs) -> None: - if is_given(args.n) and args.n > 1 and args.stream: - raise CLIError("Can't stream completions with 
n>1 with the current CLI") - - make_request = partial( - get_client().completions.create, - n=args.n, - echo=args.echo, - stop=args.stop, - user=args.user, - model=args.model, - top_p=args.top_p, - prompt=args.prompt, - suffix=args.suffix, - best_of=args.best_of, - logprobs=args.logprobs, - max_tokens=args.max_tokens, - temperature=args.temperature, - presence_penalty=args.presence_penalty, - frequency_penalty=args.frequency_penalty, - ) - - if args.stream: - return CLICompletions._stream_create( - # mypy doesn't understand the `partial` function but pyright does - cast(Stream[Completion], make_request(stream=True)) # pyright: ignore[reportUnnecessaryCast] - ) - - return CLICompletions._create(make_request()) - - @staticmethod - def _create(completion: Completion) -> None: - should_print_header = len(completion.choices) > 1 - for choice in completion.choices: - if should_print_header: - sys.stdout.write("===== Completion {} =====\n".format(choice.index)) - - sys.stdout.write(choice.text) - - if should_print_header or not choice.text.endswith("\n"): - sys.stdout.write("\n") - - sys.stdout.flush() - - @staticmethod - def _stream_create(stream: Stream[Completion]) -> None: - for completion in stream: - should_print_header = len(completion.choices) > 1 - for choice in sorted(completion.choices, key=lambda c: c.index): - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - sys.stdout.write(choice.text) - - if should_print_header: - sys.stdout.write("\n") - - sys.stdout.flush() - - sys.stdout.write("\n") diff --git a/src/openai/cli/_api/files.py b/src/openai/cli/_api/files.py deleted file mode 100644 index 5f3631b284..0000000000 --- a/src/openai/cli/_api/files.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("files.create") - - sub.add_argument( - "-f", - "--file", - required=True, - help="File to upload", - ) - sub.add_argument( - "-p", - "--purpose", - help="Why are you uploading this file? 
(see https://platform.openai.com/docs/api-reference/ for purposes)", - required=True, - ) - sub.set_defaults(func=CLIFile.create, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.retrieve") - sub.add_argument("-i", "--id", required=True, help="The files ID") - sub.set_defaults(func=CLIFile.get, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.delete") - sub.add_argument("-i", "--id", required=True, help="The files ID") - sub.set_defaults(func=CLIFile.delete, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.list") - sub.set_defaults(func=CLIFile.list) - - -class CLIFileIDArgs(BaseModel): - id: str - - -class CLIFileCreateArgs(BaseModel): - file: str - purpose: str - - -class CLIFile: - @staticmethod - def create(args: CLIFileCreateArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - file = get_client().files.create( - file=(args.file, buffer_reader), - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - purpose=cast(Any, args.purpose), - ) - print_model(file) - - @staticmethod - def get(args: CLIFileIDArgs) -> None: - file = get_client().files.retrieve(file_id=args.id) - print_model(file) - - @staticmethod - def delete(args: CLIFileIDArgs) -> None: - file = get_client().files.delete(file_id=args.id) - print_model(file) - - @staticmethod - def list() -> None: - files = get_client().files.list() - for file in files: - print_model(file) diff --git a/src/openai/cli/_api/image.py b/src/openai/cli/_api/image.py deleted file mode 100644 index 3e2a0a90f1..0000000000 --- a/src/openai/cli/_api/image.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from ..._types import NOT_GIVEN, NotGiven, NotGivenOr -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("images.generate") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-p", "--prompt", type=str, required=True) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs) - - sub = subparser.add_parser("images.edit") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-p", "--prompt", type=str, required=True) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument( - "-I", - "--image", - type=str, - required=True, - help="Image to modify. Should be a local path and a PNG encoded image.", - ) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.add_argument( - "-M", - "--mask", - type=str, - required=False, - help="Path to a mask image. It should be the same size as the image you're editing and a RGBA PNG image. 
The Alpha channel acts as the mask.", - ) - sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs) - - sub = subparser.add_parser("images.create_variation") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument( - "-I", - "--image", - type=str, - required=True, - help="Image to modify. Should be a local path and a PNG encoded image.", - ) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.set_defaults(func=CLIImage.create_variation, args_model=CLIImageCreateVariationArgs) - - -class CLIImageCreateArgs(BaseModel): - prompt: str - num_images: int - size: str - response_format: str - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImageCreateVariationArgs(BaseModel): - image: str - num_images: int - size: str - response_format: str - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImageEditArgs(BaseModel): - image: str - num_images: int - size: str - response_format: str - prompt: str - mask: NotGivenOr[str] = NOT_GIVEN - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImage: - @staticmethod - def create(args: CLIImageCreateArgs) -> None: - image = get_client().images.generate( - model=args.model, - prompt=args.prompt, - n=args.num_images, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) - - @staticmethod - def create_variation(args: CLIImageCreateVariationArgs) -> None: - with open(args.image, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - image = get_client().images.create_variation( - model=args.model, - image=("image", buffer_reader), - n=args.num_images, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) - - @staticmethod - def edit(args: CLIImageEditArgs) -> None: - with open(args.image, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Image upload progress") - - if isinstance(args.mask, NotGiven): - mask: NotGivenOr[BufferReader] = NOT_GIVEN - else: - with open(args.mask, "rb") as file_reader: - mask = BufferReader(file_reader.read(), desc="Mask progress") - - image = get_client().images.edit( - model=args.model, - prompt=args.prompt, - image=("image", buffer_reader), - n=args.num_images, - mask=("mask", mask) if not isinstance(mask, NotGiven) else mask, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) diff --git a/src/openai/cli/_api/models.py b/src/openai/cli/_api/models.py deleted file mode 100644 index 017218fa6e..0000000000 --- a/src/openai/cli/_api/models.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from .._models import BaseModel - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("models.list") - 
sub.set_defaults(func=CLIModels.list) - - sub = subparser.add_parser("models.retrieve") - sub.add_argument("-i", "--id", required=True, help="The model ID") - sub.set_defaults(func=CLIModels.get, args_model=CLIModelIDArgs) - - sub = subparser.add_parser("models.delete") - sub.add_argument("-i", "--id", required=True, help="The model ID") - sub.set_defaults(func=CLIModels.delete, args_model=CLIModelIDArgs) - - -class CLIModelIDArgs(BaseModel): - id: str - - -class CLIModels: - @staticmethod - def get(args: CLIModelIDArgs) -> None: - model = get_client().models.retrieve(model=args.id) - print_model(model) - - @staticmethod - def delete(args: CLIModelIDArgs) -> None: - model = get_client().models.delete(model=args.id) - print_model(model) - - @staticmethod - def list() -> None: - models = get_client().models.list() - for model in models: - print_model(model) diff --git a/src/openai/cli/_cli.py b/src/openai/cli/_cli.py deleted file mode 100644 index 72e5c923bd..0000000000 --- a/src/openai/cli/_cli.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import annotations - -import sys -import logging -import argparse -from typing import Any, List, Type, Optional -from typing_extensions import ClassVar - -import httpx -import pydantic - -import openai - -from . import _tools -from .. import _ApiType, __version__ -from ._api import register_commands -from ._utils import can_use_http2 -from .._types import ProxiesDict -from ._errors import CLIError, display_error -from .._compat import PYDANTIC_V2, ConfigDict, model_parse -from .._models import BaseModel -from .._exceptions import APIError - -logger = logging.getLogger() -formatter = logging.Formatter("[%(asctime)s] %(message)s") -handler = logging.StreamHandler(sys.stderr) -handler.setFormatter(formatter) -logger.addHandler(handler) - - -class Arguments(BaseModel): - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict( - extra="ignore", - ) - else: - - class Config(pydantic.BaseConfig): # type: ignore - extra: Any = pydantic.Extra.ignore # type: ignore - - verbosity: int - version: Optional[str] = None - - api_key: Optional[str] - api_base: Optional[str] - organization: Optional[str] - proxy: Optional[List[str]] - api_type: Optional[_ApiType] = None - api_version: Optional[str] = None - - # azure - azure_endpoint: Optional[str] = None - azure_ad_token: Optional[str] = None - - # internal, set by subparsers to parse their specific args - args_model: Optional[Type[BaseModel]] = None - - # internal, used so that subparsers can forward unknown arguments - unknown_args: List[str] = [] - allow_unknown_args: bool = False - - -def _build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=None, prog="openai") - parser.add_argument( - "-v", - "--verbose", - action="count", - dest="verbosity", - default=0, - help="Set verbosity.", - ) - parser.add_argument("-b", "--api-base", help="What API base url to use.") - parser.add_argument("-k", "--api-key", help="What API key to use.") - parser.add_argument("-p", "--proxy", nargs="+", help="What proxy to use.") - parser.add_argument( - "-o", - "--organization", - help="Which organization to run as (will use your default organization if not specified)", - ) - parser.add_argument( - "-t", - "--api-type", - type=str, - choices=("openai", "azure"), - help="The backend API to call, must be `openai` or `azure`", - ) - parser.add_argument( - "--api-version", - help="The Azure API version, e.g. 
'https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning'", - ) - - # azure - parser.add_argument( - "--azure-endpoint", - help="The Azure endpoint, e.g. 'https://endpoint.openai.azure.com'", - ) - parser.add_argument( - "--azure-ad-token", - help="A token from Azure Active Directory, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id", - ) - - # prints the package version - parser.add_argument( - "-V", - "--version", - action="version", - version="%(prog)s " + __version__, - ) - - def help() -> None: - parser.print_help() - - parser.set_defaults(func=help) - - subparsers = parser.add_subparsers() - sub_api = subparsers.add_parser("api", help="Direct API calls") - - register_commands(sub_api) - - sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience") - _tools.register_commands(sub_tools, subparsers) - - return parser - - -def main() -> int: - try: - _main() - except (APIError, CLIError, pydantic.ValidationError) as err: - display_error(err) - return 1 - except KeyboardInterrupt: - sys.stderr.write("\n") - return 1 - return 0 - - -def _parse_args(parser: argparse.ArgumentParser) -> tuple[argparse.Namespace, Arguments, list[str]]: - # argparse by default will strip out the `--` but we want to keep it for unknown arguments - if "--" in sys.argv: - idx = sys.argv.index("--") - known_args = sys.argv[1:idx] - unknown_args = sys.argv[idx:] - else: - known_args = sys.argv[1:] - unknown_args = [] - - parsed, remaining_unknown = parser.parse_known_args(known_args) - - # append any remaining unknown arguments from the initial parsing - remaining_unknown.extend(unknown_args) - - args = model_parse(Arguments, vars(parsed)) - if not args.allow_unknown_args: - # we have to parse twice to ensure any unknown arguments - # result in an error if that behaviour is desired - parser.parse_args() - - return parsed, args, remaining_unknown - - -def _main() -> None: - parser = _build_parser() - parsed, args, unknown = _parse_args(parser) - - if args.verbosity != 0: - sys.stderr.write("Warning: --verbosity isn't supported yet\n") - - proxies: ProxiesDict = {} - if args.proxy is not None: - for proxy in args.proxy: - key = "https://" if proxy.startswith("https") else "https://" - if key in proxies: - raise CLIError(f"Multiple {key} proxies given - only the last one would be used") - - proxies[key] = proxy - - http_client = httpx.Client( - proxies=proxies or None, - http2=can_use_http2(), - ) - openai.http_client = http_client - - if args.organization: - openai.organization = args.organization - - if args.api_key: - openai.api_key = args.api_key - - if args.api_base: - openai.base_url = args.api_base - - # azure - if args.api_type is not None: - openai.api_type = args.api_type - - if args.azure_endpoint is not None: - openai.azure_endpoint = args.azure_endpoint - - if args.api_version is not None: - openai.api_version = args.api_version - - if args.azure_ad_token is not None: - openai.azure_ad_token = args.azure_ad_token - - try: - if args.args_model: - parsed.func( - model_parse( - args.args_model, - { - **{ - # we omit None values so that they can be defaulted to `NotGiven` - # and we'll strip it from the API request - key: value - for key, value in vars(parsed).items() - if value is not None - }, - "unknown_args": unknown, - }, - ) - ) - else: - parsed.func() - finally: - try: - http_client.close() - except 
Exception: - pass - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/openai/cli/_errors.py b/src/openai/cli/_errors.py deleted file mode 100644 index 2bf06070d6..0000000000 --- a/src/openai/cli/_errors.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -import sys - -import pydantic - -from ._utils import Colors, organization_info -from .._exceptions import APIError, OpenAIError - - -class CLIError(OpenAIError): - ... - - -class SilentCLIError(CLIError): - ... - - -def display_error(err: CLIError | APIError | pydantic.ValidationError) -> None: - if isinstance(err, SilentCLIError): - return - - sys.stderr.write("{}{}Error:{} {}\n".format(organization_info(), Colors.FAIL, Colors.ENDC, err)) diff --git a/src/openai/cli/_models.py b/src/openai/cli/_models.py deleted file mode 100644 index 5583db2609..0000000000 --- a/src/openai/cli/_models.py +++ /dev/null @@ -1,17 +0,0 @@ -from typing import Any -from typing_extensions import ClassVar - -import pydantic - -from .. import _models -from .._compat import PYDANTIC_V2, ConfigDict - - -class BaseModel(_models.BaseModel): - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict(extra="ignore", arbitrary_types_allowed=True) - else: - - class Config(pydantic.BaseConfig): # type: ignore - extra: Any = pydantic.Extra.ignore # type: ignore - arbitrary_types_allowed: bool = True diff --git a/src/openai/cli/_progress.py b/src/openai/cli/_progress.py deleted file mode 100644 index 8a7f2525de..0000000000 --- a/src/openai/cli/_progress.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -import io -from typing import Callable -from typing_extensions import override - - -class CancelledError(Exception): - def __init__(self, msg: str) -> None: - self.msg = msg - super().__init__(msg) - - @override - def __str__(self) -> str: - return self.msg - - __repr__ = __str__ - - -class BufferReader(io.BytesIO): - def __init__(self, buf: bytes = b"", desc: str | None = None) -> None: - super().__init__(buf) - self._len = len(buf) - self._progress = 0 - self._callback = progress(len(buf), desc=desc) - - def __len__(self) -> int: - return self._len - - @override - def read(self, n: int | None = -1) -> bytes: - chunk = io.BytesIO.read(self, n) - self._progress += len(chunk) - - try: - self._callback(self._progress) - except Exception as e: # catches exception from the callback - raise CancelledError("The upload was cancelled: {}".format(e)) from e - - return chunk - - -def progress(total: float, desc: str | None) -> Callable[[float], None]: - import tqdm - - meter = tqdm.tqdm(total=total, unit_scale=True, desc=desc) - - def incr(progress: float) -> None: - meter.n = progress - if progress == total: - meter.close() - else: - meter.refresh() - - return incr - - -def MB(i: int) -> int: - return int(i // 1024**2) diff --git a/src/openai/cli/_tools/__init__.py b/src/openai/cli/_tools/__init__.py deleted file mode 100644 index 56a0260a6d..0000000000 --- a/src/openai/cli/_tools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._main import register_commands as register_commands diff --git a/src/openai/cli/_tools/_main.py b/src/openai/cli/_tools/_main.py deleted file mode 100644 index bd6cda408f..0000000000 --- a/src/openai/cli/_tools/_main.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from . 
import migrate, fine_tunes - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register_commands(parser: ArgumentParser, subparser: _SubParsersAction[ArgumentParser]) -> None: - migrate.register(subparser) - - namespaced = parser.add_subparsers(title="Tools", help="Convenience client side tools") - - fine_tunes.register(namespaced) diff --git a/src/openai/cli/_tools/fine_tunes.py b/src/openai/cli/_tools/fine_tunes.py deleted file mode 100644 index 2128b88952..0000000000 --- a/src/openai/cli/_tools/fine_tunes.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from .._models import BaseModel -from ...lib._validators import ( - get_validators, - write_out_file, - read_any_format, - apply_validators, - apply_necessary_remediation, -) - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("fine_tunes.prepare_data") - sub.add_argument( - "-f", - "--file", - required=True, - help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed." - "This should be the local file path.", - ) - sub.add_argument( - "-q", - "--quiet", - required=False, - action="store_true", - help="Auto accepts all suggestions, without asking for user input. To be used within scripts.", - ) - sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs) - - -class PrepareDataArgs(BaseModel): - file: str - - quiet: bool - - -def prepare_data(args: PrepareDataArgs) -> None: - sys.stdout.write("Analyzing...\n") - fname = args.file - auto_accept = args.quiet - df, remediation = read_any_format(fname) - apply_necessary_remediation(None, remediation) - - validators = get_validators() - - assert df is not None - - apply_validators( - df, - fname, - remediation, - validators, - auto_accept, - write_out_file_func=write_out_file, - ) diff --git a/src/openai/cli/_tools/migrate.py b/src/openai/cli/_tools/migrate.py deleted file mode 100644 index 53073b866f..0000000000 --- a/src/openai/cli/_tools/migrate.py +++ /dev/null @@ -1,181 +0,0 @@ -from __future__ import annotations - -import os -import sys -import json -import shutil -import tarfile -import platform -import subprocess -from typing import TYPE_CHECKING, List -from pathlib import Path -from argparse import ArgumentParser - -import httpx - -from .._errors import CLIError, SilentCLIError -from .._models import BaseModel - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("migrate") - sub.set_defaults(func=migrate, args_model=MigrateArgs, allow_unknown_args=True) - - sub = subparser.add_parser("grit") - sub.set_defaults(func=grit, args_model=GritArgs, allow_unknown_args=True) - - -class GritArgs(BaseModel): - # internal - unknown_args: List[str] = [] - - -def grit(args: GritArgs) -> None: - grit_path = install() - - try: - subprocess.check_call([grit_path, *args.unknown_args]) - except subprocess.CalledProcessError: - # stdout and stderr are forwarded by subprocess so an error will already - # have been displayed - raise SilentCLIError() from None - - -class MigrateArgs(BaseModel): - # internal - unknown_args: List[str] = [] - - -def migrate(args: MigrateArgs) -> None: - grit_path = install() - - try: - subprocess.check_call([grit_path, "apply", "openai", *args.unknown_args]) - except 
subprocess.CalledProcessError: - # stdout and stderr are forwarded by subprocess so an error will already - # have been displayed - raise SilentCLIError() from None - - -# handles downloading the Grit CLI until they provide their own PyPi package - -KEYGEN_ACCOUNT = "custodian-dev" - - -def _cache_dir() -> Path: - xdg = os.environ.get("XDG_CACHE_HOME") - if xdg is not None: - return Path(xdg) - - return Path.home() / ".cache" - - -def _debug(message: str) -> None: - if not os.environ.get("DEBUG"): - return - - sys.stdout.write(f"[DEBUG]: {message}\n") - - -def install() -> Path: - """Installs the Grit CLI and returns the location of the binary""" - if sys.platform == "win32": - raise CLIError("Windows is not supported yet in the migration CLI") - - platform = "macos" if sys.platform == "darwin" else "linux" - - dir_name = _cache_dir() / "openai-python" - install_dir = dir_name / ".install" - target_dir = install_dir / "bin" - - target_path = target_dir / "marzano" - temp_file = target_dir / "marzano.tmp" - - if target_path.exists(): - _debug(f"{target_path} already exists") - sys.stdout.flush() - return target_path - - _debug(f"Using Grit CLI path: {target_path}") - - target_dir.mkdir(parents=True, exist_ok=True) - - if temp_file.exists(): - temp_file.unlink() - - arch = _get_arch() - _debug(f"Using architecture {arch}") - - file_name = f"marzano-{platform}-{arch}" - meta_url = f"https://api.keygen.sh/v1/accounts/{KEYGEN_ACCOUNT}/artifacts/{file_name}" - - sys.stdout.write(f"Retrieving Grit CLI metadata from {meta_url}\n") - with httpx.Client() as client: - response = client.get(meta_url) # pyright: ignore[reportUnknownMemberType] - - data = response.json() - errors = data.get("errors") - if errors: - for error in errors: - sys.stdout.write(f"{error}\n") - - raise CLIError("Could not locate Grit CLI binary - see above errors") - - write_manifest(install_dir, data["data"]["relationships"]["release"]["data"]["id"]) - - link = data["data"]["links"]["redirect"] - _debug(f"Redirect URL {link}") - - download_response = client.get(link) # pyright: ignore[reportUnknownMemberType] - with open(temp_file, "wb") as file: - for chunk in download_response.iter_bytes(): - file.write(chunk) - - unpacked_dir = target_dir / "cli-bin" - unpacked_dir.mkdir(parents=True, exist_ok=True) - - with tarfile.open(temp_file, "r:gz") as archive: - archive.extractall(unpacked_dir, filter="data") - - for item in unpacked_dir.iterdir(): - item.rename(target_dir / item.name) - - shutil.rmtree(unpacked_dir) - os.remove(temp_file) - os.chmod(target_path, 0o755) - - sys.stdout.flush() - - return target_path - - -def _get_arch() -> str: - architecture = platform.machine().lower() - - # Map the architecture names to Node.js equivalents - arch_map = { - "x86_64": "x64", - "amd64": "x64", - "armv7l": "arm", - "aarch64": "arm64", - } - - return arch_map.get(architecture, architecture) - - -def write_manifest(install_path: Path, release: str) -> None: - manifest = { - "installPath": str(install_path), - "binaries": { - "marzano": { - "name": "marzano", - "release": release, - }, - }, - } - manifest_path = Path(install_path) / "manifests.json" - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) diff --git a/src/openai/cli/_utils.py b/src/openai/cli/_utils.py deleted file mode 100644 index 673eed613c..0000000000 --- a/src/openai/cli/_utils.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -import sys - -import openai - -from .. 
import OpenAI, _load_client -from .._compat import model_json -from .._models import BaseModel - - -class Colors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - - -def get_client() -> OpenAI: - return _load_client() - - -def organization_info() -> str: - organization = openai.organization - if organization is not None: - return "[organization={}] ".format(organization) - - return "" - - -def print_model(model: BaseModel) -> None: - sys.stdout.write(model_json(model, indent=2) + "\n") - - -def can_use_http2() -> bool: - try: - import h2 # type: ignore # noqa - except ImportError: - return False - - return True diff --git a/src/openai/lib/_old_api.py b/src/openai/lib/_old_api.py deleted file mode 100644 index 929c87e80b..0000000000 --- a/src/openai/lib/_old_api.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from .._exceptions import OpenAIError - -INSTRUCTIONS = """ - -You tried to access openai.{symbol}, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API. - -You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. - -Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28` - -A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742 -""" - - -class APIRemovedInV1(OpenAIError): - def __init__(self, *, symbol: str) -> None: - super().__init__(INSTRUCTIONS.format(symbol=symbol)) - - -class APIRemovedInV1Proxy(LazyProxy[Any]): - def __init__(self, *, symbol: str) -> None: - super().__init__() - self._symbol = symbol - - @override - def __load__(self) -> Any: - # return the proxy until it is eventually called so that - # we don't break people that are just checking the attributes - # of a module - return self - - def __call__(self, *_args: Any, **_kwargs: Any) -> Any: - raise APIRemovedInV1(symbol=self._symbol) - - -SYMBOLS = [ - "Edit", - "File", - "Audio", - "Image", - "Model", - "Engine", - "Customer", - "FineTune", - "Embedding", - "Completion", - "Deployment", - "Moderation", - "ErrorObject", - "FineTuningJob", - "ChatCompletion", -] - -# we explicitly tell type checkers that nothing is exported -# from this file so that when we re-export the old symbols -# in `openai/__init__.py` they aren't added to the auto-complete -# suggestions given by editors -if TYPE_CHECKING: - __all__: list[str] = [] -else: - __all__ = SYMBOLS - - -__locals = locals() -for symbol in SYMBOLS: - __locals[symbol] = APIRemovedInV1Proxy(symbol=symbol) diff --git a/src/openai/lib/_validators.py b/src/openai/lib/_validators.py deleted file mode 100644 index cf24cd2294..0000000000 --- a/src/openai/lib/_validators.py +++ /dev/null @@ -1,809 +0,0 @@ -# pyright: basic -from __future__ import annotations - -import os -import sys -from typing import Any, TypeVar, Callable, Optional, NamedTuple -from typing_extensions import TypeAlias - -from .._extras import pandas as pd - - -class Remediation(NamedTuple): - name: str - immediate_msg: Optional[str] = None - necessary_msg: Optional[str] = None - necessary_fn: Optional[Callable[[Any], Any]] = None - optional_msg: Optional[str] = None - optional_fn: Optional[Callable[[Any], Any]] = 
None - error_msg: Optional[str] = None - - -OptionalDataFrameT = TypeVar("OptionalDataFrameT", bound="Optional[pd.DataFrame]") - - -def num_examples_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will only print out the number of examples and recommend to the user to increase the number of examples if less than 100. - """ - MIN_EXAMPLES = 100 - optional_suggestion = ( - "" - if len(df) >= MIN_EXAMPLES - else ". In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples" - ) - immediate_msg = f"\n- Your file contains {len(df)} prompt-completion pairs{optional_suggestion}" - return Remediation(name="num_examples", immediate_msg=immediate_msg) - - -def necessary_column_validator(df: pd.DataFrame, necessary_column: str) -> Remediation: - """ - This validator will ensure that the necessary column is present in the dataframe. - """ - - def lower_case_column(df: pd.DataFrame, column: Any) -> pd.DataFrame: - cols = [c for c in df.columns if str(c).lower() == column] - df.rename(columns={cols[0]: column.lower()}, inplace=True) - return df - - immediate_msg = None - necessary_fn = None - necessary_msg = None - error_msg = None - - if necessary_column not in df.columns: - if necessary_column in [str(c).lower() for c in df.columns]: - - def lower_case_column_creator(df: pd.DataFrame) -> pd.DataFrame: - return lower_case_column(df, necessary_column) - - necessary_fn = lower_case_column_creator - immediate_msg = f"\n- The `{necessary_column}` column/key should be lowercase" - necessary_msg = f"Lower case column name to `{necessary_column}`" - else: - error_msg = f"`{necessary_column}` column/key is missing. Please make sure you name your columns/keys appropriately, then retry" - - return Remediation( - name="necessary_column", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - error_msg=error_msg, - ) - - -def additional_column_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation: - """ - This validator will remove additional columns from the dataframe. - """ - additional_columns = [] - necessary_msg = None - immediate_msg = None - necessary_fn = None # type: ignore - - if len(df.columns) > 2: - additional_columns = [c for c in df.columns if c not in fields] - warn_message = "" - for ac in additional_columns: - dups = [c for c in additional_columns if ac in c] - if len(dups) > 0: - warn_message += f"\n WARNING: Some of the additional columns/keys contain `{ac}` in their name. These will be ignored, and the column/key `{ac}` will be used instead. This could also result from a duplicate column/key in the provided file." - immediate_msg = f"\n- The input file should contain exactly two columns/keys per row. Additional columns/keys present are: {additional_columns}{warn_message}" - necessary_msg = f"Remove additional columns/keys: {additional_columns}" - - def necessary_fn(x: Any) -> Any: - return x[fields] - - return Remediation( - name="additional_column", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - ) - - -def non_empty_field_validator(df: pd.DataFrame, field: str = "completion") -> Remediation: - """ - This validator will ensure that no completion is empty. 
- """ - necessary_msg = None - necessary_fn = None # type: ignore - immediate_msg = None - - if df[field].apply(lambda x: x == "").any() or df[field].isnull().any(): - empty_rows = (df[field] == "") | (df[field].isnull()) - empty_indexes = df.reset_index().index[empty_rows].tolist() - immediate_msg = f"\n- `{field}` column/key should not contain empty strings. These are rows: {empty_indexes}" - - def necessary_fn(x: Any) -> Any: - return x[x[field] != ""].dropna(subset=[field]) - - necessary_msg = f"Remove {len(empty_indexes)} rows with empty {field}s" - - return Remediation( - name=f"empty_{field}", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - ) - - -def duplicated_rows_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation: - """ - This validator will suggest to the user to remove duplicate rows if they exist. - """ - duplicated_rows = df.duplicated(subset=fields) - duplicated_indexes = df.reset_index().index[duplicated_rows].tolist() - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - if len(duplicated_indexes) > 0: - immediate_msg = f"\n- There are {len(duplicated_indexes)} duplicated {'-'.join(fields)} sets. These are rows: {duplicated_indexes}" - optional_msg = f"Remove {len(duplicated_indexes)} duplicate rows" - - def optional_fn(x: Any) -> Any: - return x.drop_duplicates(subset=fields) - - return Remediation( - name="duplicated_rows", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def long_examples_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to the user to remove examples that are too long. - """ - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - ft_type = infer_task_type(df) - if ft_type != "open-ended generation": - - def get_long_indexes(d: pd.DataFrame) -> Any: - long_examples = d.apply(lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1) - return d.reset_index().index[long_examples].tolist() - - long_indexes = get_long_indexes(df) - - if len(long_indexes) > 0: - immediate_msg = f"\n- There are {len(long_indexes)} examples that are very long. These are rows: {long_indexes}\nFor conditional generation, and for classification the examples shouldn't be longer than 2048 tokens." - optional_msg = f"Remove {len(long_indexes)} long examples" - - def optional_fn(x: Any) -> Any: - long_indexes_to_drop = get_long_indexes(x) - if long_indexes != long_indexes_to_drop: - sys.stdout.write( - f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n" - ) - return x.drop(long_indexes_to_drop) - - return Remediation( - name="long_examples", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a common suffix to the prompt if one doesn't already exist in case of classification or conditional generation. 
- """ - error_msg = None - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - # Find a suffix which is not contained within the prompt otherwise - suggested_suffix = "\n\n### =>\n\n" - suffix_options = [ - " ->", - "\n\n###\n\n", - "\n\n===\n\n", - "\n\n---\n\n", - "\n\n===>\n\n", - "\n\n--->\n\n", - ] - for suffix_option in suffix_options: - if suffix_option == " ->": - if df.prompt.str.contains("\n").any(): - continue - if df.prompt.str.contains(suffix_option, regex=False).any(): - continue - suggested_suffix = suffix_option - break - display_suggested_suffix = suggested_suffix.replace("\n", "\\n") - - ft_type = infer_task_type(df) - if ft_type == "open-ended generation": - return Remediation(name="common_suffix") - - def add_suffix(x: Any, suffix: Any) -> Any: - x["prompt"] += suffix - return x - - common_suffix = get_common_xfix(df.prompt, xfix="suffix") - if (df.prompt == common_suffix).all(): - error_msg = f"All prompts are identical: `{common_suffix}`\nConsider leaving the prompts blank if you want to do open-ended generation, otherwise ensure prompts are different" - return Remediation(name="common_suffix", error_msg=error_msg) - - if common_suffix != "": - common_suffix_new_line_handled = common_suffix.replace("\n", "\\n") - immediate_msg = f"\n- All prompts end with suffix `{common_suffix_new_line_handled}`" - if len(common_suffix) > 10: - immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`" - if df.prompt.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any(): - immediate_msg += f"\n WARNING: Some of your prompts contain the suffix `{common_suffix}` more than once. We strongly suggest that you review your prompts and add a unique suffix" - - else: - immediate_msg = "\n- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty" - - if common_suffix == "": - optional_msg = f"Add a suffix separator `{display_suggested_suffix}` to all prompts" - - def optional_fn(x: Any) -> Any: - return add_suffix(x, suggested_suffix) - - return Remediation( - name="common_completion_suffix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - error_msg=error_msg, - ) - - -def common_prompt_prefix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to remove a common prefix from the prompt if a long one exist. - """ - MAX_PREFIX_LEN = 12 - - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - common_prefix = get_common_xfix(df.prompt, xfix="prefix") - if common_prefix == "": - return Remediation(name="common_prefix") - - def remove_common_prefix(x: Any, prefix: Any) -> Any: - x["prompt"] = x["prompt"].str[len(prefix) :] - return x - - if (df.prompt == common_prefix).all(): - # already handled by common_suffix_validator - return Remediation(name="common_prefix") - - if common_prefix != "": - immediate_msg = f"\n- All prompts start with prefix `{common_prefix}`" - if MAX_PREFIX_LEN < len(common_prefix): - immediate_msg += ". Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. 
Most of the time you should only add the input data into the prompt, and the desired output into the completion" - optional_msg = f"Remove prefix `{common_prefix}` from all prompts" - - def optional_fn(x: Any) -> Any: - return remove_common_prefix(x, common_prefix) - - return Remediation( - name="common_prompt_prefix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to remove a common prefix from the completion if a long one exist. - """ - MAX_PREFIX_LEN = 5 - - common_prefix = get_common_xfix(df.completion, xfix="prefix") - ws_prefix = len(common_prefix) > 0 and common_prefix[0] == " " - if len(common_prefix) < MAX_PREFIX_LEN: - return Remediation(name="common_prefix") - - def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any: - x["completion"] = x["completion"].str[len(prefix) :] - if ws_prefix: - # keep the single whitespace as prefix - x["completion"] = f" {x['completion']}" - return x - - if (df.completion == common_prefix).all(): - # already handled by common_suffix_validator - return Remediation(name="common_prefix") - - immediate_msg = f"\n- All completions start with prefix `{common_prefix}`. Most of the time you should only add the output data into the completion, without any prefix" - optional_msg = f"Remove prefix `{common_prefix}` from all completions" - - def optional_fn(x: Any) -> Any: - return remove_common_prefix(x, common_prefix, ws_prefix) - - return Remediation( - name="common_completion_prefix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a common suffix to the completion if one doesn't already exist in case of classification or conditional generation. - """ - error_msg = None - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - ft_type = infer_task_type(df) - if ft_type == "open-ended generation" or ft_type == "classification": - return Remediation(name="common_suffix") - - common_suffix = get_common_xfix(df.completion, xfix="suffix") - if (df.completion == common_suffix).all(): - error_msg = f"All completions are identical: `{common_suffix}`\nEnsure completions are different, otherwise the model will just repeat `{common_suffix}`" - return Remediation(name="common_suffix", error_msg=error_msg) - - # Find a suffix which is not contained within the completion otherwise - suggested_suffix = " [END]" - suffix_options = [ - "\n", - ".", - " END", - "***", - "+++", - "&&&", - "$$$", - "@@@", - "%%%", - ] - for suffix_option in suffix_options: - if df.completion.str.contains(suffix_option, regex=False).any(): - continue - suggested_suffix = suffix_option - break - display_suggested_suffix = suggested_suffix.replace("\n", "\\n") - - def add_suffix(x: Any, suffix: Any) -> Any: - x["completion"] += suffix - return x - - if common_suffix != "": - common_suffix_new_line_handled = common_suffix.replace("\n", "\\n") - immediate_msg = f"\n- All completions end with suffix `{common_suffix_new_line_handled}`" - if len(common_suffix) > 10: - immediate_msg += f". This suffix seems very long. 
Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`" - if df.completion.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any(): - immediate_msg += f"\n WARNING: Some of your completions contain the suffix `{common_suffix}` more than once. We suggest that you review your completions and add a unique ending" - - else: - immediate_msg = "\n- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples." - - if common_suffix == "": - optional_msg = f"Add a suffix ending `{display_suggested_suffix}` to all completions" - - def optional_fn(x: Any) -> Any: - return add_suffix(x, suggested_suffix) - - return Remediation( - name="common_completion_suffix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - error_msg=error_msg, - ) - - -def completions_space_start_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a space at the start of the completion if it doesn't already exist. This helps with tokenization. - """ - - def add_space_start(x: Any) -> Any: - x["completion"] = x["completion"].apply(lambda s: ("" if s.startswith(" ") else " ") + s) - return x - - optional_msg = None - optional_fn = None - immediate_msg = None - - if df.completion.str[:1].nunique() != 1 or df.completion.values[0][0] != " ": - immediate_msg = "\n- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details" - optional_msg = "Add a whitespace character to the beginning of the completion" - optional_fn = add_space_start - return Remediation( - name="completion_space_start", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def lower_case_validator(df: pd.DataFrame, column: Any) -> Remediation | None: - """ - This validator will suggest to lowercase the column values, if more than a third of letters are uppercase. - """ - - def lower_case(x: Any) -> Any: - x[column] = x[column].str.lower() - return x - - count_upper = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.isupper())).sum() - count_lower = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.islower())).sum() - - if count_upper * 2 > count_lower: - return Remediation( - name="lower_case", - immediate_msg=f"\n- More than a third of your `{column}` column/key is uppercase. Uppercase {column}s tends to perform worse than a mixture of case encountered in normal language. We recommend to lower case the data if that makes sense in your domain. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details", - optional_msg=f"Lowercase all your data in column/key `{column}`", - optional_fn=lower_case, - ) - return None - - -def read_any_format( - fname: str, fields: list[str] = ["prompt", "completion"] -) -> tuple[pd.DataFrame | None, Remediation]: - """ - This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas. 
- - for .xlsx it will read the first sheet - - for .txt it will assume completions and split on newline - """ - remediation = None - necessary_msg = None - immediate_msg = None - error_msg = None - df = None - - if os.path.isfile(fname): - try: - if fname.lower().endswith(".csv") or fname.lower().endswith(".tsv"): - file_extension_str, separator = ("CSV", ",") if fname.lower().endswith(".csv") else ("TSV", "\t") - immediate_msg = ( - f"\n- Based on your file extension, your file is formatted as a {file_extension_str} file" - ) - necessary_msg = f"Your format `{file_extension_str}` will be converted to `JSONL`" - df = pd.read_csv(fname, sep=separator, dtype=str).fillna("") - elif fname.lower().endswith(".xlsx"): - immediate_msg = "\n- Based on your file extension, your file is formatted as an Excel file" - necessary_msg = "Your format `XLSX` will be converted to `JSONL`" - xls = pd.ExcelFile(fname) - sheets = xls.sheet_names - if len(sheets) > 1: - immediate_msg += "\n- Your Excel file contains more than one sheet. Please either save as csv or ensure all data is present in the first sheet. WARNING: Reading only the first sheet..." - df = pd.read_excel(fname, dtype=str).fillna("") - elif fname.lower().endswith(".txt"): - immediate_msg = "\n- Based on your file extension, you provided a text file" - necessary_msg = "Your format `TXT` will be converted to `JSONL`" - with open(fname, "r") as f: - content = f.read() - df = pd.DataFrame( - [["", line] for line in content.split("\n")], - columns=fields, - dtype=str, - ).fillna("") - elif fname.lower().endswith(".jsonl"): - df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore - if len(df) == 1: # type: ignore - # this is NOT what we expect for a .jsonl file - immediate_msg = "\n- Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format" - necessary_msg = "Your format `JSON` will be converted to `JSONL`" - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - pass # this is what we expect for a .jsonl file - elif fname.lower().endswith(".json"): - try: - # to handle case where .json file is actually a .jsonl file - df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore - if len(df) == 1: # type: ignore - # this code path corresponds to a .json file that has one line - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - # this is NOT what we expect for a .json file - immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format" - necessary_msg = "Your format `JSON` will be converted to `JSONL`" - except ValueError: - # this code path corresponds to a .json file that has multiple lines (i.e. it is indented) - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - error_msg = ( - "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL" - ) - if "." in fname: - error_msg += f" Your file `{fname}` ends with the extension `.{fname.split('.')[-1]}` which is not supported." - else: - error_msg += f" Your file `{fname}` is missing a file extension." - - except (ValueError, TypeError): - file_extension_str = fname.split(".")[-1].upper() - error_msg = f"Your file `{fname}` does not appear to be in valid {file_extension_str} format. Please ensure your file is formatted as a valid {file_extension_str} file." - - else: - error_msg = f"File {fname} does not exist." 
- - remediation = Remediation( - name="read_any_format", - necessary_msg=necessary_msg, - immediate_msg=immediate_msg, - error_msg=error_msg, - ) - return df, remediation - - -def format_inferrer_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification. - It will also suggest to use ada and explain train/validation split benefits. - """ - ft_type = infer_task_type(df) - immediate_msg = None - if ft_type == "classification": - immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training" - return Remediation(name="num_examples", immediate_msg=immediate_msg) - - -def apply_necessary_remediation(df: OptionalDataFrameT, remediation: Remediation) -> OptionalDataFrameT: - """ - This function will apply a necessary remediation to a dataframe, or print an error message if one exists. - """ - if remediation.error_msg is not None: - sys.stderr.write(f"\n\nERROR in {remediation.name} validator: {remediation.error_msg}\n\nAborting...") - sys.exit(1) - if remediation.immediate_msg is not None: - sys.stdout.write(remediation.immediate_msg) - if remediation.necessary_fn is not None: - df = remediation.necessary_fn(df) - return df - - -def accept_suggestion(input_text: str, auto_accept: bool) -> bool: - sys.stdout.write(input_text) - if auto_accept: - sys.stdout.write("Y\n") - return True - return input().lower() != "n" - - -def apply_optional_remediation( - df: pd.DataFrame, remediation: Remediation, auto_accept: bool -) -> tuple[pd.DataFrame, bool]: - """ - This function will apply an optional remediation to a dataframe, based on the user input. - """ - optional_applied = False - input_text = f"- [Recommended] {remediation.optional_msg} [Y/n]: " - if remediation.optional_msg is not None: - if accept_suggestion(input_text, auto_accept): - assert remediation.optional_fn is not None - df = remediation.optional_fn(df) - optional_applied = True - if remediation.necessary_msg is not None: - sys.stdout.write(f"- [Necessary] {remediation.necessary_msg}\n") - return df, optional_applied - - -def estimate_fine_tuning_time(df: pd.DataFrame) -> None: - """ - Estimate the time it'll take to fine-tune the dataset - """ - ft_format = infer_task_type(df) - expected_time = 1.0 - if ft_format == "classification": - num_examples = len(df) - expected_time = num_examples * 1.44 - else: - size = df.memory_usage(index=True).sum() - expected_time = size * 0.0515 - - def format_time(time: float) -> str: - if time < 60: - return f"{round(time, 2)} seconds" - elif time < 3600: - return f"{round(time / 60, 2)} minutes" - elif time < 86400: - return f"{round(time / 3600, 2)} hours" - else: - return f"{round(time / 86400, 2)} days" - - time_string = format_time(expected_time + 140) - sys.stdout.write( - f"Once your model starts training, it'll approximately take {time_string} to train a `curie` model, and less for `ada` and `babbage`. 
Queue will approximately take half an hour per job ahead of you.\n" - ) - - -def get_outfnames(fname: str, split: bool) -> list[str]: - suffixes = ["_train", "_valid"] if split else [""] - i = 0 - while True: - index_suffix = f" ({i})" if i > 0 else "" - candidate_fnames = [f"{os.path.splitext(fname)[0]}_prepared{suffix}{index_suffix}.jsonl" for suffix in suffixes] - if not any(os.path.isfile(f) for f in candidate_fnames): - return candidate_fnames - i += 1 - - -def get_classification_hyperparams(df: pd.DataFrame) -> tuple[int, object]: - n_classes = df.completion.nunique() - pos_class = None - if n_classes == 2: - pos_class = df.completion.value_counts().index[0] - return n_classes, pos_class - - -def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_accept: bool) -> None: - """ - This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file. - For classification it will optionally ask the user if they would like to split the data into train/valid files, and modify the suggested command to include the valid set. - """ - ft_format = infer_task_type(df) - common_prompt_suffix = get_common_xfix(df.prompt, xfix="suffix") - common_completion_suffix = get_common_xfix(df.completion, xfix="suffix") - - split = False - input_text = "- [Recommended] Would you like to split into training and validation set? [Y/n]: " - if ft_format == "classification": - if accept_suggestion(input_text, auto_accept): - split = True - - additional_params = "" - common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n") - common_completion_suffix_new_line_handled = common_completion_suffix.replace("\n", "\\n") - optional_ending_string = ( - f' Make sure to include `stop=["{common_completion_suffix_new_line_handled}"]` so that the generated texts ends at the expected place.' - if len(common_completion_suffix_new_line_handled) > 0 - else "" - ) - - input_text = "\n\nYour data will be written to a new JSONL file. 
Proceed [Y/n]: " - - if not any_remediations and not split: - sys.stdout.write( - f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t "{fname}"{additional_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n' - ) - estimate_fine_tuning_time(df) - - elif accept_suggestion(input_text, auto_accept): - fnames = get_outfnames(fname, split) - if split: - assert len(fnames) == 2 and "train" in fnames[0] and "valid" in fnames[1] - MAX_VALID_EXAMPLES = 1000 - n_train = max(len(df) - MAX_VALID_EXAMPLES, int(len(df) * 0.8)) - df_train = df.sample(n=n_train, random_state=42) - df_valid = df.drop(df_train.index) - df_train[["prompt", "completion"]].to_json( # type: ignore - fnames[0], lines=True, orient="records", force_ascii=False, indent=None - ) - df_valid[["prompt", "completion"]].to_json( - fnames[1], lines=True, orient="records", force_ascii=False, indent=None - ) - - n_classes, pos_class = get_classification_hyperparams(df) - additional_params += " --compute_classification_metrics" - if n_classes == 2: - additional_params += f' --classification_positive_class "{pos_class}"' - else: - additional_params += f" --classification_n_classes {n_classes}" - else: - assert len(fnames) == 1 - df[["prompt", "completion"]].to_json( - fnames[0], lines=True, orient="records", force_ascii=False, indent=None - ) - - # Add -v VALID_FILE if we split the file into train / valid - files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames)) - valid_string = f' -v "{fnames[1]}"' if split else "" - separator_reminder = ( - "" - if len(common_prompt_suffix_new_line_handled) == 0 - else f"After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt." - ) - sys.stdout.write( - f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{additional_params}\n\n{separator_reminder}{optional_ending_string}\n' - ) - estimate_fine_tuning_time(df) - else: - sys.stdout.write("Aborting... 
did not write the file\n") - - -def infer_task_type(df: pd.DataFrame) -> str: - """ - Infer the likely fine-tuning task type from the data - """ - CLASSIFICATION_THRESHOLD = 3 # min_average instances of each class - if sum(df.prompt.str.len()) == 0: - return "open-ended generation" - - if len(df.completion.unique()) < len(df) / CLASSIFICATION_THRESHOLD: - return "classification" - - return "conditional generation" - - -def get_common_xfix(series: Any, xfix: str = "suffix") -> str: - """ - Finds the longest common suffix or prefix of all the values in a series - """ - common_xfix = "" - while True: - common_xfixes = ( - series.str[-(len(common_xfix) + 1) :] if xfix == "suffix" else series.str[: len(common_xfix) + 1] - ) # first few or last few characters - if common_xfixes.nunique() != 1: # we found the character at which we don't have a unique xfix anymore - break - elif common_xfix == common_xfixes.values[0]: # the entire first row is a prefix of every other row - break - else: # the first or last few characters are still common across all rows - let's try to add one more - common_xfix = common_xfixes.values[0] - return common_xfix - - -Validator: TypeAlias = "Callable[[pd.DataFrame], Remediation | None]" - - -def get_validators() -> list[Validator]: - return [ - num_examples_validator, - lambda x: necessary_column_validator(x, "prompt"), - lambda x: necessary_column_validator(x, "completion"), - additional_column_validator, - non_empty_field_validator, - format_inferrer_validator, - duplicated_rows_validator, - long_examples_validator, - lambda x: lower_case_validator(x, "prompt"), - lambda x: lower_case_validator(x, "completion"), - common_prompt_suffix_validator, - common_prompt_prefix_validator, - common_completion_prefix_validator, - common_completion_suffix_validator, - completions_space_start_validator, - ] - - -def apply_validators( - df: pd.DataFrame, - fname: str, - remediation: Remediation | None, - validators: list[Validator], - auto_accept: bool, - write_out_file_func: Callable[..., Any], -) -> None: - optional_remediations: list[Remediation] = [] - if remediation is not None: - optional_remediations.append(remediation) - for validator in validators: - remediation = validator(df) - if remediation is not None: - optional_remediations.append(remediation) - df = apply_necessary_remediation(df, remediation) - - any_optional_or_necessary_remediations = any( - [ - remediation - for remediation in optional_remediations - if remediation.optional_msg is not None or remediation.necessary_msg is not None - ] - ) - any_necessary_applied = any( - [remediation for remediation in optional_remediations if remediation.necessary_msg is not None] - ) - any_optional_applied = False - - if any_optional_or_necessary_remediations: - sys.stdout.write("\n\nBased on the analysis we will perform the following actions:\n") - for remediation in optional_remediations: - df, optional_applied = apply_optional_remediation(df, remediation, auto_accept) - any_optional_applied = any_optional_applied or optional_applied - else: - sys.stdout.write("\n\nNo remediations found.\n") - - any_optional_or_necessary_applied = any_optional_applied or any_necessary_applied - - write_out_file_func(df, fname, any_optional_or_necessary_applied, auto_accept) diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py deleted file mode 100644 index b76b83c61c..0000000000 --- a/src/openai/lib/azure.py +++ /dev/null @@ -1,542 +0,0 @@ -from __future__ import annotations - -import os -import inspect -from typing import Any, 
Union, Mapping, TypeVar, Callable, Awaitable, overload -from typing_extensions import Self, override - -import httpx - -from .._types import NOT_GIVEN, Omit, Timeout, NotGiven -from .._utils import is_given, is_mapping -from .._client import OpenAI, AsyncOpenAI -from .._models import FinalRequestOptions -from .._streaming import Stream, AsyncStream -from .._exceptions import OpenAIError -from .._base_client import DEFAULT_MAX_RETRIES, BaseClient - -_deployments_endpoints = set( - [ - "/completions", - "/chat/completions", - "/embeddings", - "/audio/transcriptions", - "/audio/translations", - "/audio/speech", - "/images/generations", - ] -) - - -AzureADTokenProvider = Callable[[], str] -AsyncAzureADTokenProvider = Callable[[], "str | Awaitable[str]"] -_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient]) -_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]]) - - -# we need to use a sentinel API key value for Azure AD -# as we don't want to make the `api_key` in the main client Optional -# and Azure AD tokens may be retrieved on a per-request basis -API_KEY_SENTINEL = "".join(["<", "missing API key", ">"]) - - -class MutuallyExclusiveAuthError(OpenAIError): - def __init__(self) -> None: - super().__init__( - "The `api_key`, `azure_ad_token` and `azure_ad_token_provider` arguments are mutually exclusive; Only one can be passed at a time" - ) - - -class BaseAzureClient(BaseClient[_HttpxClientT, _DefaultStreamT]): - @override - def _build_request( - self, - options: FinalRequestOptions, - ) -> httpx.Request: - if options.url in _deployments_endpoints and is_mapping(options.json_data): - model = options.json_data.get("model") - if model is not None and not "/deployments" in str(self.base_url): - options.url = f"/deployments/{model}{options.url}" - - return super()._build_request(options) - - -class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI): - @overload - def __init__( - self, - *, - azure_endpoint: str, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - @overload - def __init__( - self, - *, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... 
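For the deployment-scoped endpoints listed in `_deployments_endpoints`, `_build_request` treats the `model` field of the request body as the Azure deployment name and folds it into the path (unless the client was already constructed against a `/deployments/...` base URL). A usage sketch of the behaviour this module implemented; the endpoint, key, API version, and deployment name are placeholders:

```py
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint="https://example-resource.openai.azure.com",
    api_key="<azure-openai-key>",
    api_version="2024-02-01",
)

# `model` is the Azure *deployment* name; because the base URL has no /deployments
# segment, the request path is expanded to /deployments/my-gpt-4o/chat/completions.
completion = client.chat.completions.create(
    model="my-gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
print(completion.choices[0].message.content)
```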
- - @overload - def __init__( - self, - *, - base_url: str, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - def __init__( - self, - *, - api_version: str | None = None, - azure_endpoint: str | None = None, - azure_deployment: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - base_url: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - """Construct a new synchronous azure openai client instance. - - This automatically infers the following arguments from their corresponding environment variables if they are not provided: - - `api_key` from `AZURE_OPENAI_API_KEY` - - `organization` from `OPENAI_ORG_ID` - - `project` from `OPENAI_PROJECT_ID` - - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - - `api_version` from `OPENAI_API_VERSION` - - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` - - Args: - azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/` - - azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id - - azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. - - azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. - """ - if api_key is None: - api_key = os.environ.get("AZURE_OPENAI_API_KEY") - - if azure_ad_token is None: - azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_key is None and azure_ad_token is None and azure_ad_token_provider is None: - raise OpenAIError( - "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables." 
- ) - - if api_version is None: - api_version = os.environ.get("OPENAI_API_VERSION") - - if api_version is None: - raise ValueError( - "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable" - ) - - if default_query is None: - default_query = {"api-version": api_version} - else: - default_query = {**default_query, "api-version": api_version} - - if base_url is None: - if azure_endpoint is None: - azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_endpoint is None: - raise ValueError( - "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable" - ) - - if azure_deployment is not None: - base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}" - else: - base_url = f"{azure_endpoint}/openai" - else: - if azure_endpoint is not None: - raise ValueError("base_url and azure_endpoint are mutually exclusive") - - if api_key is None: - # define a sentinel value to avoid any typing issues - api_key = API_KEY_SENTINEL - - super().__init__( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - _strict_response_validation=_strict_response_validation, - ) - self._api_version = api_version - self._azure_ad_token = azure_ad_token - self._azure_ad_token_provider = azure_ad_token_provider - - @override - def copy( - self, - *, - api_key: str | None = None, - organization: str | None = None, - project: str | None = None, - api_version: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.Client | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with optional overriding. 
- """ - return super().copy( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - http_client=http_client, - max_retries=max_retries, - default_headers=default_headers, - set_default_headers=set_default_headers, - default_query=default_query, - set_default_query=set_default_query, - _extra_kwargs={ - "api_version": api_version or self._api_version, - "azure_ad_token": azure_ad_token or self._azure_ad_token, - "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider, - **_extra_kwargs, - }, - ) - - with_options = copy - - def _get_azure_ad_token(self) -> str | None: - if self._azure_ad_token is not None: - return self._azure_ad_token - - provider = self._azure_ad_token_provider - if provider is not None: - token = provider() - if not token or not isinstance(token, str): # pyright: ignore[reportUnnecessaryIsInstance] - raise ValueError( - f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}", - ) - return token - - return None - - @override - def _prepare_options(self, options: FinalRequestOptions) -> None: - headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {} - options.headers = headers - - azure_ad_token = self._get_azure_ad_token() - if azure_ad_token is not None: - if headers.get("Authorization") is None: - headers["Authorization"] = f"Bearer {azure_ad_token}" - elif self.api_key is not API_KEY_SENTINEL: - if headers.get("api-key") is None: - headers["api-key"] = self.api_key - else: - # should never be hit - raise ValueError("Unable to handle auth") - - return super()._prepare_options(options) - - -class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], AsyncOpenAI): - @overload - def __init__( - self, - *, - azure_endpoint: str, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - @overload - def __init__( - self, - *, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... 
- - @overload - def __init__( - self, - *, - base_url: str, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - def __init__( - self, - *, - azure_endpoint: str | None = None, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - base_url: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - """Construct a new asynchronous azure openai client instance. - - This automatically infers the following arguments from their corresponding environment variables if they are not provided: - - `api_key` from `AZURE_OPENAI_API_KEY` - - `organization` from `OPENAI_ORG_ID` - - `project` from `OPENAI_PROJECT_ID` - - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - - `api_version` from `OPENAI_API_VERSION` - - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` - - Args: - azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/` - - azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id - - azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. - - azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. - """ - if api_key is None: - api_key = os.environ.get("AZURE_OPENAI_API_KEY") - - if azure_ad_token is None: - azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_key is None and azure_ad_token is None and azure_ad_token_provider is None: - raise OpenAIError( - "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables." 
- ) - - if api_version is None: - api_version = os.environ.get("OPENAI_API_VERSION") - - if api_version is None: - raise ValueError( - "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable" - ) - - if default_query is None: - default_query = {"api-version": api_version} - else: - default_query = {**default_query, "api-version": api_version} - - if base_url is None: - if azure_endpoint is None: - azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_endpoint is None: - raise ValueError( - "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable" - ) - - if azure_deployment is not None: - base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}" - else: - base_url = f"{azure_endpoint}/openai" - else: - if azure_endpoint is not None: - raise ValueError("base_url and azure_endpoint are mutually exclusive") - - if api_key is None: - # define a sentinel value to avoid any typing issues - api_key = API_KEY_SENTINEL - - super().__init__( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - _strict_response_validation=_strict_response_validation, - ) - self._api_version = api_version - self._azure_ad_token = azure_ad_token - self._azure_ad_token_provider = azure_ad_token_provider - - @override - def copy( - self, - *, - api_key: str | None = None, - organization: str | None = None, - project: str | None = None, - api_version: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.AsyncClient | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with optional overriding. 
- """ - return super().copy( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - http_client=http_client, - max_retries=max_retries, - default_headers=default_headers, - set_default_headers=set_default_headers, - default_query=default_query, - set_default_query=set_default_query, - _extra_kwargs={ - "api_version": api_version or self._api_version, - "azure_ad_token": azure_ad_token or self._azure_ad_token, - "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider, - **_extra_kwargs, - }, - ) - - with_options = copy - - async def _get_azure_ad_token(self) -> str | None: - if self._azure_ad_token is not None: - return self._azure_ad_token - - provider = self._azure_ad_token_provider - if provider is not None: - token = provider() - if inspect.isawaitable(token): - token = await token - if not token or not isinstance(token, str): - raise ValueError( - f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}", - ) - return token - - return None - - @override - async def _prepare_options(self, options: FinalRequestOptions) -> None: - headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {} - options.headers = headers - - azure_ad_token = await self._get_azure_ad_token() - if azure_ad_token is not None: - if headers.get("Authorization") is None: - headers["Authorization"] = f"Bearer {azure_ad_token}" - elif self.api_key is not API_KEY_SENTINEL: - if headers.get("api-key") is None: - headers["api-key"] = self.api_key - else: - # should never be hit - raise ValueError("Unable to handle auth") - - return await super()._prepare_options(options) diff --git a/src/openai/lib/streaming/__init__.py b/src/openai/lib/streaming/__init__.py deleted file mode 100644 index eb378d2561..0000000000 --- a/src/openai/lib/streaming/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from ._assistants import ( - AssistantEventHandler as AssistantEventHandler, - AssistantEventHandlerT as AssistantEventHandlerT, - AssistantStreamManager as AssistantStreamManager, - AsyncAssistantEventHandler as AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager as AsyncAssistantStreamManager, -) diff --git a/src/openai/lib/streaming/_assistants.py b/src/openai/lib/streaming/_assistants.py deleted file mode 100644 index 03d97ec2eb..0000000000 --- a/src/openai/lib/streaming/_assistants.py +++ /dev/null @@ -1,1035 +0,0 @@ -from __future__ import annotations - -import asyncio -from types import TracebackType -from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast -from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never - -import httpx - -from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator -from ..._models import construct_type -from ..._streaming import Stream, AsyncStream -from ...types.beta import AssistantStreamEvent -from ...types.beta.threads import ( - Run, - Text, - Message, - ImageFile, - TextDelta, - MessageDelta, - MessageContent, - MessageContentDelta, -) -from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta - - -class AssistantEventHandler: - text_deltas: Iterable[str] - """Iterator over just the text deltas in the stream. - - This corresponds to the `thread.message.delta` event - in the API. 
- - ```py - for text in stream.text_deltas: - print(text, end="", flush=True) - print() - ``` - """ - - def __init__(self) -> None: - self._current_event: AssistantStreamEvent | None = None - self._current_message_content_index: int | None = None - self._current_message_content: MessageContent | None = None - self._current_tool_call_index: int | None = None - self._current_tool_call: ToolCall | None = None - self.__current_run_step_id: str | None = None - self.__current_run: Run | None = None - self.__run_step_snapshots: dict[str, RunStep] = {} - self.__message_snapshots: dict[str, Message] = {} - self.__current_message_snapshot: Message | None = None - - self.text_deltas = self.__text_deltas__() - self._iterator = self.__stream__() - self.__stream: Stream[AssistantStreamEvent] | None = None - - def _init(self, stream: Stream[AssistantStreamEvent]) -> None: - if self.__stream: - raise RuntimeError( - "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" - ) - - self.__stream = stream - - def __next__(self) -> AssistantStreamEvent: - return self._iterator.__next__() - - def __iter__(self) -> Iterator[AssistantStreamEvent]: - for item in self._iterator: - yield item - - @property - def current_event(self) -> AssistantStreamEvent | None: - return self._current_event - - @property - def current_run(self) -> Run | None: - return self.__current_run - - @property - def current_run_step_snapshot(self) -> RunStep | None: - if not self.__current_run_step_id: - return None - - return self.__run_step_snapshots[self.__current_run_step_id] - - @property - def current_message_snapshot(self) -> Message | None: - return self.__current_message_snapshot - - def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called when the context manager exits. - """ - if self.__stream: - self.__stream.close() - - def until_done(self) -> None: - """Waits until the stream has been consumed""" - consume_sync_iterator(self) - - def get_final_run(self) -> Run: - """Wait for the stream to finish and returns the completed Run object""" - self.until_done() - - if not self.__current_run: - raise RuntimeError("No final run object found") - - return self.__current_run - - def get_final_run_steps(self) -> list[RunStep]: - """Wait for the stream to finish and returns the steps taken in this run""" - self.until_done() - - if not self.__run_step_snapshots: - raise RuntimeError("No run steps found") - - return [step for step in self.__run_step_snapshots.values()] - - def get_final_messages(self) -> list[Message]: - """Wait for the stream to finish and returns the messages emitted in this run""" - self.until_done() - - if not self.__message_snapshots: - raise RuntimeError("No messages found") - - return [message for message in self.__message_snapshots.values()] - - def __text_deltas__(self) -> Iterator[str]: - for event in self: - if event.event != "thread.message.delta": - continue - - for content_delta in event.data.delta.content or []: - if content_delta.type == "text" and content_delta.text and content_delta.text.value: - yield content_delta.text.value - - # event handlers - - def on_end(self) -> None: - """Fires when the stream has finished. - - This happens if the stream is read to completion - or if an exception occurs during iteration. 
- """ - - def on_event(self, event: AssistantStreamEvent) -> None: - """Callback that is fired for every Server-Sent-Event""" - - def on_run_step_created(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is created""" - - def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - """Callback that is fired whenever a run step delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the run step. For example, a tool calls event may - look like this: - - # delta - tool_calls=[ - RunStepDeltaToolCallsCodeInterpreter( - index=0, - type='code_interpreter', - id=None, - code_interpreter=CodeInterpreter(input=' sympy', outputs=None) - ) - ] - # snapshot - tool_calls=[ - CodeToolCall( - id='call_wKayJlcYV12NiadiZuJXxcfx', - code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), - type='code_interpreter', - index=0 - ) - ], - """ - - def on_run_step_done(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is completed""" - - def on_tool_call_created(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call is created""" - - def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - def on_tool_call_done(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - def on_exception(self, exception: Exception) -> None: - """Fired whenever an exception happens during streaming""" - - def on_timeout(self) -> None: - """Fires if the request times out""" - - def on_message_created(self, message: Message) -> None: - """Callback that is fired when a message is created""" - - def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: - """Callback that is fired whenever a message delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the message. For example, a text content event may - look like this: - - # delta - MessageDeltaText( - index=0, - type='text', - text=Text( - value=' Jane' - ), - ) - # snapshot - MessageContentText( - index=0, - type='text', - text=Text( - value='Certainly, Jane' - ), - ) - """ - - def on_message_done(self, message: Message) -> None: - """Callback that is fired when a message is completed""" - - def on_text_created(self, text: Text) -> None: - """Callback that is fired when a text content block is created""" - - def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - """Callback that is fired whenever a text content delta is returned - by the API. - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the text. 
For example: - - on_text_delta(TextDelta(value="The"), Text(value="The")), - on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), - on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), - on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), - on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), - """ - - def on_text_done(self, text: Text) -> None: - """Callback that is fired when a text content block is finished""" - - def on_image_file_done(self, image_file: ImageFile) -> None: - """Callback that is fired when an image file block is finished""" - - def _emit_sse_event(self, event: AssistantStreamEvent) -> None: - self._current_event = event - self.on_event(event) - - self.__current_message_snapshot, new_content = accumulate_event( - event=event, - current_message_snapshot=self.__current_message_snapshot, - ) - if self.__current_message_snapshot is not None: - self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot - - accumulate_run_step( - event=event, - run_step_snapshots=self.__run_step_snapshots, - ) - - for content_delta in new_content: - assert self.__current_message_snapshot is not None - - block = self.__current_message_snapshot.content[content_delta.index] - if block.type == "text": - self.on_text_created(block.text) - - if ( - event.event == "thread.run.completed" - or event.event == "thread.run.cancelled" - or event.event == "thread.run.expired" - or event.event == "thread.run.failed" - or event.event == "thread.run.requires_action" - ): - self.__current_run = event.data - if self._current_tool_call: - self.on_tool_call_done(self._current_tool_call) - elif ( - event.event == "thread.run.created" - or event.event == "thread.run.in_progress" - or event.event == "thread.run.cancelling" - or event.event == "thread.run.queued" - ): - self.__current_run = event.data - elif event.event == "thread.message.created": - self.on_message_created(event.data) - elif event.event == "thread.message.delta": - snapshot = self.__current_message_snapshot - assert snapshot is not None - - message_delta = event.data.delta - if message_delta.content is not None: - for content_delta in message_delta.content: - if content_delta.type == "text" and content_delta.text: - snapshot_content = snapshot.content[content_delta.index] - assert snapshot_content.type == "text" - self.on_text_delta(content_delta.text, snapshot_content.text) - - # If the delta is for a new message content: - # - emit on_text_done/on_image_file_done for the previous message content - # - emit on_text_created/on_image_created for the new message content - if content_delta.index != self._current_message_content_index: - if self._current_message_content is not None: - if self._current_message_content.type == "text": - self.on_text_done(self._current_message_content.text) - elif self._current_message_content.type == "image_file": - self.on_image_file_done(self._current_message_content.image_file) - - self._current_message_content_index = content_delta.index - self._current_message_content = snapshot.content[content_delta.index] - - # Update the current_message_content (delta event is correctly emitted already) - self._current_message_content = snapshot.content[content_delta.index] - - self.on_message_delta(event.data.delta, snapshot) - elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": - self.__current_message_snapshot = event.data - 
self.__message_snapshots[event.data.id] = event.data - - if self._current_message_content_index is not None: - content = event.data.content[self._current_message_content_index] - if content.type == "text": - self.on_text_done(content.text) - elif content.type == "image_file": - self.on_image_file_done(content.image_file) - - self.on_message_done(event.data) - elif event.event == "thread.run.step.created": - self.__current_run_step_id = event.data.id - self.on_run_step_created(event.data) - elif event.event == "thread.run.step.in_progress": - self.__current_run_step_id = event.data.id - elif event.event == "thread.run.step.delta": - step_snapshot = self.__run_step_snapshots[event.data.id] - - run_step_delta = event.data.delta - if ( - run_step_delta.step_details - and run_step_delta.step_details.type == "tool_calls" - and run_step_delta.step_details.tool_calls is not None - ): - assert step_snapshot.step_details.type == "tool_calls" - for tool_call_delta in run_step_delta.step_details.tool_calls: - if tool_call_delta.index == self._current_tool_call_index: - self.on_tool_call_delta( - tool_call_delta, - step_snapshot.step_details.tool_calls[tool_call_delta.index], - ) - - # If the delta is for a new tool call: - # - emit on_tool_call_done for the previous tool_call - # - emit on_tool_call_created for the new tool_call - if tool_call_delta.index != self._current_tool_call_index: - if self._current_tool_call is not None: - self.on_tool_call_done(self._current_tool_call) - - self._current_tool_call_index = tool_call_delta.index - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - self.on_tool_call_created(self._current_tool_call) - - # Update the current_tool_call (delta event is correctly emitted already) - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - - self.on_run_step_delta( - event.data.delta, - step_snapshot, - ) - elif ( - event.event == "thread.run.step.completed" - or event.event == "thread.run.step.cancelled" - or event.event == "thread.run.step.expired" - or event.event == "thread.run.step.failed" - ): - if self._current_tool_call: - self.on_tool_call_done(self._current_tool_call) - - self.on_run_step_done(event.data) - self.__current_run_step_id = None - elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": - # currently no special handling - ... - else: - # we only want to error at build-time - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(event) - - self._current_event = None - - def __stream__(self) -> Iterator[AssistantStreamEvent]: - stream = self.__stream - if not stream: - raise RuntimeError("Stream has not been started yet") - - try: - for event in stream: - self._emit_sse_event(event) - - yield event - except (httpx.TimeoutException, asyncio.TimeoutError) as exc: - self.on_timeout() - self.on_exception(exc) - raise - except Exception as exc: - self.on_exception(exc) - raise - finally: - self.on_end() - - -AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler) - - -class AssistantStreamManager(Generic[AssistantEventHandlerT]): - """Wrapper over AssistantStreamEventHandler that is returned by `.stream()` - so that a context manager can be used. - - ```py - with client.threads.create_and_run_stream(...) as stream: - for event in stream: - ... 
- ``` - """ - - def __init__( - self, - api_request: Callable[[], Stream[AssistantStreamEvent]], - *, - event_handler: AssistantEventHandlerT, - ) -> None: - self.__stream: Stream[AssistantStreamEvent] | None = None - self.__event_handler = event_handler - self.__api_request = api_request - - def __enter__(self) -> AssistantEventHandlerT: - self.__stream = self.__api_request() - self.__event_handler._init(self.__stream) - return self.__event_handler - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - self.__stream.close() - - -class AsyncAssistantEventHandler: - text_deltas: AsyncIterable[str] - """Iterator over just the text deltas in the stream. - - This corresponds to the `thread.message.delta` event - in the API. - - ```py - async for text in stream.text_deltas: - print(text, end="", flush=True) - print() - ``` - """ - - def __init__(self) -> None: - self._current_event: AssistantStreamEvent | None = None - self._current_message_content_index: int | None = None - self._current_message_content: MessageContent | None = None - self._current_tool_call_index: int | None = None - self._current_tool_call: ToolCall | None = None - self.__current_run_step_id: str | None = None - self.__current_run: Run | None = None - self.__run_step_snapshots: dict[str, RunStep] = {} - self.__message_snapshots: dict[str, Message] = {} - self.__current_message_snapshot: Message | None = None - - self.text_deltas = self.__text_deltas__() - self._iterator = self.__stream__() - self.__stream: AsyncStream[AssistantStreamEvent] | None = None - - def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None: - if self.__stream: - raise RuntimeError( - "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" - ) - - self.__stream = stream - - async def __anext__(self) -> AssistantStreamEvent: - return await self._iterator.__anext__() - - async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]: - async for item in self._iterator: - yield item - - async def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called when the context manager exits. 
- """ - if self.__stream: - await self.__stream.close() - - @property - def current_event(self) -> AssistantStreamEvent | None: - return self._current_event - - @property - def current_run(self) -> Run | None: - return self.__current_run - - @property - def current_run_step_snapshot(self) -> RunStep | None: - if not self.__current_run_step_id: - return None - - return self.__run_step_snapshots[self.__current_run_step_id] - - @property - def current_message_snapshot(self) -> Message | None: - return self.__current_message_snapshot - - async def until_done(self) -> None: - """Waits until the stream has been consumed""" - await consume_async_iterator(self) - - async def get_final_run(self) -> Run: - """Wait for the stream to finish and returns the completed Run object""" - await self.until_done() - - if not self.__current_run: - raise RuntimeError("No final run object found") - - return self.__current_run - - async def get_final_run_steps(self) -> list[RunStep]: - """Wait for the stream to finish and returns the steps taken in this run""" - await self.until_done() - - if not self.__run_step_snapshots: - raise RuntimeError("No run steps found") - - return [step for step in self.__run_step_snapshots.values()] - - async def get_final_messages(self) -> list[Message]: - """Wait for the stream to finish and returns the messages emitted in this run""" - await self.until_done() - - if not self.__message_snapshots: - raise RuntimeError("No messages found") - - return [message for message in self.__message_snapshots.values()] - - async def __text_deltas__(self) -> AsyncIterator[str]: - async for event in self: - if event.event != "thread.message.delta": - continue - - for content_delta in event.data.delta.content or []: - if content_delta.type == "text" and content_delta.text and content_delta.text.value: - yield content_delta.text.value - - # event handlers - - async def on_end(self) -> None: - """Fires when the stream has finished. - - This happens if the stream is read to completion - or if an exception occurs during iteration. - """ - - async def on_event(self, event: AssistantStreamEvent) -> None: - """Callback that is fired for every Server-Sent-Event""" - - async def on_run_step_created(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is created""" - - async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - """Callback that is fired whenever a run step delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the run step. 
For example, a tool calls event may - look like this: - - # delta - tool_calls=[ - RunStepDeltaToolCallsCodeInterpreter( - index=0, - type='code_interpreter', - id=None, - code_interpreter=CodeInterpreter(input=' sympy', outputs=None) - ) - ] - # snapshot - tool_calls=[ - CodeToolCall( - id='call_wKayJlcYV12NiadiZuJXxcfx', - code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), - type='code_interpreter', - index=0 - ) - ], - """ - - async def on_run_step_done(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is completed""" - - async def on_tool_call_created(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call is created""" - - async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - async def on_tool_call_done(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - async def on_exception(self, exception: Exception) -> None: - """Fired whenever an exception happens during streaming""" - - async def on_timeout(self) -> None: - """Fires if the request times out""" - - async def on_message_created(self, message: Message) -> None: - """Callback that is fired when a message is created""" - - async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: - """Callback that is fired whenever a message delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the message. For example, a text content event may - look like this: - - # delta - MessageDeltaText( - index=0, - type='text', - text=Text( - value=' Jane' - ), - ) - # snapshot - MessageContentText( - index=0, - type='text', - text=Text( - value='Certainly, Jane' - ), - ) - """ - - async def on_message_done(self, message: Message) -> None: - """Callback that is fired when a message is completed""" - - async def on_text_created(self, text: Text) -> None: - """Callback that is fired when a text content block is created""" - - async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - """Callback that is fired whenever a text content delta is returned - by the API. - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the text. 
For example: - - on_text_delta(TextDelta(value="The"), Text(value="The")), - on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), - on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), - on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), - on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), - """ - - async def on_text_done(self, text: Text) -> None: - """Callback that is fired when a text content block is finished""" - - async def on_image_file_done(self, image_file: ImageFile) -> None: - """Callback that is fired when an image file block is finished""" - - async def _emit_sse_event(self, event: AssistantStreamEvent) -> None: - self._current_event = event - await self.on_event(event) - - self.__current_message_snapshot, new_content = accumulate_event( - event=event, - current_message_snapshot=self.__current_message_snapshot, - ) - if self.__current_message_snapshot is not None: - self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot - - accumulate_run_step( - event=event, - run_step_snapshots=self.__run_step_snapshots, - ) - - for content_delta in new_content: - assert self.__current_message_snapshot is not None - - block = self.__current_message_snapshot.content[content_delta.index] - if block.type == "text": - await self.on_text_created(block.text) - - if ( - event.event == "thread.run.completed" - or event.event == "thread.run.cancelled" - or event.event == "thread.run.expired" - or event.event == "thread.run.failed" - or event.event == "thread.run.requires_action" - ): - self.__current_run = event.data - if self._current_tool_call: - await self.on_tool_call_done(self._current_tool_call) - elif ( - event.event == "thread.run.created" - or event.event == "thread.run.in_progress" - or event.event == "thread.run.cancelling" - or event.event == "thread.run.queued" - ): - self.__current_run = event.data - elif event.event == "thread.message.created": - await self.on_message_created(event.data) - elif event.event == "thread.message.delta": - snapshot = self.__current_message_snapshot - assert snapshot is not None - - message_delta = event.data.delta - if message_delta.content is not None: - for content_delta in message_delta.content: - if content_delta.type == "text" and content_delta.text: - snapshot_content = snapshot.content[content_delta.index] - assert snapshot_content.type == "text" - await self.on_text_delta(content_delta.text, snapshot_content.text) - - # If the delta is for a new message content: - # - emit on_text_done/on_image_file_done for the previous message content - # - emit on_text_created/on_image_created for the new message content - if content_delta.index != self._current_message_content_index: - if self._current_message_content is not None: - if self._current_message_content.type == "text": - await self.on_text_done(self._current_message_content.text) - elif self._current_message_content.type == "image_file": - await self.on_image_file_done(self._current_message_content.image_file) - - self._current_message_content_index = content_delta.index - self._current_message_content = snapshot.content[content_delta.index] - - # Update the current_message_content (delta event is correctly emitted already) - self._current_message_content = snapshot.content[content_delta.index] - - await self.on_message_delta(event.data.delta, snapshot) - elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": - 
self.__current_message_snapshot = event.data - self.__message_snapshots[event.data.id] = event.data - - if self._current_message_content_index is not None: - content = event.data.content[self._current_message_content_index] - if content.type == "text": - await self.on_text_done(content.text) - elif content.type == "image_file": - await self.on_image_file_done(content.image_file) - - await self.on_message_done(event.data) - elif event.event == "thread.run.step.created": - self.__current_run_step_id = event.data.id - await self.on_run_step_created(event.data) - elif event.event == "thread.run.step.in_progress": - self.__current_run_step_id = event.data.id - elif event.event == "thread.run.step.delta": - step_snapshot = self.__run_step_snapshots[event.data.id] - - run_step_delta = event.data.delta - if ( - run_step_delta.step_details - and run_step_delta.step_details.type == "tool_calls" - and run_step_delta.step_details.tool_calls is not None - ): - assert step_snapshot.step_details.type == "tool_calls" - for tool_call_delta in run_step_delta.step_details.tool_calls: - if tool_call_delta.index == self._current_tool_call_index: - await self.on_tool_call_delta( - tool_call_delta, - step_snapshot.step_details.tool_calls[tool_call_delta.index], - ) - - # If the delta is for a new tool call: - # - emit on_tool_call_done for the previous tool_call - # - emit on_tool_call_created for the new tool_call - if tool_call_delta.index != self._current_tool_call_index: - if self._current_tool_call is not None: - await self.on_tool_call_done(self._current_tool_call) - - self._current_tool_call_index = tool_call_delta.index - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - await self.on_tool_call_created(self._current_tool_call) - - # Update the current_tool_call (delta event is correctly emitted already) - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - - await self.on_run_step_delta( - event.data.delta, - step_snapshot, - ) - elif ( - event.event == "thread.run.step.completed" - or event.event == "thread.run.step.cancelled" - or event.event == "thread.run.step.expired" - or event.event == "thread.run.step.failed" - ): - if self._current_tool_call: - await self.on_tool_call_done(self._current_tool_call) - - await self.on_run_step_done(event.data) - self.__current_run_step_id = None - elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": - # currently no special handling - ... - else: - # we only want to error at build-time - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(event) - - self._current_event = None - - async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]: - stream = self.__stream - if not stream: - raise RuntimeError("Stream has not been started yet") - - try: - async for event in stream: - await self._emit_sse_event(event) - - yield event - except (httpx.TimeoutException, asyncio.TimeoutError) as exc: - await self.on_timeout() - await self.on_exception(exc) - raise - except Exception as exc: - await self.on_exception(exc) - raise - finally: - await self.on_end() - - -AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler) - - -class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]): - """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()` - so that an async context manager can be used without `await`ing the - original client call. 
- - ```py - async with client.threads.create_and_run_stream(...) as stream: - async for event in stream: - ... - ``` - """ - - def __init__( - self, - api_request: Awaitable[AsyncStream[AssistantStreamEvent]], - *, - event_handler: AsyncAssistantEventHandlerT, - ) -> None: - self.__stream: AsyncStream[AssistantStreamEvent] | None = None - self.__event_handler = event_handler - self.__api_request = api_request - - async def __aenter__(self) -> AsyncAssistantEventHandlerT: - self.__stream = await self.__api_request - self.__event_handler._init(self.__stream) - return self.__event_handler - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - await self.__stream.close() - - -def accumulate_run_step( - *, - event: AssistantStreamEvent, - run_step_snapshots: dict[str, RunStep], -) -> None: - if event.event == "thread.run.step.created": - run_step_snapshots[event.data.id] = event.data - return - - if event.event == "thread.run.step.delta": - data = event.data - snapshot = run_step_snapshots[data.id] - - if data.delta: - merged = accumulate_delta( - cast( - "dict[object, object]", - snapshot.model_dump(exclude_unset=True), - ), - cast( - "dict[object, object]", - data.delta.model_dump(exclude_unset=True), - ), - ) - run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged)) - - return None - - -def accumulate_event( - *, - event: AssistantStreamEvent, - current_message_snapshot: Message | None, -) -> tuple[Message | None, list[MessageContentDelta]]: - """Returns a tuple of message snapshot and newly created text message deltas""" - if event.event == "thread.message.created": - return event.data, [] - - new_content: list[MessageContentDelta] = [] - - if event.event != "thread.message.delta": - return current_message_snapshot, [] - - if not current_message_snapshot: - raise RuntimeError("Encountered a message delta with no previous snapshot") - - data = event.data - if data.delta.content: - for content_delta in data.delta.content: - try: - block = current_message_snapshot.content[content_delta.index] - except IndexError: - current_message_snapshot.content.insert( - content_delta.index, - cast( - MessageContent, - construct_type( - # mypy doesn't allow Content for some reason - type_=cast(Any, MessageContent), - value=content_delta.model_dump(exclude_unset=True), - ), - ), - ) - new_content.append(content_delta) - else: - merged = accumulate_delta( - cast( - "dict[object, object]", - block.model_dump(exclude_unset=True), - ), - cast( - "dict[object, object]", - content_delta.model_dump(exclude_unset=True), - ), - ) - current_message_snapshot.content[content_delta.index] = cast( - MessageContent, - construct_type( - # mypy doesn't allow Content for some reason - type_=cast(Any, MessageContent), - value=merged, - ), - ) - - return current_message_snapshot, new_content - - -def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]: - for key, delta_value in delta.items(): - if key not in acc: - acc[key] = delta_value - continue - - acc_value = acc[key] - if acc_value is None: - acc[key] = delta_value - continue - - # the `index` property is used in arrays of objects so it should - # not be accumulated like other values e.g. 
- # [{'foo': 'bar', 'index': 0}] - # - # the same applies to `type` properties as they're used for - # discriminated unions - if key == "index" or key == "type": - acc[key] = delta_value - continue - - if isinstance(acc_value, str) and isinstance(delta_value, str): - acc_value += delta_value - elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)): - acc_value += delta_value - elif is_dict(acc_value) and is_dict(delta_value): - acc_value = accumulate_delta(acc_value, delta_value) - elif is_list(acc_value) and is_list(delta_value): - # for lists of non-dictionary items we'll only ever get new entries - # in the array, existing entries will never be changed - if all(isinstance(x, (str, int, float)) for x in acc_value): - acc_value.extend(delta_value) - continue - - for delta_entry in delta_value: - if not is_dict(delta_entry): - raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}") - - try: - index = delta_entry["index"] - except KeyError as exc: - raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc - - if not isinstance(index, int): - raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}") - - try: - acc_entry = acc_value[index] - except IndexError: - acc_value.insert(index, delta_entry) - else: - if not is_dict(acc_entry): - raise TypeError("not handled yet") - - acc_value[index] = accumulate_delta(acc_entry, delta_entry) - - acc[key] = acc_value - - return acc diff --git a/src/openai/pagination.py b/src/openai/pagination.py index 8293638269..4dd3788aa3 100644 --- a/src/openai/pagination.py +++ b/src/openai/pagination.py @@ -5,7 +5,14 @@ from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage -__all__ = ["SyncPage", "AsyncPage", "SyncCursorPage", "AsyncCursorPage"] +__all__ = [ + "SyncPage", + "AsyncPage", + "SyncCursorPage", + "AsyncCursorPage", + "SyncConversationCursorPage", + "AsyncConversationCursorPage", +] _T = TypeVar("_T") @@ -61,6 +68,7 @@ def next_page_info(self) -> None: class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): data: List[_T] + has_more: Optional[bool] = None @override def _get_page_items(self) -> List[_T]: @@ -69,6 +77,14 @@ def _get_page_items(self) -> List[_T]: return [] return data + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + @override def next_page_info(self) -> Optional[PageInfo]: data = self.data @@ -85,6 +101,7 @@ def next_page_info(self) -> Optional[PageInfo]: class AsyncCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): data: List[_T] + has_more: Optional[bool] = None @override def _get_page_items(self) -> List[_T]: @@ -93,6 +110,14 @@ def _get_page_items(self) -> List[_T]: return [] return data + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + @override def next_page_info(self) -> Optional[PageInfo]: data = self.data @@ -105,3 +130,61 @@ def next_page_info(self) -> Optional[PageInfo]: return None return PageInfo(params={"after": item.id}) + + +class SyncConversationCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + has_more: Optional[bool] = None + last_id: Optional[str] = None + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def 
has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + + @override + def next_page_info(self) -> Optional[PageInfo]: + last_id = self.last_id + if not last_id: + return None + + return PageInfo(params={"after": last_id}) + + +class AsyncConversationCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): + data: List[_T] + has_more: Optional[bool] = None + last_id: Optional[str] = None + + @override + def _get_page_items(self) -> List[_T]: + data = self.data + if not data: + return [] + return data + + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + + @override + def next_page_info(self) -> Optional[PageInfo]: + last_id = self.last_id + if not last_id: + return None + + return PageInfo(params={"after": last_id}) diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py index ecae4243fc..b793fbc7b0 100644 --- a/src/openai/resources/__init__.py +++ b/src/openai/resources/__init__.py @@ -24,6 +24,14 @@ AudioWithStreamingResponse, AsyncAudioWithStreamingResponse, ) +from .evals import ( + Evals, + AsyncEvals, + EvalsWithRawResponse, + AsyncEvalsWithRawResponse, + EvalsWithStreamingResponse, + AsyncEvalsWithStreamingResponse, +) from .files import ( Files, AsyncFiles, @@ -48,6 +56,14 @@ ModelsWithStreamingResponse, AsyncModelsWithStreamingResponse, ) +from .videos import ( + Videos, + AsyncVideos, + VideosWithRawResponse, + AsyncVideosWithRawResponse, + VideosWithStreamingResponse, + AsyncVideosWithStreamingResponse, +) from .batches import ( Batches, AsyncBatches, @@ -56,6 +72,22 @@ BatchesWithStreamingResponse, AsyncBatchesWithStreamingResponse, ) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) +from .containers import ( + Containers, + AsyncContainers, + ContainersWithRawResponse, + AsyncContainersWithRawResponse, + ContainersWithStreamingResponse, + AsyncContainersWithStreamingResponse, +) from .embeddings import ( Embeddings, AsyncEmbeddings, @@ -88,6 +120,14 @@ ModerationsWithStreamingResponse, AsyncModerationsWithStreamingResponse, ) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) __all__ = [ "Completions", @@ -144,6 +184,12 @@ "AsyncFineTuningWithRawResponse", "FineTuningWithStreamingResponse", "AsyncFineTuningWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", @@ -156,4 +202,28 @@ "AsyncBatchesWithRawResponse", "BatchesWithStreamingResponse", "AsyncBatchesWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", + "Evals", + "AsyncEvals", + "EvalsWithRawResponse", + "AsyncEvalsWithRawResponse", + "EvalsWithStreamingResponse", + "AsyncEvalsWithStreamingResponse", + "Containers", + "AsyncContainers", + "ContainersWithRawResponse", + "AsyncContainersWithRawResponse", + "ContainersWithStreamingResponse", + 
"AsyncContainersWithStreamingResponse", + "Videos", + "AsyncVideos", + "VideosWithRawResponse", + "AsyncVideosWithRawResponse", + "VideosWithStreamingResponse", + "AsyncVideosWithStreamingResponse", ] diff --git a/src/openai/resources/audio/audio.py b/src/openai/resources/audio/audio.py index 537ad573d0..383b7073bf 100644 --- a/src/openai/resources/audio/audio.py +++ b/src/openai/resources/audio/audio.py @@ -47,10 +47,21 @@ def speech(self) -> Speech: @cached_property def with_raw_response(self) -> AudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AudioWithRawResponse(self) @cached_property def with_streaming_response(self) -> AudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AudioWithStreamingResponse(self) @@ -69,10 +80,21 @@ def speech(self) -> AsyncSpeech: @cached_property def with_raw_response(self) -> AsyncAudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncAudioWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncAudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncAudioWithStreamingResponse(self) diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py index e26c58051e..992fb5971a 100644 --- a/src/openai/resources/audio/speech.py +++ b/src/openai/resources/audio/speech.py @@ -8,11 +8,8 @@ import httpx from ... import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -22,9 +19,8 @@ async_to_custom_streamed_response_wrapper, ) from ...types.audio import speech_create_params -from ..._base_client import ( - make_request_options, -) +from ..._base_client import make_request_options +from ...types.audio.speech_model import SpeechModel __all__ = ["Speech", "AsyncSpeech"] @@ -32,26 +28,41 @@ class Speech(SyncAPIResource): @cached_property def with_raw_response(self) -> SpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return SpeechWithRawResponse(self) @cached_property def with_streaming_response(self) -> SpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return SpeechWithStreamingResponse(self) def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, - speed: float | NotGiven = NOT_GIVEN, + model: Union[str, SpeechModel], + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"] + ], + instructions: str | Omit = omit, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | Omit = omit, + speed: float | Omit = omit, + stream_format: Literal["sse", "audio"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> _legacy_response.HttpxBinaryResponseContent: """ Generates audio from the input text. @@ -60,13 +71,16 @@ def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. + + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and + `verse`. Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are - available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. @@ -74,6 +88,9 @@ def create( speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. + stream_format: The format to stream the audio in. Supported formats are `sse` and `audio`. + `sse` is not supported for `tts-1` or `tts-1-hd`. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -90,8 +107,10 @@ def create( "input": input, "model": model, "voice": voice, + "instructions": instructions, "response_format": response_format, "speed": speed, + "stream_format": stream_format, }, speech_create_params.SpeechCreateParams, ), @@ -105,26 +124,41 @@ def create( class AsyncSpeech(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncSpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncSpeechWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncSpeechWithStreamingResponse(self) async def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, - speed: float | NotGiven = NOT_GIVEN, + model: Union[str, SpeechModel], + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"] + ], + instructions: str | Omit = omit, + response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | Omit = omit, + speed: float | Omit = omit, + stream_format: Literal["sse", "audio"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> _legacy_response.HttpxBinaryResponseContent: """ Generates audio from the input text. @@ -133,13 +167,16 @@ async def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are - available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and + `verse`. Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + + instructions: Control the voice of your generated audio with additional instructions. Does not + work with `tts-1` or `tts-1-hd`. 
response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. @@ -147,6 +184,9 @@ async def create( speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. + stream_format: The format to stream the audio in. Supported formats are `sse` and `audio`. + `sse` is not supported for `tts-1` or `tts-1-hd`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -163,8 +203,10 @@ async def create( "input": input, "model": model, "voice": voice, + "instructions": instructions, "response_format": response_format, "speed": speed, + "stream_format": stream_format, }, speech_create_params.SpeechCreateParams, ), diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py index 995680186b..57bc1f89fc 100644 --- a/src/openai/resources/audio/transcriptions.py +++ b/src/openai/resources/audio/transcriptions.py @@ -2,27 +2,36 @@ from __future__ import annotations -from typing import List, Union, Mapping, cast -from typing_extensions import Literal +from typing import Any, List, Union, Mapping, Optional, cast +from typing_extensions import Literal, overload import httpx from ... import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import ( - extract_files, - maybe_transform, - deepcopy_minimal, - async_maybe_transform, +from ...types import AudioResponseFormat +from ..._types import ( + Body, + Omit, + Query, + Headers, + NotGiven, + FileTypes, + SequenceNotStr, + omit, + not_given, ) +from ..._utils import extract_files, required_args, maybe_transform, deepcopy_minimal, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._streaming import Stream, AsyncStream from ...types.audio import transcription_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.audio.transcription import Transcription +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.transcription_include import TranscriptionInclude +from ...types.audio.transcription_stream_event import TranscriptionStreamEvent +from ...types.audio.transcription_create_response import TranscriptionCreateResponse __all__ = ["Transcriptions", "AsyncTranscriptions"] @@ -30,29 +39,260 @@ class Transcriptions(SyncAPIResource): @cached_property def with_raw_response(self) -> TranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return TranscriptionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> TranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return TranscriptionsWithStreamingResponse(self) + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. 
The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: Literal[True], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. 
The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. 
+ + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], - language: str | NotGiven = NOT_GIVEN, - prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + model: Union[str, AudioModel], + stream: bool, + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]: """ Transcribes audio into the input language. @@ -61,20 +301,58 @@ def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` (which is powered by our open source - Whisper V2 model) is currently available. + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. 
If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) - should match the audio language. + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -86,7 +364,8 @@ def create( `response_format` must be set `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating word timestamps - incurs additional latency. + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. extra_headers: Send extra headers @@ -96,60 +375,222 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + ... 
+ + @required_args(["file", "model"], ["file", "model", "stream"]) + def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]: body = deepcopy_minimal( { "file": file, "model": model, + "chunking_strategy": chunking_strategy, + "include": include, + "known_speaker_names": known_speaker_names, + "known_speaker_references": known_speaker_references, "language": language, "prompt": prompt, "response_format": response_format, + "stream": stream, "temperature": temperature, "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/audio/transcriptions", - body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + body=maybe_transform( + body, + transcription_create_params.TranscriptionCreateParamsStreaming + if stream + else transcription_create_params.TranscriptionCreateParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Transcription, + cast_to=cast( + Any, TranscriptionCreateResponse + ), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=Stream[TranscriptionStreamEvent], ) class AsyncTranscriptions(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncTranscriptionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncTranscriptionsWithStreamingResponse(self) + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. 
The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], - language: str | NotGiven = NOT_GIVEN, - prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, + model: Union[str, AudioModel], + stream: Literal[True], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[TranscriptionStreamEvent]: """ Transcribes audio into the input language. @@ -158,20 +599,58 @@ async def create( The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. - model: ID of the model to use. Only `whisper-1` (which is powered by our open source - Whisper V2 model) is currently available. + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) - should match the audio language. 
+ [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -183,7 +662,8 @@ async def create( `response_format` must be set `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating word timestamps - incurs additional latency. + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. extra_headers: Send extra headers @@ -193,31 +673,176 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @overload + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + stream: bool, + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]: + """ + Transcribes audio into the input language. + + Args: + file: + The audio file object (not file name) to transcribe, in one of these formats: + flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. + + model: ID of the model to use. The options are `gpt-4o-transcribe`, + `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source + Whisper V2 model), and `gpt-4o-transcribe-diarize`. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. 
+ + chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server + first normalizes loudness and then uses voice activity detection (VAD) to choose + boundaries. `server_vad` object can be provided to tweak VAD detection + parameters manually. If unset, the audio is transcribed as a single block. + Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + + include: Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + known_speaker_names: Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + + known_speaker_references: Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. + + language: The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + + prompt: An optional text to guide the model's style or continue a previous audio + segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. This field is not supported when using + `gpt-4o-transcribe-diarize`. + + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`, the only supported format is `json`. For + `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. + + temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the + output more random, while lower values like 0.2 will make it more focused and + deterministic. If set to 0, the model will use + [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + + timestamp_granularities: The timestamp granularities to populate for this transcription. + `response_format` must be set `verbose_json` to use timestamp granularities. + Either or both of these options are supported: `word`, or `segment`. Note: There + is no additional latency for segment timestamps, but generating word timestamps + incurs additional latency. This option is not available for + `gpt-4o-transcribe-diarize`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
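# A minimal usage sketch for the streaming overload above, assuming an
# AsyncOpenAI() client configured via OPENAI_API_KEY and a local "meeting.mp3"
# file (both hypothetical); streaming is ignored for whisper-1, so a
# gpt-4o-transcribe model is used here.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    with open("meeting.mp3", "rb") as audio_file:
        stream = await client.audio.transcriptions.create(
            file=audio_file,
            model="gpt-4o-transcribe",
            stream=True,
        )
        # Each item is a TranscriptionStreamEvent: text deltas followed by a
        # completed event carrying the final transcript.
        async for event in stream:
            print(event.type)


asyncio.run(main())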
+ + @required_args(["file", "model"], ["file", "model", "stream"]) + async def create( + self, + *, + file: FileTypes, + model: Union[str, AudioModel], + chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit, + include: List[TranscriptionInclude] | Omit = omit, + known_speaker_names: SequenceNotStr[str] | Omit = omit, + known_speaker_references: SequenceNotStr[str] | Omit = omit, + language: str | Omit = omit, + prompt: str | Omit = omit, + response_format: AudioResponseFormat | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: float | Omit = omit, + timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]: body = deepcopy_minimal( { "file": file, "model": model, + "chunking_strategy": chunking_strategy, + "include": include, + "known_speaker_names": known_speaker_names, + "known_speaker_references": known_speaker_references, "language": language, "prompt": prompt, "response_format": response_format, + "stream": stream, "temperature": temperature, "timestamp_granularities": timestamp_granularities, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/audio/transcriptions", - body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + body=await async_maybe_transform( + body, + transcription_create_params.TranscriptionCreateParamsStreaming + if stream + else transcription_create_params.TranscriptionCreateParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Transcription, + cast_to=cast( + Any, TranscriptionCreateResponse + ), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=AsyncStream[TranscriptionStreamEvent], ) diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py index d711ee2fbd..c55c992781 100644 --- a/src/openai/resources/audio/translations.py +++ b/src/openai/resources/audio/translations.py @@ -2,27 +2,21 @@ from __future__ import annotations -from typing import Union, Mapping, cast +from typing import Any, Union, Mapping, cast from typing_extensions import Literal import httpx from ... 
import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ..._utils import ( - extract_files, - maybe_transform, - deepcopy_minimal, - async_maybe_transform, -) +from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given +from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ...types.audio import translation_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.audio.translation import Translation +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio.translation_create_response import TranslationCreateResponse __all__ = ["Translations", "AsyncTranslations"] @@ -30,27 +24,38 @@ class Translations(SyncAPIResource): @cached_property def with_raw_response(self) -> TranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return TranslationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> TranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return TranslationsWithStreamingResponse(self) def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], - prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, + model: Union[str, AudioModel], + prompt: str | Omit = omit, + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | Omit = omit, + temperature: float | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranslationCreateResponse: """ Translates audio into English. @@ -63,11 +68,11 @@ def create( prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -93,46 +98,61 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self._post( - "/audio/translations", - body=maybe_transform(body, translation_create_params.TranslationCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranslationCreateResponse, + self._post( + "/audio/translations", + body=maybe_transform(body, translation_create_params.TranslationCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranslationCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Translation, ) class AsyncTranslations(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncTranslationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncTranslationsWithStreamingResponse(self) async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], - prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, + model: Union[str, AudioModel], + prompt: str | Omit = omit, + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | Omit = omit, + temperature: float | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> TranslationCreateResponse: """ Translates audio into English. @@ -145,11 +165,11 @@ async def create( prompt: An optional text to guide the model's style or continue a previous audio segment. 
The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -175,19 +195,23 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self._post( - "/audio/translations", - body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranslationCreateResponse, + await self._post( + "/audio/translations", + body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranslationCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Translation, ) diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py index db4c4da235..80400839e4 100644 --- a/src/openai/resources/batches.py +++ b/src/openai/resources/batches.py @@ -2,27 +2,22 @@ from __future__ import annotations -from typing import Dict, Optional +from typing import Optional from typing_extensions import Literal import httpx from .. 
import _legacy_response from ..types import batch_list_params, batch_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncCursorPage, AsyncCursorPage from ..types.batch import Batch -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from .._base_client import AsyncPaginator, make_request_options +from ..types.shared_params.metadata import Metadata __all__ = ["Batches", "AsyncBatches"] @@ -30,25 +25,39 @@ class Batches(SyncAPIResource): @cached_property def with_raw_response(self) -> BatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return BatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> BatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return BatchesWithStreamingResponse(self) def create( self, *, completion_window: Literal["24h"], - endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + endpoint: Literal[ + "/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions", "/v1/moderations" + ], input_file_id: str, - metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, + output_expires_after: batch_create_params.OutputExpiresAfter | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: """ Creates and executes a batch from an uploaded file of requests @@ -58,9 +67,10 @@ def create( is supported. endpoint: The endpoint to be used for all requests in the batch. Currently - `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. - Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 - embedding inputs across all requests in the batch. + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, `/v1/completions`, + and `/v1/moderations` are supported. Note that `/v1/embeddings` batches are also + restricted to a maximum of 50,000 embedding inputs across all requests in the + batch. input_file_id: The ID of an uploaded file that contains requests for the new batch. @@ -68,11 +78,19 @@ def create( for how to upload a file. 
Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. - metadata: Optional custom metadata for the batch. + output_expires_after: The expiration policy for the output and/or error file that are generated for a + batch. extra_headers: Send extra headers @@ -90,6 +108,7 @@ def create( "endpoint": endpoint, "input_file_id": input_file_id, "metadata": metadata, + "output_expires_after": output_expires_after, }, batch_create_params.BatchCreateParams, ), @@ -108,7 +127,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: """ Retrieves a batch. @@ -135,14 +154,14 @@ def retrieve( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[Batch]: """List your organization's batches. @@ -193,10 +212,13 @@ def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: - """ - Cancels an in-progress batch. + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. Args: extra_headers: Send extra headers @@ -221,25 +243,39 @@ def cancel( class AsyncBatches(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncBatchesWithStreamingResponse(self) async def create( self, *, completion_window: Literal["24h"], - endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + endpoint: Literal[ + "/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions", "/v1/moderations" + ], input_file_id: str, - metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, + output_expires_after: batch_create_params.OutputExpiresAfter | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: """ Creates and executes a batch from an uploaded file of requests @@ -249,9 +285,10 @@ async def create( is supported. endpoint: The endpoint to be used for all requests in the batch. Currently - `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. - Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 - embedding inputs across all requests in the batch. + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, `/v1/completions`, + and `/v1/moderations` are supported. Note that `/v1/embeddings` batches are also + restricted to a maximum of 50,000 embedding inputs across all requests in the + batch. input_file_id: The ID of an uploaded file that contains requests for the new batch. @@ -259,11 +296,19 @@ async def create( for how to upload a file. Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. - metadata: Optional custom metadata for the batch. + output_expires_after: The expiration policy for the output and/or error file that are generated for a + batch. extra_headers: Send extra headers @@ -281,6 +326,7 @@ async def create( "endpoint": endpoint, "input_file_id": input_file_id, "metadata": metadata, + "output_expires_after": output_expires_after, }, batch_create_params.BatchCreateParams, ), @@ -299,7 +345,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: """ Retrieves a batch. 
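# A minimal usage sketch for the batch endpoints documented above, assuming an
# OpenAI() client and an already-uploaded JSONL file with purpose "batch"; the
# file ID and metadata values are hypothetical placeholders.
from openai import OpenAI

client = OpenAI()

batch = client.batches.create(
    input_file_id="file-abc123",
    endpoint="/v1/responses",  # newly supported alongside /v1/chat/completions
    completion_window="24h",
    metadata={"team": "analytics"},  # up to 16 string key/value pairs
)

# Retrieve later to check progress; completed batches expose an output file ID.
batch = client.batches.retrieve(batch.id)
print(batch.status, batch.output_file_id)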
@@ -326,14 +372,14 @@ async def retrieve( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[Batch, AsyncCursorPage[Batch]]: """List your organization's batches. @@ -384,10 +430,13 @@ async def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Batch: - """ - Cancels an in-progress batch. + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. Args: extra_headers: Send extra headers diff --git a/src/openai/resources/beta/__init__.py b/src/openai/resources/beta/__init__.py index 01f5338757..6d6f538670 100644 --- a/src/openai/resources/beta/__init__.py +++ b/src/openai/resources/beta/__init__.py @@ -8,6 +8,14 @@ BetaWithStreamingResponse, AsyncBetaWithStreamingResponse, ) +from .chatkit import ( + ChatKit, + AsyncChatKit, + ChatKitWithRawResponse, + AsyncChatKitWithRawResponse, + ChatKitWithStreamingResponse, + AsyncChatKitWithStreamingResponse, +) from .threads import ( Threads, AsyncThreads, @@ -24,22 +32,14 @@ AssistantsWithStreamingResponse, AsyncAssistantsWithStreamingResponse, ) -from .vector_stores import ( - VectorStores, - AsyncVectorStores, - VectorStoresWithRawResponse, - AsyncVectorStoresWithRawResponse, - VectorStoresWithStreamingResponse, - AsyncVectorStoresWithStreamingResponse, -) __all__ = [ - "VectorStores", - "AsyncVectorStores", - "VectorStoresWithRawResponse", - "AsyncVectorStoresWithRawResponse", - "VectorStoresWithStreamingResponse", - "AsyncVectorStoresWithStreamingResponse", + "ChatKit", + "AsyncChatKit", + "ChatKitWithRawResponse", + "AsyncChatKitWithRawResponse", + "ChatKitWithStreamingResponse", + "AsyncChatKitWithStreamingResponse", "Assistants", "AsyncAssistants", "AssistantsWithRawResponse", diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index 5912aff77a..ab0947abf4 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -8,11 +8,8 @@ import httpx from ... 
import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper @@ -22,12 +19,12 @@ assistant_create_params, assistant_update_params, ) -from ..._base_client import ( - AsyncPaginator, - make_request_options, -) +from ..._base_client import AsyncPaginator, make_request_options from ...types.beta.assistant import Assistant +from ...types.shared.chat_model import ChatModel from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.shared_params.metadata import Metadata +from ...types.shared.reasoning_effort import ReasoningEffort from ...types.beta.assistant_tool_param import AssistantToolParam from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -37,55 +34,43 @@ class Assistants(SyncAPIResource): @cached_property def with_raw_response(self) -> AssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AssistantsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AssistantsWithStreamingResponse(self) def create( self, *, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ], - description: Optional[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel], + description: Optional[str] | Omit = omit, + instructions: Optional[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: Optional[str] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit, + tools: Iterable[AssistantToolParam] | Omit = omit, + top_p: Optional[float] | Omit = omit, # Use the 
following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """ Create an assistant with a model and instructions. @@ -94,8 +79,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. description: The description of the assistant. The maximum length is 512 characters. @@ -103,18 +88,39 @@ def create( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to @@ -162,6 +168,7 @@ def create( "instructions": instructions, "metadata": metadata, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -185,7 +192,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """ Retrieves an assistant. @@ -214,22 +221,70 @@ def update( self, assistant_id: str, *, - description: Optional[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, + description: Optional[str] | Omit = omit, + instructions: Optional[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[ + str, + Literal[ + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | Omit = omit, + name: Optional[str] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit, + tools: Iterable[AssistantToolParam] | Omit = omit, + top_p: Optional[float] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """Modifies an assistant. @@ -242,24 +297,45 @@ def update( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. 
+ for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -309,6 +385,7 @@ def update( "metadata": metadata, "model": model, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -326,16 +403,16 @@ def update( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[Assistant]: """Returns a list of assistants. @@ -349,8 +426,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -397,7 +474,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AssistantDeleted: """ Delete an assistant. @@ -426,55 +503,43 @@ def delete( class AsyncAssistants(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncAssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncAssistantsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncAssistantsWithStreamingResponse(self) async def create( self, *, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ], - description: Optional[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel], + description: Optional[str] | Omit = omit, + instructions: Optional[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: Optional[str] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit, + tools: Iterable[AssistantToolParam] | Omit = omit, + top_p: Optional[float] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """ Create an assistant with a model and instructions. @@ -483,8 +548,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. description: The description of the assistant. The maximum length is 512 characters. @@ -492,18 +557,39 @@ async def create( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the assistant. The maximum length is 256 characters. 
+ reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -551,6 +637,7 @@ async def create( "instructions": instructions, "metadata": metadata, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -574,7 +661,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """ Retrieves an assistant. 
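# A minimal usage sketch for the new reasoning_effort parameter documented
# above, assuming an OpenAI() client; the model, name, and instructions are
# hypothetical placeholders, and the supported effort values per model follow
# the docstring (e.g. gpt-5.1 accepts none/low/medium/high).
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-5.1",
    name="Ticket triage helper",
    instructions="Classify incoming support tickets and draft a short reply.",
    reasoning_effort="low",
)
print(assistant.id)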
@@ -603,22 +690,70 @@ async def update( self, assistant_id: str, *, - description: Optional[str] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, + description: Optional[str] | Omit = omit, + instructions: Optional[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[ + str, + Literal[ + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | Omit = omit, + name: Optional[str] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit, + tools: Iterable[AssistantToolParam] | Omit = omit, + top_p: Optional[float] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Assistant: """Modifies an assistant. @@ -631,24 +766,45 @@ async def update( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. 
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -698,6 +854,7 @@ async def update( "metadata": metadata, "model": model, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -715,16 +872,16 @@ async def update( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]: """Returns a list of assistants. @@ -738,8 +895,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. 
+ starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -786,7 +943,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AssistantDeleted: """ Delete an assistant. diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index 0d9806678f..edf30be005 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -2,14 +2,6 @@ from __future__ import annotations -from .threads import ( - Threads, - AsyncThreads, - ThreadsWithRawResponse, - AsyncThreadsWithRawResponse, - ThreadsWithStreamingResponse, - AsyncThreadsWithStreamingResponse, -) from ..._compat import cached_property from .assistants import ( Assistants, @@ -20,24 +12,30 @@ AsyncAssistantsWithStreamingResponse, ) from ..._resource import SyncAPIResource, AsyncAPIResource -from .vector_stores import ( - VectorStores, - AsyncVectorStores, - VectorStoresWithRawResponse, - AsyncVectorStoresWithRawResponse, - VectorStoresWithStreamingResponse, - AsyncVectorStoresWithStreamingResponse, +from .chatkit.chatkit import ( + ChatKit, + AsyncChatKit, + ChatKitWithRawResponse, + AsyncChatKitWithRawResponse, + ChatKitWithStreamingResponse, + AsyncChatKitWithStreamingResponse, +) +from .threads.threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, ) -from .threads.threads import Threads, AsyncThreads -from .vector_stores.vector_stores import VectorStores, AsyncVectorStores __all__ = ["Beta", "AsyncBeta"] class Beta(SyncAPIResource): @cached_property - def vector_stores(self) -> VectorStores: - return VectorStores(self._client) + def chatkit(self) -> ChatKit: + return ChatKit(self._client) @cached_property def assistants(self) -> Assistants: @@ -49,17 +47,28 @@ def threads(self) -> Threads: @cached_property def with_raw_response(self) -> BetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return BetaWithRawResponse(self) @cached_property def with_streaming_response(self) -> BetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return BetaWithStreamingResponse(self) class AsyncBeta(AsyncAPIResource): @cached_property - def vector_stores(self) -> AsyncVectorStores: - return AsyncVectorStores(self._client) + def chatkit(self) -> AsyncChatKit: + return AsyncChatKit(self._client) @cached_property def assistants(self) -> AsyncAssistants: @@ -71,10 +80,21 @@ def threads(self) -> AsyncThreads: @cached_property def with_raw_response(self) -> AsyncBetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncBetaWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncBetaWithStreamingResponse(self) @@ -83,8 +103,8 @@ def __init__(self, beta: Beta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> VectorStoresWithRawResponse: - return VectorStoresWithRawResponse(self._beta.vector_stores) + def chatkit(self) -> ChatKitWithRawResponse: + return ChatKitWithRawResponse(self._beta.chatkit) @cached_property def assistants(self) -> AssistantsWithRawResponse: @@ -100,8 +120,8 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> AsyncVectorStoresWithRawResponse: - return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) + def chatkit(self) -> AsyncChatKitWithRawResponse: + return AsyncChatKitWithRawResponse(self._beta.chatkit) @cached_property def assistants(self) -> AsyncAssistantsWithRawResponse: @@ -117,8 +137,8 @@ def __init__(self, beta: Beta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> VectorStoresWithStreamingResponse: - return VectorStoresWithStreamingResponse(self._beta.vector_stores) + def chatkit(self) -> ChatKitWithStreamingResponse: + return ChatKitWithStreamingResponse(self._beta.chatkit) @cached_property def assistants(self) -> AssistantsWithStreamingResponse: @@ -134,8 +154,8 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse: - return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) + def chatkit(self) -> AsyncChatKitWithStreamingResponse: + return AsyncChatKitWithStreamingResponse(self._beta.chatkit) @cached_property def assistants(self) -> AsyncAssistantsWithStreamingResponse: diff --git a/src/openai/resources/beta/chatkit/__init__.py b/src/openai/resources/beta/chatkit/__init__.py new file mode 100644 index 0000000000..05f24d6238 --- /dev/null +++ b/src/openai/resources/beta/chatkit/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .chatkit import ( + ChatKit, + AsyncChatKit, + ChatKitWithRawResponse, + AsyncChatKitWithRawResponse, + ChatKitWithStreamingResponse, + AsyncChatKitWithStreamingResponse, +) +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) + +__all__ = [ + "Sessions", + "AsyncSessions", + "SessionsWithRawResponse", + "AsyncSessionsWithRawResponse", + "SessionsWithStreamingResponse", + "AsyncSessionsWithStreamingResponse", + "Threads", + "AsyncThreads", + "ThreadsWithRawResponse", + "AsyncThreadsWithRawResponse", + "ThreadsWithStreamingResponse", + "AsyncThreadsWithStreamingResponse", + "ChatKit", + "AsyncChatKit", + "ChatKitWithRawResponse", + "AsyncChatKitWithRawResponse", + "ChatKitWithStreamingResponse", + "AsyncChatKitWithStreamingResponse", +] diff --git a/src/openai/resources/beta/chatkit/chatkit.py b/src/openai/resources/beta/chatkit/chatkit.py new file mode 100644 index 0000000000..5a10a39c7b --- /dev/null +++ b/src/openai/resources/beta/chatkit/chatkit.py @@ -0,0 +1,134 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["ChatKit", "AsyncChatKit"] + + +class ChatKit(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def threads(self) -> Threads: + return Threads(self._client) + + @cached_property + def with_raw_response(self) -> ChatKitWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ChatKitWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatKitWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ChatKitWithStreamingResponse(self) + + +class AsyncChatKit(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def threads(self) -> AsyncThreads: + return AsyncThreads(self._client) + + @cached_property + def with_raw_response(self) -> AsyncChatKitWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncChatKitWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatKitWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncChatKitWithStreamingResponse(self) + + +class ChatKitWithRawResponse: + def __init__(self, chatkit: ChatKit) -> None: + self._chatkit = chatkit + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._chatkit.sessions) + + @cached_property + def threads(self) -> ThreadsWithRawResponse: + return ThreadsWithRawResponse(self._chatkit.threads) + + +class AsyncChatKitWithRawResponse: + def __init__(self, chatkit: AsyncChatKit) -> None: + self._chatkit = chatkit + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._chatkit.sessions) + + @cached_property + def threads(self) -> AsyncThreadsWithRawResponse: + return AsyncThreadsWithRawResponse(self._chatkit.threads) + + +class ChatKitWithStreamingResponse: + def __init__(self, chatkit: ChatKit) -> None: + self._chatkit = chatkit + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._chatkit.sessions) + + @cached_property + def threads(self) -> ThreadsWithStreamingResponse: + return ThreadsWithStreamingResponse(self._chatkit.threads) + + +class AsyncChatKitWithStreamingResponse: + def __init__(self, chatkit: AsyncChatKit) -> None: + self._chatkit = chatkit + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._chatkit.sessions) + + @cached_property + def threads(self) -> AsyncThreadsWithStreamingResponse: + return AsyncThreadsWithStreamingResponse(self._chatkit.threads) diff --git a/src/openai/resources/beta/chatkit/sessions.py b/src/openai/resources/beta/chatkit/sessions.py new file mode 100644 index 0000000000..a814f1058e --- /dev/null +++ b/src/openai/resources/beta/chatkit/sessions.py @@ -0,0 +1,301 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... 
import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.chatkit import ( + ChatSessionWorkflowParam, + ChatSessionRateLimitsParam, + ChatSessionExpiresAfterParam, + ChatSessionChatKitConfigurationParam, + session_create_params, +) +from ....types.beta.chatkit.chat_session import ChatSession +from ....types.beta.chatkit.chat_session_workflow_param import ChatSessionWorkflowParam +from ....types.beta.chatkit.chat_session_rate_limits_param import ChatSessionRateLimitsParam +from ....types.beta.chatkit.chat_session_expires_after_param import ChatSessionExpiresAfterParam +from ....types.beta.chatkit.chat_session_chatkit_configuration_param import ChatSessionChatKitConfigurationParam + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + user: str, + workflow: ChatSessionWorkflowParam, + chatkit_configuration: ChatSessionChatKitConfigurationParam | Omit = omit, + expires_after: ChatSessionExpiresAfterParam | Omit = omit, + rate_limits: ChatSessionRateLimitsParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatSession: + """ + Create a ChatKit session + + Args: + user: A free-form string that identifies your end user; ensures this Session can + access other objects that have the same `user` scope. + + workflow: Workflow that powers the session. + + chatkit_configuration: Optional overrides for ChatKit runtime configuration features + + expires_after: Optional override for session expiration timing in seconds from creation. + Defaults to 10 minutes. + + rate_limits: Optional override for per-minute request limits. When omitted, defaults to 10. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._post( + "/chatkit/sessions", + body=maybe_transform( + { + "user": user, + "workflow": workflow, + "chatkit_configuration": chatkit_configuration, + "expires_after": expires_after, + "rate_limits": rate_limits, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatSession, + ) + + def cancel( + self, + session_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatSession: + """ + Cancel a ChatKit session + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not session_id: + raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._post( + f"/chatkit/sessions/{session_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatSession, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + user: str, + workflow: ChatSessionWorkflowParam, + chatkit_configuration: ChatSessionChatKitConfigurationParam | Omit = omit, + expires_after: ChatSessionExpiresAfterParam | Omit = omit, + rate_limits: ChatSessionRateLimitsParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatSession: + """ + Create a ChatKit session + + Args: + user: A free-form string that identifies your end user; ensures this Session can + access other objects that have the same `user` scope. + + workflow: Workflow that powers the session. + + chatkit_configuration: Optional overrides for ChatKit runtime configuration features + + expires_after: Optional override for session expiration timing in seconds from creation. + Defaults to 10 minutes. + + rate_limits: Optional override for per-minute request limits. When omitted, defaults to 10. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return await self._post( + "/chatkit/sessions", + body=await async_maybe_transform( + { + "user": user, + "workflow": workflow, + "chatkit_configuration": chatkit_configuration, + "expires_after": expires_after, + "rate_limits": rate_limits, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatSession, + ) + + async def cancel( + self, + session_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatSession: + """ + Cancel a ChatKit session + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not session_id: + raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return await self._post( + f"/chatkit/sessions/{session_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatSession, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + sessions.cancel, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + sessions.cancel, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + self.cancel = to_streamed_response_wrapper( + sessions.cancel, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) + self.cancel = async_to_streamed_response_wrapper( + sessions.cancel, + ) diff --git a/src/openai/resources/beta/chatkit/threads.py b/src/openai/resources/beta/chatkit/threads.py new file mode 100644 index 0000000000..37cd57295a --- /dev/null +++ b/src/openai/resources/beta/chatkit/threads.py @@ -0,0 +1,521 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, cast +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncConversationCursorPage, AsyncConversationCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.beta.chatkit import thread_list_params, thread_list_items_params +from ....types.beta.chatkit.chatkit_thread import ChatKitThread +from ....types.beta.chatkit.thread_delete_response import ThreadDeleteResponse +from ....types.beta.chatkit.chatkit_thread_item_list import Data + +__all__ = ["Threads", "AsyncThreads"] + + +class Threads(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ThreadsWithStreamingResponse(self) + + def retrieve( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatKitThread: + """ + Retrieve a ChatKit thread + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._get( + f"/chatkit/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatKitThread, + ) + + def list( + self, + *, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncConversationCursorPage[ChatKitThread]: + """ + List ChatKit threads + + Args: + after: List items created after this thread item ID. Defaults to null for the first + page. 
+ + before: List items created before this thread item ID. Defaults to null for the newest + results. + + limit: Maximum number of thread items to return. Defaults to 20. + + order: Sort order for results by creation time. Defaults to `desc`. + + user: Filter threads that belong to this user identifier. Defaults to null to return + all users. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._get_api_list( + "/chatkit/threads", + page=SyncConversationCursorPage[ChatKitThread], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "user": user, + }, + thread_list_params.ThreadListParams, + ), + ), + model=ChatKitThread, + ) + + def delete( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ThreadDeleteResponse: + """ + Delete a ChatKit thread + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._delete( + f"/chatkit/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ThreadDeleteResponse, + ) + + def list_items( + self, + thread_id: str, + *, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncConversationCursorPage[Data]: + """ + List ChatKit thread items + + Args: + after: List items created after this thread item ID. Defaults to null for the first + page. + + before: List items created before this thread item ID. Defaults to null for the newest + results. + + limit: Maximum number of thread items to return. Defaults to 20. + + order: Sort order for results by creation time. Defaults to `desc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._get_api_list( + f"/chatkit/threads/{thread_id}/items", + page=SyncConversationCursorPage[Data], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + thread_list_items_params.ThreadListItemsParams, + ), + ), + model=cast(Any, Data), # Union types cannot be passed in as arguments in the type system + ) + + +class AsyncThreads(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncThreadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncThreadsWithStreamingResponse(self) + + async def retrieve( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatKitThread: + """ + Retrieve a ChatKit thread + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return await self._get( + f"/chatkit/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatKitThread, + ) + + def list( + self, + *, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ChatKitThread, AsyncConversationCursorPage[ChatKitThread]]: + """ + List ChatKit threads + + Args: + after: List items created after this thread item ID. Defaults to null for the first + page. + + before: List items created before this thread item ID. Defaults to null for the newest + results. + + limit: Maximum number of thread items to return. Defaults to 20. + + order: Sort order for results by creation time. Defaults to `desc`. + + user: Filter threads that belong to this user identifier. Defaults to null to return + all users. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._get_api_list( + "/chatkit/threads", + page=AsyncConversationCursorPage[ChatKitThread], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + "user": user, + }, + thread_list_params.ThreadListParams, + ), + ), + model=ChatKitThread, + ) + + async def delete( + self, + thread_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ThreadDeleteResponse: + """ + Delete a ChatKit thread + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return await self._delete( + f"/chatkit/threads/{thread_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ThreadDeleteResponse, + ) + + def list_items( + self, + thread_id: str, + *, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[Data, AsyncConversationCursorPage[Data]]: + """ + List ChatKit thread items + + Args: + after: List items created after this thread item ID. Defaults to null for the first + page. + + before: List items created before this thread item ID. 
Defaults to null for the newest + results. + + limit: Maximum number of thread items to return. Defaults to 20. + + order: Sort order for results by creation time. Defaults to `desc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "chatkit_beta=v1", **(extra_headers or {})} + return self._get_api_list( + f"/chatkit/threads/{thread_id}/items", + page=AsyncConversationCursorPage[Data], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + thread_list_items_params.ThreadListItemsParams, + ), + ), + model=cast(Any, Data), # Union types cannot be passed in as arguments in the type system + ) + + +class ThreadsWithRawResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.retrieve = _legacy_response.to_raw_response_wrapper( + threads.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + threads.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + threads.delete, + ) + self.list_items = _legacy_response.to_raw_response_wrapper( + threads.list_items, + ) + + +class AsyncThreadsWithRawResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + threads.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + threads.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + threads.delete, + ) + self.list_items = _legacy_response.async_to_raw_response_wrapper( + threads.list_items, + ) + + +class ThreadsWithStreamingResponse: + def __init__(self, threads: Threads) -> None: + self._threads = threads + + self.retrieve = to_streamed_response_wrapper( + threads.retrieve, + ) + self.list = to_streamed_response_wrapper( + threads.list, + ) + self.delete = to_streamed_response_wrapper( + threads.delete, + ) + self.list_items = to_streamed_response_wrapper( + threads.list_items, + ) + + +class AsyncThreadsWithStreamingResponse: + def __init__(self, threads: AsyncThreads) -> None: + self._threads = threads + + self.retrieve = async_to_streamed_response_wrapper( + threads.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + threads.list, + ) + self.delete = async_to_streamed_response_wrapper( + threads.delete, + ) + self.list_items = async_to_streamed_response_wrapper( + threads.list_items, + ) diff --git a/src/openai/resources/beta/threads/messages.py b/src/openai/resources/beta/threads/messages.py index f0832515ce..572a2e1f75 100644 --- a/src/openai/resources/beta/threads/messages.py +++ b/src/openai/resources/beta/threads/messages.py @@ -2,27 +2,23 @@ from __future__ import annotations +import typing_extensions from typing import Union, Iterable, Optional from typing_extensions import Literal import httpx from .... 
import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( - maybe_transform, - async_maybe_transform, -) +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.beta.threads import message_list_params, message_create_params, message_update_params from ....types.beta.threads.message import Message +from ....types.shared_params.metadata import Metadata from ....types.beta.threads.message_deleted import MessageDeleted from ....types.beta.threads.message_content_part_param import MessageContentPartParam @@ -32,26 +28,38 @@ class Messages(SyncAPIResource): @cached_property def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return MessagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return MessagesWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def create( self, thread_id: str, *, content: Union[str, Iterable[MessageContentPartParam]], role: Literal["user", "assistant"], - attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + attachments: Optional[Iterable[message_create_params.Attachment]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Create a message. @@ -70,9 +78,11 @@ def create( attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
extra_headers: Send extra headers @@ -102,6 +112,7 @@ def create( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def retrieve( self, message_id: str, @@ -112,7 +123,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Retrieve a message. @@ -139,27 +150,30 @@ def retrieve( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def update( self, message_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Modifies a message. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -183,21 +197,22 @@ def update( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, thread_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - run_id: str | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + run_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[Message]: """ Returns a list of messages for a given thread. @@ -210,8 +225,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
@@ -254,6 +269,7 @@ def list( model=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def delete( self, message_id: str, @@ -264,7 +280,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> MessageDeleted: """ Deletes a message. @@ -295,26 +311,38 @@ def delete( class AsyncMessages(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncMessagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncMessagesWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def create( self, thread_id: str, *, content: Union[str, Iterable[MessageContentPartParam]], role: Literal["user", "assistant"], - attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + attachments: Optional[Iterable[message_create_params.Attachment]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Create a message. @@ -333,9 +361,11 @@ async def create( attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -365,6 +395,7 @@ async def create( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def retrieve( self, message_id: str, @@ -375,7 +406,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Retrieve a message. 
@@ -402,27 +433,30 @@ async def retrieve( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def update( self, message_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Message: """ Modifies a message. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -446,21 +480,22 @@ async def update( cast_to=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, thread_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - run_id: str | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + run_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]: """ Returns a list of messages for a given thread. @@ -473,8 +508,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -517,6 +552,7 @@ def list( model=Message, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def delete( self, message_id: str, @@ -527,7 +563,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> MessageDeleted: """ Deletes a message. 
@@ -559,20 +595,30 @@ class MessagesWithRawResponse: def __init__(self, messages: Messages) -> None: self._messages = messages - self.create = _legacy_response.to_raw_response_wrapper( - messages.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + messages.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = _legacy_response.to_raw_response_wrapper( - messages.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + messages.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.to_raw_response_wrapper( - messages.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + messages.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = _legacy_response.to_raw_response_wrapper( - messages.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + messages.list, # pyright: ignore[reportDeprecated], + ) ) - self.delete = _legacy_response.to_raw_response_wrapper( - messages.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + messages.delete, # pyright: ignore[reportDeprecated], + ) ) @@ -580,20 +626,30 @@ class AsyncMessagesWithRawResponse: def __init__(self, messages: AsyncMessages) -> None: self._messages = messages - self.create = _legacy_response.async_to_raw_response_wrapper( - messages.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + messages.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = _legacy_response.async_to_raw_response_wrapper( - messages.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + messages.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.async_to_raw_response_wrapper( - messages.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + messages.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = _legacy_response.async_to_raw_response_wrapper( - messages.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + messages.list, # pyright: ignore[reportDeprecated], + ) ) - self.delete = _legacy_response.async_to_raw_response_wrapper( - messages.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + messages.delete, # pyright: ignore[reportDeprecated], + ) ) @@ -601,20 +657,30 @@ class MessagesWithStreamingResponse: def __init__(self, messages: Messages) -> None: self._messages = messages - self.create = to_streamed_response_wrapper( - messages.create, + self.create = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + messages.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = to_streamed_response_wrapper( - messages.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + messages.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = to_streamed_response_wrapper( - messages.update, + self.update = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + messages.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = to_streamed_response_wrapper( - messages.list, + self.list = ( # 
pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + messages.list, # pyright: ignore[reportDeprecated], + ) ) - self.delete = to_streamed_response_wrapper( - messages.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + messages.delete, # pyright: ignore[reportDeprecated], + ) ) @@ -622,18 +688,28 @@ class AsyncMessagesWithStreamingResponse: def __init__(self, messages: AsyncMessages) -> None: self._messages = messages - self.create = async_to_streamed_response_wrapper( - messages.create, + self.create = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + messages.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = async_to_streamed_response_wrapper( - messages.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + messages.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = async_to_streamed_response_wrapper( - messages.update, + self.update = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + messages.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = async_to_streamed_response_wrapper( - messages.list, + self.list = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + messages.list, # pyright: ignore[reportDeprecated], + ) ) - self.delete = async_to_streamed_response_wrapper( - messages.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + messages.delete, # pyright: ignore[reportDeprecated], + ) ) diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index c37071529c..e7626abcdf 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -3,9 +3,8 @@ from __future__ import annotations import typing_extensions -from typing import Union, Iterable, Optional, overload -from functools import partial -from typing_extensions import Literal +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx @@ -18,30 +17,14 @@ StepsWithStreamingResponse, AsyncStepsWithStreamingResponse, ) -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import ( - is_given, - required_args, - maybe_transform, - async_maybe_transform, -) +from ....._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ....._utils import required_args, maybe_transform, async_maybe_transform from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....._streaming import Stream, AsyncStream from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( - AsyncPaginator, - make_request_options, -) -from .....lib.streaming import ( - AssistantEventHandler, - AssistantEventHandlerT, - AssistantStreamManager, - AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager, -) +from ....._base_client import AsyncPaginator, make_request_options from .....types.beta.threads import ( run_list_params, run_create_params, @@ -49,8 +32,12 @@ run_submit_tool_outputs_params, ) from .....types.beta.threads.run import Run +from .....types.shared.chat_model import ChatModel +from .....types.shared_params.metadata import Metadata +from 
.....types.shared.reasoning_effort import ReasoningEffort from .....types.beta.assistant_tool_param import AssistantToolParam from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.threads.runs.run_step_include import RunStepInclude from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -64,64 +51,53 @@ def steps(self) -> Steps: @cached_property def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return RunsWithRawResponse(self) @cached_property def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return RunsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create( self, thread_id: str, *, assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: 
Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Create a run. @@ -131,6 +107,14 @@ def create( [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -154,21 +138,46 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -205,7 +214,7 @@ def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -217,6 +226,7 @@ def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create( self, @@ -224,51 +234,28 @@ def create( *, assistant_id: str, stream: Literal[True], - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: 
Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[AssistantStreamEvent]: """ Create a run. @@ -282,6 +269,14 @@ def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -305,21 +300,46 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -352,7 +372,7 @@ def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -364,6 +384,7 @@ def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create( self, @@ -371,51 +392,28 @@ def create( *, assistant_id: str, stream: bool, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: 
Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: """ Create a run. @@ -429,6 +427,14 @@ def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -452,21 +458,46 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -499,7 +530,7 @@ def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -511,58 +542,36 @@ def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["assistant_id"], ["assistant_id", "stream"]) def create( self, thread_id: str, *, assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + 
response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") @@ -579,6 +588,8 @@ def create( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -587,16 +598,21 @@ def create( "top_p": top_p, "truncation_strategy": truncation_strategy, }, - run_create_params.RunCreateParams, + run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming, ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, stream=stream or False, stream_cls=Stream[AssistantStreamEvent], ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def retrieve( self, run_id: str, @@ -607,7 +623,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Retrieves a run. @@ -634,27 +650,30 @@ def retrieve( cast_to=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def update( self, run_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Modifies a run. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. 
+ for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -678,20 +697,21 @@ def update( cast_to=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, thread_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[Run]: """ Returns a list of runs belonging to a thread. @@ -704,8 +724,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -745,6 +765,7 @@ def list( model=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def cancel( self, run_id: str, @@ -755,7 +776,7 @@ def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Cancels a run that is `in_progress`. 
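To illustrate the new `runs.create` parameters introduced in the hunks above, here is a sketch assuming a configured client and an existing thread and assistant (IDs are placeholders). The `include` value and the JSON-mode `response_format` are the ones named in the docstrings; `reasoning_effort` is only relevant for reasoning-capable models and is therefore left as a comment:

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # hypothetical IDs
    assistant_id="asst_abc123",
    # Currently the only supported include value: file search result content on run steps.
    include=["step_details.tool_calls[*].file_search.results[*].content"],
    parallel_tool_calls=True,
    metadata={"purpose": "sketch"},
    # JSON mode: the prompt must also instruct the model to produce JSON.
    response_format={"type": "json_object"},
    # reasoning_effort="low" could additionally be passed for reasoning-capable models.
    stream=False,  # stream=True selects the RunCreateParamsStreaming variant instead
)
print(run.status)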
@@ -782,554 +803,7 @@ def cancel( cast_to=Run, ) - def create_and_poll( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a run an poll for a terminal state. 
More information on Run - lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = self.create( - thread_id=thread_id, - assistant_id=assistant_id, - additional_instructions=additional_instructions, - additional_messages=additional_messages, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - tool_choice=tool_choice, - # We assume we are not streaming when polling - stream=False, - tools=tools, - truncation_strategy=truncation_strategy, - top_p=top_p, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.poll( - run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - poll_interval_ms=poll_interval_ms, - timeout=timeout, - ) - - @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a Run stream""" - ... 
- - @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - ... 
- - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - - def poll( - self, - run_id: str, - thread_id: str, - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | 
NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to poll a run status until it reaches a terminal state. More - information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - - if is_given(poll_interval_ms): - extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} - while True: - response = self.with_raw_response.retrieve( - thread_id=thread_id, - run_id=run_id, - extra_headers=extra_headers, - extra_body=extra_body, - extra_query=extra_query, - timeout=timeout, - ) - - run = response.parse() - # Return if we reached a terminal state - if run.status in terminal_states: - return run - - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - self._sleep(poll_interval_ms / 1000) - - @overload - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a Run stream""" - ... 
- - @overload - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - ... - - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def submit_tool_outputs( self, @@ -1337,13 +811,13 @@ def submit_tool_outputs( *, thread_id: str, tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -1368,6 +842,7 @@ def submit_tool_outputs( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def submit_tool_outputs( self, @@ -1381,7 +856,7 @@ def submit_tool_outputs( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[AssistantStreamEvent]: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -1406,6 +881,7 @@ def submit_tool_outputs( """ ... 
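A short sketch of the `requires_action` flow that the `submit_tool_outputs` docstrings above describe, assuming the run was created earlier with a function tool; the client and IDs are placeholders and the hard-coded "42" stands in for a real tool result:

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.retrieve(run_id="run_abc123", thread_id="thread_abc123")

if run.status == "requires_action" and run.required_action is not None:
    tool_calls = run.required_action.submit_tool_outputs.tool_calls
    outputs = [{"tool_call_id": call.id, "output": "42"} for call in tool_calls]
    run = client.beta.threads.runs.submit_tool_outputs(
        run_id=run.id,
        thread_id="thread_abc123",
        tool_outputs=outputs,
        stream=False,  # stream=True would instead return a Stream[AssistantStreamEvent]
    )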
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def submit_tool_outputs( self, @@ -1419,7 +895,7 @@ def submit_tool_outputs( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -1444,6 +920,7 @@ def submit_tool_outputs( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) def submit_tool_outputs( self, @@ -1451,13 +928,13 @@ def submit_tool_outputs( *, thread_id: str, tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") @@ -1471,7 +948,9 @@ def submit_tool_outputs( "tool_outputs": tool_outputs, "stream": stream, }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming + if stream + else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -1481,205 +960,62 @@ def submit_tool_outputs( stream_cls=Stream[AssistantStreamEvent], ) - def submit_tool_outputs_and_poll( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: + +class AsyncRuns(AsyncAPIResource): + @cached_property + def steps(self) -> AsyncSteps: + return AsyncSteps(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: """ - A helper to submit a tool output to a run and poll for a terminal run state. - More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers """ - run = self.submit_tool_outputs( - run_id=run_id, - thread_id=thread_id, - tool_outputs=tool_outputs, - stream=False, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.poll( - run_id=run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - poll_interval_ms=poll_interval_ms, - ) + return AsyncRunsWithRawResponse(self) - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response """ - ... + return AsyncRunsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload - def submit_tool_outputs_stream( + async def create( self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, thread_id: str, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... 
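The `with_raw_response` docstring added above applies to every method on the resource: the wrapper returns the raw HTTP response, and `.parse()` then yields the typed object, which is also how the deleted `poll` helper read the `openai-poll-after-ms` header. A brief sketch with placeholder IDs:

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        # Raw response first, typed `Run` after parsing.
        response = await client.beta.threads.runs.with_raw_response.retrieve(
            run_id="run_abc123", thread_id="thread_abc123"
        )
        print(response.headers.get("x-request-id"))
        run = response.parse()
        print(run.status)

    asyncio.run(main())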
- - def submit_tool_outputs_stream( - self, *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AssistantEventHandlerT | None = None, + assistant_id: str, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. 
More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = partial( - self._post, - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - { - "tool_outputs": tool_outputs, - "stream": True, - }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler()) - - -class AsyncRuns(AsyncAPIResource): - @cached_property - def steps(self) -> AsyncSteps: - return AsyncSteps(self._client) - - @cached_property - def with_raw_response(self) -> AsyncRunsWithRawResponse: - return AsyncRunsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: - return AsyncRunsWithStreamingResponse(self) - - @overload - async def create( - self, - thread_id: str, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Run: """ Create a run. 
@@ -1688,6 +1024,14 @@ async def create( [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -1711,21 +1055,46 @@ async def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. 
Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1762,7 +1131,7 @@ async def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -1774,6 +1143,7 @@ async def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def create( self, @@ -1781,51 +1151,28 @@ async def create( *, assistant_id: str, stream: Literal[True], - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[AssistantStreamEvent]: """ Create a run. @@ -1839,6 +1186,14 @@ async def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -1862,21 +1217,46 @@ async def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1909,7 +1289,7 @@ async def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -1921,6 +1301,7 @@ async def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def create( self, @@ -1928,51 +1309,28 @@ async def create( *, assistant_id: str, stream: bool, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: """ Create a run. @@ -1986,6 +1344,14 @@ async def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -2009,21 +1375,46 @@ async def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + response_format: Specifies the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -2056,7 +1447,7 @@ async def create( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -2068,58 +1459,36 @@ async def create( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["assistant_id"], ["assistant_id", "stream"]) async def create( self, thread_id: str, *, assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | Omit = omit, + additional_instructions: Optional[str] | Omit = omit, + additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | 
Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[run_create_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") @@ -2136,6 +1505,8 @@ async def create( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -2144,16 +1515,21 @@ async def create( "top_p": top_p, "truncation_strategy": truncation_strategy, }, - run_create_params.RunCreateParams, + run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming, ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, stream=stream or False, stream_cls=AsyncStream[AssistantStreamEvent], ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def retrieve( self, run_id: str, @@ -2164,7 +1540,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Retrieves a run. @@ -2191,27 +1567,30 @@ async def retrieve( cast_to=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def update( self, run_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Modifies a run. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. 
+ for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -2235,20 +1614,21 @@ async def update( cast_to=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, thread_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: """ Returns a list of runs belonging to a thread. @@ -2261,8 +1641,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -2302,6 +1682,7 @@ def list( model=Run, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def cancel( self, run_id: str, @@ -2312,7 +1693,7 @@ async def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Cancels a run that is `in_progress`. 
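The async `create` hunks above add `include`, `parallel_tool_calls` and `reasoning_effort` alongside the existing parameters. A rough sketch of a non-streaming call using the newly documented options (IDs and parameter values are illustrative only):

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        run = await client.beta.threads.runs.create(
            "thread_abc123",
            assistant_id="asst_abc123",
            # The only documented `include` value per the docstring above.
            include=["step_details.tool_calls[*].file_search.results[*].content"],
            parallel_tool_calls=True,
            reasoning_effort="low",
            stream=False,
        )
        print(run.id, run.status)

    asyncio.run(main())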
@@ -2339,558 +1720,7 @@ async def cancel( cast_to=Run, ) - async def create_and_poll( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a run an poll for a terminal state. 
More information on Run - lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.create( - thread_id=thread_id, - assistant_id=assistant_id, - additional_instructions=additional_instructions, - additional_messages=additional_messages, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - tool_choice=tool_choice, - # We assume we are not streaming when polling - stream=False, - tools=tools, - truncation_strategy=truncation_strategy, - top_p=top_p, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.poll( - run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - poll_interval_ms=poll_interval_ms, - timeout=timeout, - ) - - @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a Run stream""" - ... 
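The hunk above removes the `create_and_poll` helper. Per the deleted body, its behaviour was to create a run with `stream=False` and then poll `retrieve` until a terminal status; a rough user-code equivalent is sketched below (the fixed one-second interval and the IDs are illustrative, not the helper's header-driven interval):

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()
    TERMINAL = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"}

    async def create_and_wait(thread_id: str, assistant_id: str):
        run = await client.beta.threads.runs.create(thread_id, assistant_id=assistant_id, stream=False)
        # Poll until the run settles, mirroring the terminal-state set used by the deleted helper.
        while run.status not in TERMINAL:
            await asyncio.sleep(1)
            run = await client.beta.threads.runs.retrieve(run_id=run.id, thread_id=thread_id)
        return run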
- - @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a Run stream""" - ... 
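The deleted `create_and_stream` overloads above accepted a custom event handler. For reference, that (now removed) helper family was typically driven with an `AsyncAssistantEventHandler` subclass roughly as below; the import path and handler hook are from the streaming helpers module and this is only a sketch of the removed usage, not part of this patch:

    from typing_extensions import override
    from openai.lib.streaming import AsyncAssistantEventHandler

    class PrintingHandler(AsyncAssistantEventHandler):
        @override
        async def on_text_delta(self, delta, snapshot) -> None:
            # Print streamed text as it arrives.
            print(delta.value or "", end="", flush=True)

    # Usage against the removed helper looked roughly like:
    # async with client.beta.threads.runs.stream(
    #     thread_id="thread_abc123", assistant_id="asst_abc123", event_handler=PrintingHandler()
    # ) as stream:
    #     await stream.until_done()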
- - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a Run stream""" - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - - async def poll( - self, - run_id: str, - thread_id: str, - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - 
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to poll a run status until it reaches a terminal state. More - information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - - if is_given(poll_interval_ms): - extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} - while True: - response = await self.with_raw_response.retrieve( - thread_id=thread_id, - run_id=run_id, - extra_headers=extra_headers, - extra_body=extra_body, - extra_query=extra_query, - timeout=timeout, - ) - - run = response.parse() - # Return if we reached a terminal state - if run.status in terminal_states: - return run - - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - await self._sleep(poll_interval_ms / 1000) - - @overload - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a Run stream""" - ... 
- - @overload - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a Run stream""" - ... - - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a Run stream""" - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def submit_tool_outputs( self, @@ -2898,13 +1728,13 @@ async def submit_tool_outputs( *, thread_id: str, tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -2929,6 +1759,7 @@ async def submit_tool_outputs( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def submit_tool_outputs( self, @@ -2942,7 +1773,7 @@ async def submit_tool_outputs( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[AssistantStreamEvent]: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -2967,6 +1798,7 @@ async def submit_tool_outputs( """ ... 
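With `stream=True`, the async `submit_tool_outputs` overloads above return an `AsyncStream[AssistantStreamEvent]`. A minimal consumption sketch (placeholder IDs):

    import asyncio
    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        stream = await client.beta.threads.runs.submit_tool_outputs(
            "run_abc123",
            thread_id="thread_abc123",
            tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
            stream=True,
        )
        # Events stop once the run reaches a terminal state (`data: [DONE]`).
        async for event in stream:
            print(event.event)

    asyncio.run(main())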
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def submit_tool_outputs( self, @@ -2980,7 +1812,7 @@ async def submit_tool_outputs( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: """ When a run has the `status: "requires_action"` and `required_action.type` is @@ -3005,6 +1837,7 @@ async def submit_tool_outputs( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) async def submit_tool_outputs( self, @@ -3012,13 +1845,13 @@ async def submit_tool_outputs( *, thread_id: str, tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") @@ -3032,7 +1865,9 @@ async def submit_tool_outputs( "tool_outputs": tool_outputs, "stream": stream, }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, + run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming + if stream + else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -3042,162 +1877,40 @@ async def submit_tool_outputs( stream_cls=AsyncStream[AssistantStreamEvent], ) - async def submit_tool_outputs_and_poll( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to submit a tool output to a run and poll for a terminal run state. 
- More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.submit_tool_outputs( - run_id=run_id, - thread_id=thread_id, - tool_outputs=tool_outputs, - stream=False, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.poll( - run_id=run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - poll_interval_ms=poll_interval_ms, - ) - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. 
More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - { - "tool_outputs": tool_outputs, - "stream": True, - }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - class RunsWithRawResponse: def __init__(self, runs: Runs) -> None: self._runs = runs - self.create = _legacy_response.to_raw_response_wrapper( - runs.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = _legacy_response.to_raw_response_wrapper( - runs.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.to_raw_response_wrapper( - runs.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = _legacy_response.to_raw_response_wrapper( - runs.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.list, # pyright: ignore[reportDeprecated], + ) ) - self.cancel = _legacy_response.to_raw_response_wrapper( - runs.cancel, + self.cancel = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.cancel, # pyright: ignore[reportDeprecated], + ) ) - self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper( - runs.submit_tool_outputs, + self.submit_tool_outputs = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + runs.submit_tool_outputs, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -3209,23 +1922,35 @@ class AsyncRunsWithRawResponse: def __init__(self, runs: AsyncRuns) -> None: self._runs = runs - self.create = _legacy_response.async_to_raw_response_wrapper( - runs.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = _legacy_response.async_to_raw_response_wrapper( - runs.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.async_to_raw_response_wrapper( - runs.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.update, # pyright: ignore[reportDeprecated], + ) ) - 
self.list = _legacy_response.async_to_raw_response_wrapper( - runs.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.list, # pyright: ignore[reportDeprecated], + ) ) - self.cancel = _legacy_response.async_to_raw_response_wrapper( - runs.cancel, + self.cancel = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.cancel, # pyright: ignore[reportDeprecated], + ) ) - self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper( - runs.submit_tool_outputs, + self.submit_tool_outputs = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + runs.submit_tool_outputs, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -3237,23 +1962,35 @@ class RunsWithStreamingResponse: def __init__(self, runs: Runs) -> None: self._runs = runs - self.create = to_streamed_response_wrapper( - runs.create, + self.create = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = to_streamed_response_wrapper( - runs.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = to_streamed_response_wrapper( - runs.update, + self.update = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = to_streamed_response_wrapper( - runs.list, + self.list = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.list, # pyright: ignore[reportDeprecated], + ) ) - self.cancel = to_streamed_response_wrapper( - runs.cancel, + self.cancel = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.cancel, # pyright: ignore[reportDeprecated], + ) ) - self.submit_tool_outputs = to_streamed_response_wrapper( - runs.submit_tool_outputs, + self.submit_tool_outputs = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + runs.submit_tool_outputs, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -3265,23 +2002,35 @@ class AsyncRunsWithStreamingResponse: def __init__(self, runs: AsyncRuns) -> None: self._runs = runs - self.create = async_to_streamed_response_wrapper( - runs.create, + self.create = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = async_to_streamed_response_wrapper( - runs.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = async_to_streamed_response_wrapper( - runs.update, + self.update = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.update, # pyright: ignore[reportDeprecated], + ) ) - self.list = async_to_streamed_response_wrapper( - runs.list, + self.list = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.list, # pyright: ignore[reportDeprecated], + ) ) - self.cancel = async_to_streamed_response_wrapper( - runs.cancel, + self.cancel = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.cancel, # pyright: ignore[reportDeprecated], + ) ) - self.submit_tool_outputs = async_to_streamed_response_wrapper( - runs.submit_tool_outputs, + self.submit_tool_outputs = ( # pyright: 
ignore[reportDeprecated] + async_to_streamed_response_wrapper( + runs.submit_tool_outputs, # pyright: ignore[reportDeprecated], + ) ) @cached_property diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py index 512008939c..254a94435c 100644 --- a/src/openai/resources/beta/threads/runs/steps.py +++ b/src/openai/resources/beta/threads/runs/steps.py @@ -2,23 +2,23 @@ from __future__ import annotations +import typing_extensions +from typing import List from typing_extensions import Literal import httpx from ..... import _legacy_response -from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform +from ....._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ....._utils import maybe_transform, async_maybe_transform from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( - AsyncPaginator, - make_request_options, -) -from .....types.beta.threads.runs import step_list_params +from ....._base_client import AsyncPaginator, make_request_options +from .....types.beta.threads.runs import step_list_params, step_retrieve_params from .....types.beta.threads.runs.run_step import RunStep +from .....types.beta.threads.runs.run_step_include import RunStepInclude __all__ = ["Steps", "AsyncSteps"] @@ -26,29 +26,50 @@ class Steps(SyncAPIResource): @cached_property def with_raw_response(self) -> StepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return StepsWithRawResponse(self) @cached_property def with_streaming_response(self) -> StepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return StepsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def retrieve( self, step_id: str, *, thread_id: str, run_id: str, + include: List[RunStepInclude] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> RunStep: """ Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
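A small sketch of the new `include` query parameter on `steps.retrieve` introduced above; the only value documented in the docstring is the file-search result content path, and the IDs here are placeholders.

```python
from openai import OpenAI

client = OpenAI()

# Fetch a single run step and also ask for file search result content.
step = client.beta.threads.runs.steps.retrieve(
    "step_abc123",
    thread_id="thread_abc123",
    run_id="run_abc123",
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)
print(step.step_details)
```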
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -67,26 +88,32 @@ def retrieve( return self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, run_id: str, *, thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + include: List[RunStepInclude] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[RunStep]: """ Returns a list of run steps belonging to a run. @@ -99,8 +126,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -133,6 +168,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, @@ -146,29 +182,50 @@ def list( class AsyncSteps(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncStepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncStepsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncStepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncStepsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def retrieve( self, step_id: str, *, thread_id: str, run_id: str, + include: List[RunStepInclude] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> RunStep: """ Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -187,26 +244,32 @@ async def retrieve( return await self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def list( self, run_id: str, *, thread_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + include: List[RunStepInclude] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]: """ Returns a list of run steps belonging to a run. @@ -219,8 +282,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. 
Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -253,6 +324,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, @@ -267,11 +339,15 @@ class StepsWithRawResponse: def __init__(self, steps: Steps) -> None: self._steps = steps - self.retrieve = _legacy_response.to_raw_response_wrapper( - steps.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + steps.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.list = _legacy_response.to_raw_response_wrapper( - steps.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + steps.list, # pyright: ignore[reportDeprecated], + ) ) @@ -279,11 +355,15 @@ class AsyncStepsWithRawResponse: def __init__(self, steps: AsyncSteps) -> None: self._steps = steps - self.retrieve = _legacy_response.async_to_raw_response_wrapper( - steps.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + steps.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.list = _legacy_response.async_to_raw_response_wrapper( - steps.list, + self.list = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + steps.list, # pyright: ignore[reportDeprecated], + ) ) @@ -291,11 +371,15 @@ class StepsWithStreamingResponse: def __init__(self, steps: Steps) -> None: self._steps = steps - self.retrieve = to_streamed_response_wrapper( - steps.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + steps.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.list = to_streamed_response_wrapper( - steps.list, + self.list = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + steps.list, # pyright: ignore[reportDeprecated], + ) ) @@ -303,9 +387,13 @@ class AsyncStepsWithStreamingResponse: def __init__(self, steps: AsyncSteps) -> None: self._steps = steps - self.retrieve = async_to_streamed_response_wrapper( - steps.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + steps.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.list = async_to_streamed_response_wrapper( - steps.list, + self.list = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + steps.list, # pyright: ignore[reportDeprecated], + ) ) diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 36cdd03f91..d9604d7165 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -2,21 +2,13 @@ from __future__ import annotations -from typing import Union, Iterable, Optional, overload -from functools import partial -from typing_extensions import Literal +import typing_extensions +from typing import Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx from .... 
import _legacy_response -from .runs import ( - Runs, - AsyncRuns, - RunsWithRawResponse, - AsyncRunsWithRawResponse, - RunsWithStreamingResponse, - AsyncRunsWithStreamingResponse, -) from .messages import ( Messages, AsyncMessages, @@ -25,13 +17,16 @@ MessagesWithStreamingResponse, AsyncMessagesWithStreamingResponse, ) -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( - required_args, - maybe_transform, - async_maybe_transform, +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import required_args, maybe_transform, async_maybe_transform +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, ) -from .runs.runs import Runs, AsyncRuns from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper @@ -41,20 +36,13 @@ thread_update_params, thread_create_and_run_params, ) -from ...._base_client import ( - make_request_options, -) -from ....lib.streaming import ( - AssistantEventHandler, - AssistantEventHandlerT, - AssistantStreamManager, - AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager, -) +from ...._base_client import make_request_options from ....types.beta.thread import Thread from ....types.beta.threads.run import Run +from ....types.shared.chat_model import ChatModel from ....types.beta.thread_deleted import ThreadDeleted +from ....types.shared_params.metadata import Metadata +from ....types.beta.assistant_tool_param import AssistantToolParam from ....types.beta.assistant_stream_event import AssistantStreamEvent from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -73,24 +61,36 @@ def messages(self) -> Messages: @cached_property def with_raw_response(self) -> ThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ThreadsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ThreadsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def create( self, *, - messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Create a thread. @@ -100,9 +100,11 @@ def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the @@ -134,6 +136,7 @@ def create( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def retrieve( self, thread_id: str, @@ -143,7 +146,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Retrieves a thread. @@ -168,27 +171,30 @@ def retrieve( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, + tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Modifies a thread. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. 
For example, the @@ -221,6 +227,7 @@ def update( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") def delete( self, thread_id: str, @@ -230,7 +237,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ThreadDeleted: """ Delete a thread. @@ -255,57 +262,33 @@ def delete( cast_to=ThreadDeleted, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create_and_run( self, *, assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Create a thread and run it in one request. 
@@ -331,21 +314,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -364,7 +358,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -389,7 +384,7 @@ def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -401,57 +396,33 @@ def create_and_run( """ ... 
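As a rough usage sketch of the non-streaming `create_and_run` overload documented above (the assistant ID and message content are placeholders):

```python
from openai import OpenAI

client = OpenAI()

# Create a thread and run it against an existing assistant in one request.
run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    thread={
        "messages": [
            {"role": "user", "content": "Summarise the attached meeting notes."}
        ]
    },
)
print(run.id, run.status)
```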
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create_and_run( self, *, assistant_id: str, stream: Literal[True], - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[AssistantStreamEvent]: """ Create a thread and run it in one request. @@ -481,21 +452,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -510,7 +492,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -535,7 +518,7 @@ def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -547,57 +530,33 @@ def create_and_run( """ ... 
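The `stream=True` overload above returns a `Stream[AssistantStreamEvent]`. A minimal sketch of consuming it directly; the event name and payload shape follow the Assistants streaming events and are an assumption of this example rather than something spelled out in the diff.

```python
from openai import OpenAI

client = OpenAI()

# With stream=True the call returns an iterable of AssistantStreamEvent objects.
stream = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    thread={"messages": [{"role": "user", "content": "Write a haiku about diffs."}]},
    stream=True,
)
for event in stream:
    # Each event carries an `event` name (e.g. "thread.message.delta") and a `data` payload.
    if event.event == "thread.message.delta":
        for block in event.data.delta.content or []:
            if block.type == "text" and block.text and block.text.value:
                print(block.text.value, end="", flush=True)
```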
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload def create_and_run( self, *, assistant_id: str, stream: bool, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: """ Create a thread and run it in one request. @@ -627,21 +586,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -656,7 +626,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -681,7 +652,7 @@ def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -693,57 +664,33 @@ def create_and_run( """ ... 
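Tying together a couple of the parameters described in these docstrings, a hedged sketch of JSON mode plus request metadata on `create_and_run`; the 16-pair / 64-character / 512-character limits come from the docstring above, and all IDs and values are placeholders.

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    # JSON mode: the docstring notes you must also instruct the model to produce JSON.
    instructions="Reply with a JSON object describing the user's request.",
    response_format={"type": "json_object"},
    # Up to 16 key-value pairs; keys up to 64 characters, values up to 512 characters.
    metadata={"ticket_id": "T-1024", "source": "docs-example"},
    thread={"messages": [{"role": "user", "content": "I need to reset my password."}]},
)
```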
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["assistant_id"], ["assistant_id", "stream"]) def create_and_run( self, *, assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | Stream[AssistantStreamEvent]: extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( @@ -756,6 +703,7 @@ def create_and_run( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -766,7 +714,9 @@ def create_and_run( "top_p": top_p, "truncation_strategy": truncation_strategy, }, - thread_create_and_run_params.ThreadCreateAndRunParams, + thread_create_and_run_params.ThreadCreateAndRunParamsStreaming + if stream + else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -776,284 +726,6 @@ def create_and_run( stream_cls=Stream[AssistantStreamEvent], ) - def create_and_run_poll( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a thread, start a run and then poll for a terminal state. 
- More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = self.create_and_run( - assistant_id=assistant_id, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - stream=False, - thread=thread, - tool_resources=tool_resources, - tool_choice=tool_choice, - truncation_strategy=truncation_strategy, - top_p=top_p, - tools=tools, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms) - - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a thread and stream the run back""" - ... 
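Since `create_and_run_poll` is removed in this diff, callers that relied on it have to poll themselves. A rough, assumption-laden equivalent: the set of stopping statuses and the fixed one-second sleep are stand-ins for the removed helper's run-lifecycle handling and `poll_interval_ms` option.

```python
import time

from openai import OpenAI

client = OpenAI()

# Statuses after which the run will not progress without further input
# (assumption based on the Assistants run lifecycle; adjust as needed).
STOP_STATUSES = {"requires_action", "cancelled", "failed", "completed", "expired", "incomplete"}

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    thread={"messages": [{"role": "user", "content": "Run this to completion."}]},
)
while run.status not in STOP_STATUSES:
    time.sleep(1)  # crude stand-in for the removed poll_interval_ms handling
    run = client.beta.threads.runs.retrieve(run.id, thread_id=run.thread_id)
print(run.status)
```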
- - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a thread and stream the run back""" - ... 
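For reference, the event-handler streaming pattern that the removed create_and_run_stream helper supported looked roughly like the sketch below (placeholder assistant ID and message; assumes the AssistantEventHandler export of this SDK):

from openai import OpenAI, AssistantEventHandler

client = OpenAI()

class PrintHandler(AssistantEventHandler):
    # Invoked for every text delta streamed back by the run.
    def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value, end="", flush=True)

with client.beta.threads.create_and_run_stream(
    assistant_id="asst_123",  # placeholder
    thread={"messages": [{"role": "user", "content": "Tell me a joke."}]},
    event_handler=PrintHandler(),
) as stream:
    stream.until_done()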
- - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a thread and stream the run back""" - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.create_and_run_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - "/threads/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "thread": thread, - "tools": tools, - "tool": tool_resources, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - thread_create_and_run_params.ThreadCreateAndRunParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - class AsyncThreads(AsyncAPIResource): @cached_property @@ -1066,24 +738,36 @@ def messages(self) -> AsyncMessages: @cached_property def with_raw_response(self) -> AsyncThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncThreadsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncThreadsWithStreamingResponse(self) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def create( self, *, - messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, + messages: Iterable[thread_create_params.Message] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Create a thread. @@ -1093,9 +777,11 @@ async def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the @@ -1127,6 +813,7 @@ async def create( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def retrieve( self, thread_id: str, @@ -1136,7 +823,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Retrieves a thread. @@ -1161,27 +848,30 @@ async def retrieve( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | Omit = omit, + tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Thread: """ Modifies a thread. Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the @@ -1214,6 +904,7 @@ async def update( cast_to=Thread, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") async def delete( self, thread_id: str, @@ -1223,7 +914,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ThreadDeleted: """ Delete a thread. @@ -1248,57 +939,33 @@ async def delete( cast_to=ThreadDeleted, ) + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def create_and_run( self, *, assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + temperature: 
Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run: """ Create a thread and run it in one request. @@ -1324,21 +991,32 @@ async def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1357,7 +1035,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1382,7 +1061,7 @@ async def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -1394,57 +1073,33 @@ async def create_and_run( """ ... + @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def create_and_run( self, *, assistant_id: str, stream: Literal[True], - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[AssistantStreamEvent]: """ Create a thread and run it in one request. @@ -1474,21 +1129,32 @@ async def create_and_run( `incomplete_details` for more info. 
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1503,7 +1169,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1528,7 +1195,7 @@ async def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -1540,57 +1207,33 @@ async def create_and_run( """ ... 
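The stream=True overload above returns the raw event stream rather than a helper; consuming it directly might look like this minimal sketch (placeholder assistant ID):

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # stream=True selects the overload that yields AssistantStreamEvent objects.
    stream = await client.beta.threads.create_and_run(
        assistant_id="asst_123",  # placeholder
        thread={"messages": [{"role": "user", "content": "Hello"}]},
        stream=True,
    )
    async for event in stream:
        print(event.event)  # e.g. "thread.run.created", "thread.message.delta"

asyncio.run(main())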
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @overload async def create_and_run( self, *, assistant_id: str, stream: bool, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: """ Create a thread and run it in one request. @@ -1620,21 +1263,32 @@ async def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1649,7 +1303,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1674,7 +1329,7 @@ async def create_and_run( We generally recommend altering this or temperature but not both. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. + control the initial context window of the run. extra_headers: Send extra headers @@ -1686,57 +1341,33 @@ async def create_and_run( """ ... 
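parallel_tool_calls is the parameter newly threaded through these overloads; passing it alongside a function tool might look like the following sketch (the tool name and schema are illustrative only):

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_123",  # placeholder
    thread={"messages": [{"role": "user", "content": "Weather in Paris and Tokyo?"}]},
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # illustrative function tool
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    parallel_tool_calls=False,  # the newly added flag; False disables parallel function calling
)
print(run.status)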
+ @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API") @required_args(["assistant_id"], ["assistant_id", "stream"]) async def create_and_run( self, *, assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_prompt_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: Union[str, ChatModel, None] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + temperature: Optional[float] | Omit = omit, + thread: thread_create_and_run_params.Thread | Omit = omit, + tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit, + tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit, + tools: Optional[Iterable[AssistantToolParam]] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Run | AsyncStream[AssistantStreamEvent]: extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( @@ -1749,6 +1380,7 @@ async def create_and_run( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -1759,7 +1391,9 @@ async def create_and_run( "top_p": top_p, "truncation_strategy": truncation_strategy, }, - thread_create_and_run_params.ThreadCreateAndRunParams, + thread_create_and_run_params.ThreadCreateAndRunParamsStreaming + if stream + else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -1769,307 +1403,35 @@ async def create_and_run( stream_cls=AsyncStream[AssistantStreamEvent], ) - async def create_and_run_poll( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a thread, start a run and then poll for a terminal state. 
- More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.create_and_run( - assistant_id=assistant_id, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - stream=False, - thread=thread, - tool_resources=tool_resources, - tool_choice=tool_choice, - truncation_strategy=truncation_strategy, - top_p=top_p, - tools=tools, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.runs.poll( - run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms - ) - - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a thread and stream the run back""" - ... 
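The with_raw_response property documented earlier in this hunk wraps each method so the raw HTTP response is returned instead of the parsed model; a minimal sketch against the async client:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # Prefixing the call with .with_raw_response yields the response wrapper.
    response = await client.beta.threads.with_raw_response.create()
    print(response.headers.get("x-request-id"))  # header name shown for illustration
    thread = response.parse()  # the parsed Thread model is still available on demand
    print(thread.id)

asyncio.run(main())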
- - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a thread and stream the run back""" - ... 
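Its counterpart with_streaming_response defers reading the body until it is consumed inside a context manager; a minimal sketch with a placeholder thread ID:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # The response body is not read eagerly here.
    async with client.beta.threads.with_streaming_response.retrieve("thread_abc123") as response:
        print(response.headers.get("content-type"))
        thread = await response.parse()
        print(thread.id)

asyncio.run(main())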
- - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a thread and stream the run back""" - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.create_and_run_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - "/threads/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "thread": thread, - "tools": tools, - "tool": tool_resources, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - thread_create_and_run_params.ThreadCreateAndRunParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - class ThreadsWithRawResponse: def __init__(self, threads: Threads) -> None: self._threads = threads - self.create = _legacy_response.to_raw_response_wrapper( - threads.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + threads.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = 
_legacy_response.to_raw_response_wrapper( - threads.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + threads.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.to_raw_response_wrapper( - threads.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + threads.update, # pyright: ignore[reportDeprecated], + ) ) - self.delete = _legacy_response.to_raw_response_wrapper( - threads.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + threads.delete, # pyright: ignore[reportDeprecated], + ) ) - self.create_and_run = _legacy_response.to_raw_response_wrapper( - threads.create_and_run, + self.create_and_run = ( # pyright: ignore[reportDeprecated] + _legacy_response.to_raw_response_wrapper( + threads.create_and_run, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -2085,20 +1447,30 @@ class AsyncThreadsWithRawResponse: def __init__(self, threads: AsyncThreads) -> None: self._threads = threads - self.create = _legacy_response.async_to_raw_response_wrapper( - threads.create, + self.create = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + threads.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = _legacy_response.async_to_raw_response_wrapper( - threads.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + threads.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = _legacy_response.async_to_raw_response_wrapper( - threads.update, + self.update = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + threads.update, # pyright: ignore[reportDeprecated], + ) ) - self.delete = _legacy_response.async_to_raw_response_wrapper( - threads.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + threads.delete, # pyright: ignore[reportDeprecated], + ) ) - self.create_and_run = _legacy_response.async_to_raw_response_wrapper( - threads.create_and_run, + self.create_and_run = ( # pyright: ignore[reportDeprecated] + _legacy_response.async_to_raw_response_wrapper( + threads.create_and_run, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -2114,20 +1486,30 @@ class ThreadsWithStreamingResponse: def __init__(self, threads: Threads) -> None: self._threads = threads - self.create = to_streamed_response_wrapper( - threads.create, + self.create = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + threads.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = to_streamed_response_wrapper( - threads.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + threads.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = to_streamed_response_wrapper( - threads.update, + self.update = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + threads.update, # pyright: ignore[reportDeprecated], + ) ) - self.delete = to_streamed_response_wrapper( - threads.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + to_streamed_response_wrapper( + threads.delete, # pyright: ignore[reportDeprecated], + ) ) - self.create_and_run = to_streamed_response_wrapper( - threads.create_and_run, + self.create_and_run = ( # pyright: ignore[reportDeprecated] + 
to_streamed_response_wrapper( + threads.create_and_run, # pyright: ignore[reportDeprecated], + ) ) @cached_property @@ -2143,20 +1525,30 @@ class AsyncThreadsWithStreamingResponse: def __init__(self, threads: AsyncThreads) -> None: self._threads = threads - self.create = async_to_streamed_response_wrapper( - threads.create, + self.create = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + threads.create, # pyright: ignore[reportDeprecated], + ) ) - self.retrieve = async_to_streamed_response_wrapper( - threads.retrieve, + self.retrieve = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + threads.retrieve, # pyright: ignore[reportDeprecated], + ) ) - self.update = async_to_streamed_response_wrapper( - threads.update, + self.update = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + threads.update, # pyright: ignore[reportDeprecated], + ) ) - self.delete = async_to_streamed_response_wrapper( - threads.delete, + self.delete = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + threads.delete, # pyright: ignore[reportDeprecated], + ) ) - self.create_and_run = async_to_streamed_response_wrapper( - threads.create_and_run, + self.create_and_run = ( # pyright: ignore[reportDeprecated] + async_to_streamed_response_wrapper( + threads.create_and_run, # pyright: ignore[reportDeprecated], + ) ) @cached_property diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py index d14d055506..14f9224b41 100644 --- a/src/openai/resources/chat/chat.py +++ b/src/openai/resources/chat/chat.py @@ -4,7 +4,7 @@ from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource -from .completions import ( +from .completions.completions import ( Completions, AsyncCompletions, CompletionsWithRawResponse, @@ -23,10 +23,21 @@ def completions(self) -> Completions: @cached_property def with_raw_response(self) -> ChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ChatWithRawResponse(self) @cached_property def with_streaming_response(self) -> ChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ChatWithStreamingResponse(self) @@ -37,10 +48,21 @@ def completions(self) -> AsyncCompletions: @cached_property def with_raw_response(self) -> AsyncChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncChatWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncChatWithStreamingResponse(self) diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py deleted file mode 100644 index aa25bc1858..0000000000 --- a/src/openai/resources/chat/completions.py +++ /dev/null @@ -1,1251 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable, Optional, overload -from typing_extensions import Literal - -import httpx - -from ... import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.chat_model import ChatModel -from ...types.chat.chat_completion import ChatCompletion -from ...types.chat.chat_completion_chunk import ChatCompletionChunk -from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam -from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam -from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam -from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam - -__all__ = ["Completions", "AsyncCompletions"] - - -class Completions(SyncAPIResource): - @cached_property - def with_raw_response(self) -> CompletionsWithRawResponse: - return CompletionsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> CompletionsWithStreamingResponse: - return CompletionsWithStreamingResponse(self) - - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. 
Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. 
- [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. 
`none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. 
Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. 
Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. 
- [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - return self._post( - "/chat/completions", - body=maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], - ) - - -class AsyncCompletions(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncCompletionsWithRawResponse: - return AsyncCompletionsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: - return AsyncCompletionsWithStreamingResponse(self) - - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. 
See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. 
Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
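To make the adjacent async overloads concrete — the non-streaming form above and the `stream: Literal[True]` form that follows — here is a minimal usage sketch. It is illustrative only: the model name and prompts are placeholders and are not part of this diff. One call resolves to a single `ChatCompletion`; passing `stream=True` selects the overload returning an `AsyncStream[ChatCompletionChunk]` whose deltas arrive as server-sent events.

```python
# Illustrative sketch of the async overloads documented here; model and prompts are placeholders.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    # Non-streaming overload: awaits a single ChatCompletion.
    completion = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Say hello"}],
        temperature=0.2,
    )
    print(completion.choices[0].message.content)

    # Streaming overload: stream=True yields ChatCompletionChunk deltas as they arrive.
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Count to three"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
    print()


asyncio.run(main())
```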
- - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. 
- - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. 
`none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. 
- - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. 
- - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - return await self._post( - "/chat/completions", - body=await async_maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], - ) - - -class CompletionsWithRawResponse: - def __init__(self, completions: Completions) -> None: - self._completions = completions - - self.create = _legacy_response.to_raw_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsWithRawResponse: - def __init__(self, completions: AsyncCompletions) -> None: - self._completions = completions - - self.create = _legacy_response.async_to_raw_response_wrapper( - completions.create, - ) - - -class CompletionsWithStreamingResponse: - def __init__(self, completions: Completions) -> None: - self._completions = completions - - self.create = to_streamed_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsWithStreamingResponse: - def __init__(self, completions: AsyncCompletions) -> None: - self._completions = completions - - self.create = async_to_streamed_response_wrapper( - completions.create, - ) diff --git a/src/openai/resources/chat/completions/__init__.py b/src/openai/resources/chat/completions/__init__.py new file mode 100644 index 0000000000..12d3b3aa28 --- /dev/null +++ b/src/openai/resources/chat/completions/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", +] diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py new file mode 100644 index 0000000000..7e64c9eb3b --- /dev/null +++ b/src/openai/resources/chat/completions/completions.py @@ -0,0 +1,2568 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
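One note on the new package layout introduced above: the added `__init__.py` simply re-exports the completions and messages resource classes, so imports against the nested module path resolve to the same symbols. A small sketch (assuming the package is installed):

```python
# Sketch only: these are exactly the names re-exported by the new __init__.py shown above.
from openai.resources.chat.completions import (
    AsyncCompletions,
    AsyncMessages,
    Completions,
    Messages,
)

print(Completions.__name__, AsyncCompletions.__name__, Messages.__name__, AsyncMessages.__name__)
```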
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from .... import _legacy_response +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ...._utils import required_args, maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.chat import ( + ChatCompletionAudioParam, + completion_list_params, + completion_create_params, + completion_update_params, +) +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared.chat_model import ChatModel +from ....types.chat.chat_completion import ChatCompletion +from ....types.shared_params.metadata import Metadata +from ....types.shared.reasoning_effort import ReasoningEffort +from ....types.chat.chat_completion_chunk import ChatCompletionChunk +from ....types.chat.chat_completion_deleted import ChatCompletionDeleted +from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ....types.chat.chat_completion_tool_union_param import ChatCompletionToolUnionParam +from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. 
For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. 
The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. 
Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
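The non-streaming overload above returns a fully materialized `ChatCompletion`. A minimal usage sketch follows; the model name and prompt are placeholders, not values taken from this diff.

```python
# Minimal non-streaming call against the overload documented above.
# Assumes OPENAI_API_KEY is set in the environment; "gpt-4o" and the
# messages are illustrative placeholders.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
)
print(completion.choices[0].message.content)
```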
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. 
This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
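The `stream=True` overload above returns a `Stream[ChatCompletionChunk]` rather than a finished completion. A hedged sketch of iterating that stream, again with placeholder model and prompt:

```python
# Streaming call: chunks arrive as server-sent events and expose
# incremental deltas. Model name and prompt are illustrative placeholders.
from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a haiku about the sea."}],
    stream=True,
)
for chunk in stream:
    # delta.content is None for role-only or tool-call deltas, so guard it.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()
```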
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. 
This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
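This third overload accepts `stream` as a runtime `bool`, so the return type is the union `ChatCompletion | Stream[ChatCompletionChunk]`. A sketch of a caller that branches on the same flag it passed; the helper name, model, and prompt are hypothetical and not part of the SDK:

```python
# Illustrative helper (not part of the SDK) built on the bool-stream overload.
from openai import OpenAI

client = OpenAI()

def ask(prompt: str, stream: bool) -> str:
    response = client.chat.completions.create(
        model="gpt-4o",  # placeholder model
        messages=[{"role": "user", "content": prompt}],
        stream=stream,
    )
    if stream:
        # Stream[ChatCompletionChunk]: join the streamed text deltas.
        return "".join(chunk.choices[0].delta.content or "" for chunk in response)
    # ChatCompletion: the full message is already available.
    return response.choices[0].message.content or ""

print(ask("Name three prime numbers.", stream=False))
```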
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "prompt_cache_key": prompt_cache_key, + "prompt_cache_retention": prompt_cache_retention, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "safety_identifier": safety_identifier, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "verbosity": verbosity, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """Modify a stored chat completion. 
+ + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._post( + f"/chat/completions/{completion_id}", + body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: str | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[ChatCompletion]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=SyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. 
For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. 
The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. 
Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. 
This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
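A matching sketch for the streaming overload, under the same assumptions: with `stream=True` the awaited call returns an `AsyncStream[ChatCompletionChunk]` that is consumed with `async for`.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-4o",  # illustrative model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    async for chunk in stream:
        # Each chunk carries an incremental delta; content can be None on some chunks.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())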
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. 
This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning_effort: Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. 
You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + verbosity: Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose + responses. Currently supported values are `low`, `medium`, and `high`. + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
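When `stream` is only known at runtime, this overload returns the union `ChatCompletion | AsyncStream[ChatCompletionChunk]`, so the caller has to narrow it. A hedged sketch, assuming `AsyncStream` is importable from the package root as in current releases and using the same illustrative model name:

from openai import AsyncOpenAI, AsyncStream


async def ask(client: AsyncOpenAI, text: str, stream: bool) -> None:
    result = await client.chat.completions.create(
        model="gpt-4o",  # illustrative model name
        messages=[{"role": "user", "content": text}],
        stream=stream,
    )
    if isinstance(result, AsyncStream):
        # Streaming path: iterate the chunks as they arrive.
        async for chunk in result:
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
    else:
        # Non-streaming path: a fully parsed ChatCompletion.
        print(result.choices[0].message.content)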
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + function_call: completion_create_params.FunctionCall | Omit = omit, + functions: Iterable[completion_create_params.Function] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[bool] | Omit = omit, + max_completion_tokens: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit, + n: Optional[int] | Omit = omit, + parallel_tool_calls: bool | Omit = omit, + prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning_effort: Optional[ReasoningEffort] | Omit = omit, + response_format: completion_create_params.ResponseFormat | Omit = omit, + safety_identifier: str | Omit = omit, + seed: Optional[int] | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + temperature: Optional[float] | Omit = omit, + tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit, + tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, + verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit, + web_search_options: completion_create_params.WebSearchOptions | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "prompt_cache_key": prompt_cache_key, + "prompt_cache_retention": prompt_cache_retention, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "safety_identifier": safety_identifier, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "verbosity": verbosity, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + async def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletion: + """Modify a stored chat completion. 
+ + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._post( + f"/chat/completions/{completion_id}", + body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: str | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=AsyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + async def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._completions.messages) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._completions.messages) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + 
completions.create, + ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = to_streamed_response_wrapper( + completions.update, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) + self.delete = to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._completions.messages) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + completions.update, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) + self.delete = async_to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._completions.messages) diff --git a/src/openai/resources/chat/completions/messages.py b/src/openai/resources/chat/completions/messages.py new file mode 100644 index 0000000000..3d6dc79cd6 --- /dev/null +++ b/src/openai/resources/chat/completions/messages.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.chat.completions import message_list_params +from ....types.chat.chat_completion_store_message import ChatCompletionStoreMessage + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return MessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[ChatCompletionStoreMessage]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=SyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncMessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ChatCompletionStoreMessage, AsyncCursorPage[ChatCompletionStoreMessage]]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. 
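# A minimal usage sketch (not part of the diff above) for the new
# `chat.completions.messages.list` endpoint defined in this file. The
# completion ID is a placeholder and assumes the completion was originally
# created with `store=True`.
from openai import OpenAI

client = OpenAI()

for message in client.chat.completions.messages.list(
    "chatcmpl-abc123",  # hypothetical ID of a stored chat completion
    limit=50,
    order="asc",
):
    # Each item is a ChatCompletionStoreMessage; the page object follows the
    # `after` cursor automatically while iterating.
    print(message.id, message.content)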
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=AsyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = to_streamed_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = async_to_streamed_response_wrapper( + messages.list, + ) diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index 0812000f78..de98d3f156 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -2,26 +2,20 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional, overload -from typing_extensions import Literal +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx from .. import _legacy_response from ..types import completion_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) +from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from .._utils import required_args, maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._streaming import Stream, AsyncStream -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options from ..types.completion import Completion from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam @@ -31,10 +25,21 @@ class Completions(SyncAPIResource): @cached_property def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return CompletionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return CompletionsWithStreamingResponse(self) @overload @@ -42,29 +47,29 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion: """ Creates a completion for the provided prompt and parameters. @@ -73,8 +78,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. 
prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -99,7 +104,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -139,7 +144,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -148,7 +153,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -178,7 +185,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
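# A minimal sketch of a non-streaming call to the legacy completions endpoint
# whose parameters are documented above; the model and prompt values are
# placeholders.
from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=16,
    temperature=0,
)
print(completion.choices[0].text)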
extra_headers: Send extra headers @@ -195,29 +202,29 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[Completion]: """ Creates a completion for the provided prompt and parameters. @@ -226,8 +233,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -259,7 +266,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
@@ -299,7 +306,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -308,7 +315,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -331,7 +340,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -348,29 +357,29 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | Stream[Completion]: """ Creates a completion for the provided prompt and parameters. @@ -379,8 +388,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -412,7 +421,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -452,7 +461,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -461,7 +470,9 @@ def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -484,7 +495,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
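# A minimal sketch of the streaming form of `create` documented above: with
# `stream=True` the client returns a `Stream[Completion]` that yields chunks
# as they arrive. The guard on `chunk.choices` skips a trailing usage-only
# chunk when `stream_options={"include_usage": True}` is set.
from openai import OpenAI

client = OpenAI()

stream = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Write one sentence about patch files.",
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].text, end="", flush=True)
print()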
extra_headers: Send extra headers @@ -501,29 +512,29 @@ def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | Stream[Completion]: return self._post( "/completions", @@ -548,7 +559,9 @@ def create( "top_p": top_p, "user": user, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -562,10 +575,21 @@ def create( class AsyncCompletions(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncCompletionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
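# A minimal sketch of the `.with_raw_response` prefix described in the
# docstrings above: the wrapped call returns the raw HTTP response (headers
# included), and `.parse()` recovers the usual typed object.
from openai import OpenAI

client = OpenAI()

response = client.completions.with_raw_response.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
)
print(response.headers.get("x-request-id"))  # raw httpx headers
completion = response.parse()  # -> Completion
print(completion.choices[0].text)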
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncCompletionsWithStreamingResponse(self) @overload @@ -573,29 +597,29 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion: """ Creates a completion for the provided prompt and parameters. @@ -604,8 +628,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -630,7 +654,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. 
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -670,7 +694,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -679,7 +703,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: Whether to stream back partial progress. If set, tokens will be sent as @@ -709,7 +735,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -726,29 +752,29 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], stream: Literal[True], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[Completion]: """ Creates a completion for the provided prompt and parameters. @@ -757,8 +783,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -790,7 +816,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
@@ -830,7 +856,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -839,7 +865,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -862,7 +890,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -879,29 +907,29 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], stream: bool, - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | AsyncStream[Completion]: """ Creates a completion for the provided prompt and parameters. @@ -910,8 +938,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -943,7 +971,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -983,7 +1011,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -992,7 +1020,9 @@ async def create( Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. - stop: Up to 4 sequences where the API will stop generating further tokens. The + stop: Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1015,7 +1045,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
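# A minimal sketch of the async client mirroring the synchronous example:
# `AsyncCompletions.create` takes the same arguments and is awaited.
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    completion = await client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt="Say this is a test",
    )
    print(completion.choices[0].text)


asyncio.run(main())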
extra_headers: Send extra headers @@ -1032,29 +1062,29 @@ async def create( self, *, model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]], - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None], - best_of: Optional[int] | NotGiven = NOT_GIVEN, - echo: Optional[bool] | NotGiven = NOT_GIVEN, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[int] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None], + best_of: Optional[int] | Omit = omit, + echo: Optional[bool] | Omit = omit, + frequency_penalty: Optional[float] | Omit = omit, + logit_bias: Optional[Dict[str, int]] | Omit = omit, + logprobs: Optional[int] | Omit = omit, + max_tokens: Optional[int] | Omit = omit, + n: Optional[int] | Omit = omit, + presence_penalty: Optional[float] | Omit = omit, + seed: Optional[int] | Omit = omit, + stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit, + suffix: Optional[str] | Omit = omit, + temperature: Optional[float] | Omit = omit, + top_p: Optional[float] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | AsyncStream[Completion]: return await self._post( "/completions", @@ -1079,7 +1109,9 @@ async def create( "top_p": top_p, "user": user, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/openai/resources/containers/__init__.py b/src/openai/resources/containers/__init__.py new file mode 100644 index 0000000000..dc1936780b --- /dev/null +++ b/src/openai/resources/containers/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .containers import ( + Containers, + AsyncContainers, + ContainersWithRawResponse, + AsyncContainersWithRawResponse, + ContainersWithStreamingResponse, + AsyncContainersWithStreamingResponse, +) + +__all__ = [ + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", + "Containers", + "AsyncContainers", + "ContainersWithRawResponse", + "AsyncContainersWithRawResponse", + "ContainersWithStreamingResponse", + "AsyncContainersWithStreamingResponse", +] diff --git a/src/openai/resources/containers/containers.py b/src/openai/resources/containers/containers.py new file mode 100644 index 0000000000..0cbb400d4a --- /dev/null +++ b/src/openai/resources/containers/containers.py @@ -0,0 +1,518 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ...types import container_list_params, container_create_params +from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .files.files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.container_list_response import ContainerListResponse +from ...types.container_create_response import ContainerCreateResponse +from ...types.container_retrieve_response import ContainerRetrieveResponse + +__all__ = ["Containers", "AsyncContainers"] + + +class Containers(SyncAPIResource): + @cached_property + def files(self) -> Files: + return Files(self._client) + + @cached_property + def with_raw_response(self) -> ContainersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ContainersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ContainersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ContainersWithStreamingResponse(self) + + def create( + self, + *, + name: str, + expires_after: container_create_params.ExpiresAfter | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ContainerCreateResponse: + """ + Create Container + + Args: + name: Name of the container to create. + + expires_after: Container expiration time in seconds relative to the 'anchor' time. + + file_ids: IDs of files to copy to the container. + + memory_limit: Optional memory limit for the container. Defaults to "1g". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/containers", + body=maybe_transform( + { + "name": name, + "expires_after": expires_after, + "file_ids": file_ids, + "memory_limit": memory_limit, + }, + container_create_params.ContainerCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ContainerCreateResponse, + ) + + def retrieve( + self, + container_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ContainerRetrieveResponse: + """ + Retrieve Container + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + return self._get( + f"/containers/{container_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ContainerRetrieveResponse, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[ContainerListResponse]: + """List Containers + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
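# A minimal sketch of the new Containers resource documented above; the
# container name is a placeholder. `list()` returns a cursor page that can be
# iterated directly, and `delete()` returns nothing on success.
from openai import OpenAI

client = OpenAI()

container = client.containers.create(name="demo-container")
print(container.id)

for item in client.containers.list(limit=20, order="desc"):
    print(item.id, item.name)

client.containers.delete(container.id)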
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/containers", + page=SyncCursorPage[ContainerListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + container_list_params.ContainerListParams, + ), + ), + model=ContainerListResponse, + ) + + def delete( + self, + container_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete Container + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/containers/{container_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncContainers(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFiles: + return AsyncFiles(self._client) + + @cached_property + def with_raw_response(self) -> AsyncContainersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncContainersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncContainersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncContainersWithStreamingResponse(self) + + async def create( + self, + *, + name: str, + expires_after: container_create_params.ExpiresAfter | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ContainerCreateResponse: + """ + Create Container + + Args: + name: Name of the container to create. + + expires_after: Container expiration time in seconds relative to the 'anchor' time. 
+ + file_ids: IDs of files to copy to the container. + + memory_limit: Optional memory limit for the container. Defaults to "1g". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/containers", + body=await async_maybe_transform( + { + "name": name, + "expires_after": expires_after, + "file_ids": file_ids, + "memory_limit": memory_limit, + }, + container_create_params.ContainerCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ContainerCreateResponse, + ) + + async def retrieve( + self, + container_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ContainerRetrieveResponse: + """ + Retrieve Container + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + return await self._get( + f"/containers/{container_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ContainerRetrieveResponse, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ContainerListResponse, AsyncCursorPage[ContainerListResponse]]: + """List Containers + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/containers", + page=AsyncCursorPage[ContainerListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + container_list_params.ContainerListParams, + ), + ), + model=ContainerListResponse, + ) + + async def delete( + self, + container_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete Container + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/containers/{container_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class ContainersWithRawResponse: + def __init__(self, containers: Containers) -> None: + self._containers = containers + + self.create = _legacy_response.to_raw_response_wrapper( + containers.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + containers.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + containers.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + containers.delete, + ) + + @cached_property + def files(self) -> FilesWithRawResponse: + return FilesWithRawResponse(self._containers.files) + + +class AsyncContainersWithRawResponse: + def __init__(self, containers: AsyncContainers) -> None: + self._containers = containers + + self.create = _legacy_response.async_to_raw_response_wrapper( + containers.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + containers.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + containers.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + containers.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithRawResponse: + return AsyncFilesWithRawResponse(self._containers.files) + + +class ContainersWithStreamingResponse: + def __init__(self, containers: Containers) -> None: + self._containers = containers + + self.create = to_streamed_response_wrapper( + containers.create, + ) + self.retrieve = to_streamed_response_wrapper( + containers.retrieve, + ) + self.list = to_streamed_response_wrapper( + containers.list, + ) + self.delete = to_streamed_response_wrapper( + containers.delete, + ) + + @cached_property + def files(self) -> FilesWithStreamingResponse: + return 
FilesWithStreamingResponse(self._containers.files) + + +class AsyncContainersWithStreamingResponse: + def __init__(self, containers: AsyncContainers) -> None: + self._containers = containers + + self.create = async_to_streamed_response_wrapper( + containers.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + containers.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + containers.list, + ) + self.delete = async_to_streamed_response_wrapper( + containers.delete, + ) + + @cached_property + def files(self) -> AsyncFilesWithStreamingResponse: + return AsyncFilesWithStreamingResponse(self._containers.files) diff --git a/src/openai/resources/containers/files/__init__.py b/src/openai/resources/containers/files/__init__.py new file mode 100644 index 0000000000..f71f7dbf55 --- /dev/null +++ b/src/openai/resources/containers/files/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .files import ( + Files, + AsyncFiles, + FilesWithRawResponse, + AsyncFilesWithRawResponse, + FilesWithStreamingResponse, + AsyncFilesWithStreamingResponse, +) +from .content import ( + Content, + AsyncContent, + ContentWithRawResponse, + AsyncContentWithRawResponse, + ContentWithStreamingResponse, + AsyncContentWithStreamingResponse, +) + +__all__ = [ + "Content", + "AsyncContent", + "ContentWithRawResponse", + "AsyncContentWithRawResponse", + "ContentWithStreamingResponse", + "AsyncContentWithStreamingResponse", + "Files", + "AsyncFiles", + "FilesWithRawResponse", + "AsyncFilesWithRawResponse", + "FilesWithStreamingResponse", + "AsyncFilesWithStreamingResponse", +] diff --git a/src/openai/resources/containers/files/content.py b/src/openai/resources/containers/files/content.py new file mode 100644 index 0000000000..a3dbd0e8c7 --- /dev/null +++ b/src/openai/resources/containers/files/content.py @@ -0,0 +1,173 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... import _legacy_response +from ...._types import Body, Query, Headers, NotGiven, not_given +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ...._base_client import make_request_options + +__all__ = ["Content", "AsyncContent"] + + +class Content(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ContentWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ContentWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ContentWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ContentWithStreamingResponse(self) + + def retrieve( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Retrieve Container File Content + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/containers/{container_id}/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class AsyncContent(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncContentWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncContentWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncContentWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncContentWithStreamingResponse(self) + + async def retrieve( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
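A minimal sketch of downloading a container file's bytes via the `content` sub-resource defined above (the IDs and output path are illustrative; the returned `HttpxBinaryResponseContent` also exposes `.read()` if you want the bytes in memory):

from openai import OpenAI

client = OpenAI()

# Fetch the raw file content and write it to disk.
content = client.containers.files.content.retrieve(
    "cfile_123",            # illustrative file ID
    container_id="cntr_123",  # illustrative container ID
)
content.write_to_file("output.bin")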
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Retrieve Container File Content + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return await self._get( + f"/containers/{container_id}/files/{file_id}/content", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + +class ContentWithRawResponse: + def __init__(self, content: Content) -> None: + self._content = content + + self.retrieve = _legacy_response.to_raw_response_wrapper( + content.retrieve, + ) + + +class AsyncContentWithRawResponse: + def __init__(self, content: AsyncContent) -> None: + self._content = content + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + content.retrieve, + ) + + +class ContentWithStreamingResponse: + def __init__(self, content: Content) -> None: + self._content = content + + self.retrieve = to_custom_streamed_response_wrapper( + content.retrieve, + StreamedBinaryAPIResponse, + ) + + +class AsyncContentWithStreamingResponse: + def __init__(self, content: AsyncContent) -> None: + self._content = content + + self.retrieve = async_to_custom_streamed_response_wrapper( + content.retrieve, + AsyncStreamedBinaryAPIResponse, + ) diff --git a/src/openai/resources/containers/files/files.py b/src/openai/resources/containers/files/files.py new file mode 100644 index 0000000000..a472cfc9f3 --- /dev/null +++ b/src/openai/resources/containers/files/files.py @@ -0,0 +1,545 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from .content import ( + Content, + AsyncContent, + ContentWithRawResponse, + AsyncContentWithRawResponse, + ContentWithStreamingResponse, + AsyncContentWithStreamingResponse, +) +from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, FileTypes, omit, not_given +from ...._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.containers import file_list_params, file_create_params +from ....types.containers.file_list_response import FileListResponse +from ....types.containers.file_create_response import FileCreateResponse +from ....types.containers.file_retrieve_response import FileRetrieveResponse + +__all__ = ["Files", "AsyncFiles"] + + +class Files(SyncAPIResource): + @cached_property + def content(self) -> Content: + return Content(self._client) + + @cached_property + def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return FilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return FilesWithStreamingResponse(self) + + def create( + self, + container_id: str, + *, + file: FileTypes | Omit = omit, + file_id: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileCreateResponse: + """ + Create a Container File + + You can send either a multipart/form-data request with the raw file content, or + a JSON request with a file ID. + + Args: + file: The File object (not file name) to be uploaded. + + file_id: Name of the file to create. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + body = deepcopy_minimal( + { + "file": file, + "file_id": file_id, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + f"/containers/{container_id}/files", + body=maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileCreateResponse, + ) + + def retrieve( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileRetrieveResponse: + """ + Retrieve Container File + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._get( + f"/containers/{container_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileRetrieveResponse, + ) + + def list( + self, + container_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[FileListResponse]: + """List Container files + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. 
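To illustrate the two request forms described in the `create` docstring above — a multipart upload of raw content versus a JSON body referencing an existing File — a short sketch (the IDs and local path are illustrative):

from openai import OpenAI

client = OpenAI()

# Form 1: multipart/form-data with the raw file content.
with open("report.csv", "rb") as f:
    client.containers.files.create("cntr_123", file=f)

# Form 2: JSON body pointing at an already-uploaded File object.
client.containers.files.create("cntr_123", file_id="file-abc123")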
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + return self._get_api_list( + f"/containers/{container_id}/files", + page=SyncCursorPage[FileListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=FileListResponse, + ) + + def delete( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete Container File + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/containers/{container_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncFiles(AsyncAPIResource): + @cached_property + def content(self) -> AsyncContent: + return AsyncContent(self._client) + + @cached_property + def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncFilesWithStreamingResponse(self) + + async def create( + self, + container_id: str, + *, + file: FileTypes | Omit = omit, + file_id: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileCreateResponse: + """ + Create a Container File + + You can send either a multipart/form-data request with the raw file content, or + a JSON request with a file ID. + + Args: + file: The File object (not file name) to be uploaded. + + file_id: Name of the file to create. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + body = deepcopy_minimal( + { + "file": file, + "file_id": file_id, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + f"/containers/{container_id}/files", + body=await async_maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileCreateResponse, + ) + + async def retrieve( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileRetrieveResponse: + """ + Retrieve Container File + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return await self._get( + f"/containers/{container_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileRetrieveResponse, + ) + + def list( + self, + container_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[FileListResponse, AsyncCursorPage[FileListResponse]]: + """List Container files + + Args: + after: A cursor for use in pagination. 
+ + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + return self._get_api_list( + f"/containers/{container_id}/files", + page=AsyncCursorPage[FileListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + file_list_params.FileListParams, + ), + ), + model=FileListResponse, + ) + + async def delete( + self, + file_id: str, + *, + container_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete Container File + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not container_id: + raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/containers/{container_id}/files/{file_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + files.delete, + ) + + @cached_property + def content(self) -> ContentWithRawResponse: + return ContentWithRawResponse(self._files.content) + + +class AsyncFilesWithRawResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = _legacy_response.async_to_raw_response_wrapper( + files.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + files.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + files.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + 
files.delete, + ) + + @cached_property + def content(self) -> AsyncContentWithRawResponse: + return AsyncContentWithRawResponse(self._files.content) + + +class FilesWithStreamingResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + self.retrieve = to_streamed_response_wrapper( + files.retrieve, + ) + self.list = to_streamed_response_wrapper( + files.list, + ) + self.delete = to_streamed_response_wrapper( + files.delete, + ) + + @cached_property + def content(self) -> ContentWithStreamingResponse: + return ContentWithStreamingResponse(self._files.content) + + +class AsyncFilesWithStreamingResponse: + def __init__(self, files: AsyncFiles) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + files.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + files.list, + ) + self.delete = async_to_streamed_response_wrapper( + files.delete, + ) + + @cached_property + def content(self) -> AsyncContentWithStreamingResponse: + return AsyncContentWithStreamingResponse(self._files.content) diff --git a/src/openai/resources/conversations/__init__.py b/src/openai/resources/conversations/__init__.py new file mode 100644 index 0000000000..c6c4fd6ee4 --- /dev/null +++ b/src/openai/resources/conversations/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .items import ( + Items, + AsyncItems, + ItemsWithRawResponse, + AsyncItemsWithRawResponse, + ItemsWithStreamingResponse, + AsyncItemsWithStreamingResponse, +) +from .conversations import ( + Conversations, + AsyncConversations, + ConversationsWithRawResponse, + AsyncConversationsWithRawResponse, + ConversationsWithStreamingResponse, + AsyncConversationsWithStreamingResponse, +) + +__all__ = [ + "Items", + "AsyncItems", + "ItemsWithRawResponse", + "AsyncItemsWithRawResponse", + "ItemsWithStreamingResponse", + "AsyncItemsWithStreamingResponse", + "Conversations", + "AsyncConversations", + "ConversationsWithRawResponse", + "AsyncConversationsWithRawResponse", + "ConversationsWithStreamingResponse", + "AsyncConversationsWithStreamingResponse", +] diff --git a/src/openai/resources/conversations/conversations.py b/src/openai/resources/conversations/conversations.py new file mode 100644 index 0000000000..da037a4e22 --- /dev/null +++ b/src/openai/resources/conversations/conversations.py @@ -0,0 +1,486 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional + +import httpx + +from ... 
import _legacy_response +from .items import ( + Items, + AsyncItems, + ItemsWithRawResponse, + AsyncItemsWithRawResponse, + ItemsWithStreamingResponse, + AsyncItemsWithStreamingResponse, +) +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.conversations import conversation_create_params, conversation_update_params +from ...types.shared_params.metadata import Metadata +from ...types.conversations.conversation import Conversation +from ...types.responses.response_input_item_param import ResponseInputItemParam +from ...types.conversations.conversation_deleted_resource import ConversationDeletedResource + +__all__ = ["Conversations", "AsyncConversations"] + + +class Conversations(SyncAPIResource): + @cached_property + def items(self) -> Items: + return Items(self._client) + + @cached_property + def with_raw_response(self) -> ConversationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ConversationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ConversationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ConversationsWithStreamingResponse(self) + + def create( + self, + *, + items: Optional[Iterable[ResponseInputItemParam]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Create a conversation. + + Args: + items: Initial items to include in the conversation context. You may add up to 20 items + at a time. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
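A minimal sketch of creating a conversation with an initial item and metadata, matching the signature above (the message content and metadata values are illustrative):

from openai import OpenAI

client = OpenAI()

conversation = client.conversations.create(
    items=[{"type": "message", "role": "user", "content": "Hello!"}],
    metadata={"topic": "demo"},  # up to 16 string key-value pairs
)
print(conversation.id)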
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/conversations", + body=maybe_transform( + { + "items": items, + "metadata": metadata, + }, + conversation_create_params.ConversationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + def retrieve( + self, + conversation_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Get a conversation + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._get( + f"/conversations/{conversation_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + def update( + self, + conversation_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Update a conversation + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._post( + f"/conversations/{conversation_id}", + body=maybe_transform({"metadata": metadata}, conversation_update_params.ConversationUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + def delete( + self, + conversation_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationDeletedResource: + """Delete a conversation. + + Items in the conversation will not be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._delete( + f"/conversations/{conversation_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ConversationDeletedResource, + ) + + +class AsyncConversations(AsyncAPIResource): + @cached_property + def items(self) -> AsyncItems: + return AsyncItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncConversationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncConversationsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncConversationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncConversationsWithStreamingResponse(self) + + async def create( + self, + *, + items: Optional[Iterable[ResponseInputItemParam]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Create a conversation. + + Args: + items: Initial items to include in the conversation context. You may add up to 20 items + at a time. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/conversations", + body=await async_maybe_transform( + { + "items": items, + "metadata": metadata, + }, + conversation_create_params.ConversationCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + async def retrieve( + self, + conversation_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Get a conversation + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return await self._get( + f"/conversations/{conversation_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + async def update( + self, + conversation_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Update a conversation + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return await self._post( + f"/conversations/{conversation_id}", + body=await async_maybe_transform( + {"metadata": metadata}, conversation_update_params.ConversationUpdateParams + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + async def delete( + self, + conversation_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationDeletedResource: + """Delete a conversation. + + Items in the conversation will not be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return await self._delete( + f"/conversations/{conversation_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ConversationDeletedResource, + ) + + +class ConversationsWithRawResponse: + def __init__(self, conversations: Conversations) -> None: + self._conversations = conversations + + self.create = _legacy_response.to_raw_response_wrapper( + conversations.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + conversations.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + conversations.update, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + conversations.delete, + ) + + @cached_property + def items(self) -> ItemsWithRawResponse: + return ItemsWithRawResponse(self._conversations.items) + + +class AsyncConversationsWithRawResponse: + def __init__(self, conversations: AsyncConversations) -> None: + self._conversations = conversations + + self.create = _legacy_response.async_to_raw_response_wrapper( + conversations.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + conversations.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + conversations.update, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + conversations.delete, + ) + + @cached_property + def items(self) -> AsyncItemsWithRawResponse: + return AsyncItemsWithRawResponse(self._conversations.items) + + +class ConversationsWithStreamingResponse: + def __init__(self, conversations: Conversations) -> None: + self._conversations = conversations + + self.create = to_streamed_response_wrapper( + conversations.create, + ) + self.retrieve = to_streamed_response_wrapper( + conversations.retrieve, + ) + self.update = to_streamed_response_wrapper( + conversations.update, + ) + self.delete = to_streamed_response_wrapper( + conversations.delete, + ) + + @cached_property + def items(self) -> ItemsWithStreamingResponse: + return ItemsWithStreamingResponse(self._conversations.items) + + +class AsyncConversationsWithStreamingResponse: + def __init__(self, conversations: AsyncConversations) -> None: + self._conversations = conversations + + self.create = async_to_streamed_response_wrapper( + conversations.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + conversations.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + conversations.update, + ) + self.delete = async_to_streamed_response_wrapper( + conversations.delete, + ) + + @cached_property + def items(self) -> AsyncItemsWithStreamingResponse: + return AsyncItemsWithStreamingResponse(self._conversations.items) diff --git 
a/src/openai/resources/conversations/items.py b/src/openai/resources/conversations/items.py new file mode 100644 index 0000000000..3dba144849 --- /dev/null +++ b/src/openai/resources/conversations/items.py @@ -0,0 +1,557 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, Iterable, cast +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncConversationCursorPage, AsyncConversationCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.conversations import item_list_params, item_create_params, item_retrieve_params +from ...types.conversations.conversation import Conversation +from ...types.responses.response_includable import ResponseIncludable +from ...types.conversations.conversation_item import ConversationItem +from ...types.responses.response_input_item_param import ResponseInputItemParam +from ...types.conversations.conversation_item_list import ConversationItemList + +__all__ = ["Items", "AsyncItems"] + + +class Items(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ItemsWithStreamingResponse(self) + + def create( + self, + conversation_id: str, + *, + items: Iterable[ResponseInputItemParam], + include: List[ResponseIncludable] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationItemList: + """ + Create items in a conversation with the given ID. + + Args: + items: The items to add to the conversation. You may add up to 20 items at a time. + + include: Additional fields to include in the response. See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._post( + f"/conversations/{conversation_id}/items", + body=maybe_transform({"items": items}, item_create_params.ItemCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, item_create_params.ItemCreateParams), + ), + cast_to=ConversationItemList, + ) + + def retrieve( + self, + item_id: str, + *, + conversation_id: str, + include: List[ResponseIncludable] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationItem: + """ + Get a single item from a conversation with the given IDs. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + return cast( + ConversationItem, + self._get( + f"/conversations/{conversation_id}/items/{item_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, item_retrieve_params.ItemRetrieveParams), + ), + cast_to=cast(Any, ConversationItem), # Union types cannot be passed in as arguments in the type system + ), + ) + + def list( + self, + conversation_id: str, + *, + after: str | Omit = omit, + include: List[ResponseIncludable] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncConversationCursorPage[ConversationItem]: + """ + List all items for a conversation with the given ID. + + Args: + after: An item ID to list items after, used in pagination. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._get_api_list( + f"/conversations/{conversation_id}/items", + page=SyncConversationCursorPage[ConversationItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "include": include, + "limit": limit, + "order": order, + }, + item_list_params.ItemListParams, + ), + ), + model=cast(Any, ConversationItem), # Union types cannot be passed in as arguments in the type system + ) + + def delete( + self, + item_id: str, + *, + conversation_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Delete an item from a conversation with the given IDs. 
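A short sketch of adding items to a conversation and listing them back (the conversation ID and message text are illustrative; the optional `include` values are those enumerated in the docstring above):

from openai import OpenAI

client = OpenAI()

# Append a user message to an existing conversation.
client.conversations.items.create(
    "conv_123",  # illustrative conversation ID
    items=[{"type": "message", "role": "user", "content": "What changed?"}],
)

# Iterate items oldest-first; the page object handles cursor pagination.
for item in client.conversations.items.list(
    "conv_123",
    include=["message.output_text.logprobs"],
    order="asc",
):
    print(item.type)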
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + return self._delete( + f"/conversations/{conversation_id}/items/{item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + +class AsyncItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncItemsWithStreamingResponse(self) + + async def create( + self, + conversation_id: str, + *, + items: Iterable[ResponseInputItemParam], + include: List[ResponseIncludable] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationItemList: + """ + Create items in a conversation with the given ID. + + Args: + items: The items to add to the conversation. You may add up to 20 items at a time. + + include: Additional fields to include in the response. See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return await self._post( + f"/conversations/{conversation_id}/items", + body=await async_maybe_transform({"items": items}, item_create_params.ItemCreateParams), + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, item_create_params.ItemCreateParams), + ), + cast_to=ConversationItemList, + ) + + async def retrieve( + self, + item_id: str, + *, + conversation_id: str, + include: List[ResponseIncludable] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ConversationItem: + """ + Get a single item from a conversation with the given IDs. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + return cast( + ConversationItem, + await self._get( + f"/conversations/{conversation_id}/items/{item_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, item_retrieve_params.ItemRetrieveParams), + ), + cast_to=cast(Any, ConversationItem), # Union types cannot be passed in as arguments in the type system + ), + ) + + def list( + self, + conversation_id: str, + *, + after: str | Omit = omit, + include: List[ResponseIncludable] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ConversationItem, AsyncConversationCursorPage[ConversationItem]]: + """ + List all items for a conversation with the given ID. + + Args: + after: An item ID to list items after, used in pagination. 
+ + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + return self._get_api_list( + f"/conversations/{conversation_id}/items", + page=AsyncConversationCursorPage[ConversationItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "include": include, + "limit": limit, + "order": order, + }, + item_list_params.ItemListParams, + ), + ), + model=cast(Any, ConversationItem), # Union types cannot be passed in as arguments in the type system + ) + + async def delete( + self, + item_id: str, + *, + conversation_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Conversation: + """ + Delete an item from a conversation with the given IDs. 
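+
+        A minimal async sketch (placeholder IDs; assumes the `client.conversations.items`
+        accessor on an `AsyncOpenAI` client and an enclosing event loop):
+
+            from openai import AsyncOpenAI
+
+            client = AsyncOpenAI()
+
+            async def remove_item() -> None:
+                # Awaiting the call removes the item and yields the updated Conversation.
+                conversation = await client.conversations.items.delete(
+                    "msg_abc123",
+                    conversation_id="conv_abc123",
+                )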
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + return await self._delete( + f"/conversations/{conversation_id}/items/{item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Conversation, + ) + + +class ItemsWithRawResponse: + def __init__(self, items: Items) -> None: + self._items = items + + self.create = _legacy_response.to_raw_response_wrapper( + items.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + items.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + items.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + items.delete, + ) + + +class AsyncItemsWithRawResponse: + def __init__(self, items: AsyncItems) -> None: + self._items = items + + self.create = _legacy_response.async_to_raw_response_wrapper( + items.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + items.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + items.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + items.delete, + ) + + +class ItemsWithStreamingResponse: + def __init__(self, items: Items) -> None: + self._items = items + + self.create = to_streamed_response_wrapper( + items.create, + ) + self.retrieve = to_streamed_response_wrapper( + items.retrieve, + ) + self.list = to_streamed_response_wrapper( + items.list, + ) + self.delete = to_streamed_response_wrapper( + items.delete, + ) + + +class AsyncItemsWithStreamingResponse: + def __init__(self, items: AsyncItems) -> None: + self._items = items + + self.create = async_to_streamed_response_wrapper( + items.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + items.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + items.list, + ) + self.delete = async_to_streamed_response_wrapper( + items.delete, + ) diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 773b6f0968..874da7d8f9 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -2,23 +2,20 @@ from __future__ import annotations -import base64 -from typing import List, Union, Iterable, cast +from typing import Union, Iterable from typing_extensions import Literal import httpx from .. 
import _legacy_response from ..types import embedding_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import is_given, maybe_transform +from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property -from .._extras import numpy as np, has_numpy from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options +from ..types.embedding_model import EmbeddingModel from ..types.create_embedding_response import CreateEmbeddingResponse __all__ = ["Embeddings", "AsyncEmbeddings"] @@ -27,26 +24,37 @@ class Embeddings(SyncAPIResource): @cached_property def with_raw_response(self) -> EmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return EmbeddingsWithRawResponse(self) @cached_property def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return EmbeddingsWithStreamingResponse(self) def create( self, *, - input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], - dimensions: int | NotGiven = NOT_GIVEN, - encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + input: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | Omit = omit, + encoding_format: Literal["float", "base64"] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> CreateEmbeddingResponse: """ Creates an embedding vector representing the input text. @@ -55,16 +63,18 @@ def create( input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for - `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + all embedding models), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. In addition to the per-input token limit, all embedding + models enforce a maximum of 300,000 tokens summed across all inputs in a single + request. 
model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. dimensions: The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. @@ -74,7 +84,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -84,42 +94,20 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - params = { - "input": input, - "model": model, - "user": user, - "dimensions": dimensions, - "encoding_format": encoding_format, - } - if not is_given(encoding_format) and has_numpy(): - params["encoding_format"] = "base64" - - def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: - if is_given(encoding_format): - # don't modify the response object if a user explicitly asked for a format - return obj - - for embedding in obj.data: - data = cast(object, embedding.embedding) - if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() - - return obj - return self._post( "/embeddings", - body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + body=maybe_transform( + { + "input": input, + "model": model, + "dimensions": dimensions, + "encoding_format": encoding_format, + "user": user, + }, + embedding_create_params.EmbeddingCreateParams, + ), options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=parser, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=CreateEmbeddingResponse, ) @@ -128,26 +116,37 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: class AsyncEmbeddings(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncEmbeddingsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
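+
+        A hedged usage sketch (the model name and input text are placeholders; `client` is
+        assumed to be an `AsyncOpenAI` instance running inside an event loop):
+
+            async with client.embeddings.with_streaming_response.create(
+                model="text-embedding-3-small",
+                input="The quick brown fox",
+            ) as response:
+                # Headers are available before the body is read.
+                print(response.headers.get("x-request-id"))
+                embeddings = await response.parse()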
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncEmbeddingsWithStreamingResponse(self) async def create( self, *, - input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], - dimensions: int | NotGiven = NOT_GIVEN, - encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + input: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]]], + model: Union[str, EmbeddingModel], + dimensions: int | Omit = omit, + encoding_format: Literal["float", "base64"] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> CreateEmbeddingResponse: """ Creates an embedding vector representing the input text. @@ -156,16 +155,18 @@ async def create( input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for - `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 + all embedding models), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. In addition to the per-input token limit, all embedding + models enforce a maximum of 300,000 tokens summed across all inputs in a single + request. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. dimensions: The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. @@ -175,7 +176,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -185,42 +186,20 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - params = { - "input": input, - "model": model, - "user": user, - "dimensions": dimensions, - "encoding_format": encoding_format, - } - if not is_given(encoding_format) and has_numpy(): - params["encoding_format"] = "base64" - - def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: - if is_given(encoding_format): - # don't modify the response object if a user explicitly asked for a format - return obj - - for embedding in obj.data: - data = cast(object, embedding.embedding) - if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() - - return obj - return await self._post( "/embeddings", - body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + body=await async_maybe_transform( + { + "input": input, + "model": model, + "dimensions": dimensions, + "encoding_format": encoding_format, + "user": user, + }, + embedding_create_params.EmbeddingCreateParams, + ), options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=parser, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=CreateEmbeddingResponse, ) diff --git a/src/openai/resources/evals/__init__.py b/src/openai/resources/evals/__init__.py new file mode 100644 index 0000000000..84f707511d --- /dev/null +++ b/src/openai/resources/evals/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .evals import ( + Evals, + AsyncEvals, + EvalsWithRawResponse, + AsyncEvalsWithRawResponse, + EvalsWithStreamingResponse, + AsyncEvalsWithStreamingResponse, +) + +__all__ = [ + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", + "Evals", + "AsyncEvals", + "EvalsWithRawResponse", + "AsyncEvalsWithRawResponse", + "EvalsWithStreamingResponse", + "AsyncEvalsWithStreamingResponse", +] diff --git a/src/openai/resources/evals/evals.py b/src/openai/resources/evals/evals.py new file mode 100644 index 0000000000..40c4a3e9a3 --- /dev/null +++ b/src/openai/resources/evals/evals.py @@ -0,0 +1,662 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ...types import eval_list_params, eval_create_params, eval_update_params +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.eval_list_response import EvalListResponse +from ...types.eval_create_response import EvalCreateResponse +from ...types.eval_delete_response import EvalDeleteResponse +from ...types.eval_update_response import EvalUpdateResponse +from ...types.eval_retrieve_response import EvalRetrieveResponse +from ...types.shared_params.metadata import Metadata + +__all__ = ["Evals", "AsyncEvals"] + + +class Evals(SyncAPIResource): + @cached_property + def runs(self) -> Runs: + return Runs(self._client) + + @cached_property + def with_raw_response(self) -> EvalsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return EvalsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EvalsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return EvalsWithStreamingResponse(self) + + def create( + self, + *, + data_source_config: eval_create_params.DataSourceConfig, + testing_criteria: Iterable[eval_create_params.TestingCriterion], + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalCreateResponse: + """ + Create the structure of an evaluation that can be used to test a model's + performance. An evaluation is a set of testing criteria and the config for a + data source, which dictates the schema of the data used in the evaluation. After + creating an evaluation, you can run it on different models and model parameters. + We support several types of graders and datasources. For more information, see + the [Evals guide](https://platform.openai.com/docs/guides/evals). + + Args: + data_source_config: The configuration for the data source used for the evaluation runs. Dictates the + schema of the data used in the evaluation. + + testing_criteria: A list of graders for all eval runs in this group. Graders can reference + variables in the data source using double curly braces notation, like + `{{item.variable_name}}`. 
To reference the model's output, use the `sample` + namespace (ie, `{{sample.output_text}}`). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the evaluation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/evals", + body=maybe_transform( + { + "data_source_config": data_source_config, + "testing_criteria": testing_criteria, + "metadata": metadata, + "name": name, + }, + eval_create_params.EvalCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalCreateResponse, + ) + + def retrieve( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalRetrieveResponse: + """ + Get an evaluation by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalRetrieveResponse, + ) + + def update( + self, + eval_id: str, + *, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalUpdateResponse: + """ + Update certain properties of an evaluation. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: Rename the evaluation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._post( + f"/evals/{eval_id}", + body=maybe_transform( + { + "metadata": metadata, + "name": name, + }, + eval_update_params.EvalUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalUpdateResponse, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + order_by: Literal["created_at", "updated_at"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[EvalListResponse]: + """ + List evaluations for a project. + + Args: + after: Identifier for the last eval from the previous pagination request. + + limit: Number of evals to retrieve. + + order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for + descending order. + + order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for + creation time or `updated_at` for last updated time. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/evals", + page=SyncCursorPage[EvalListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "order_by": order_by, + }, + eval_list_params.EvalListParams, + ), + ), + model=EvalListResponse, + ) + + def delete( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalDeleteResponse: + """ + Delete an evaluation. 
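+
+        Illustrative sketch (the eval ID is a placeholder; `client` is assumed to be an
+        `OpenAI` instance):
+
+            deleted = client.evals.delete("eval_67abc")
+
+        The call returns an `EvalDeleteResponse` describing the deleted evaluation.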
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._delete( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalDeleteResponse, + ) + + +class AsyncEvals(AsyncAPIResource): + @cached_property + def runs(self) -> AsyncRuns: + return AsyncRuns(self._client) + + @cached_property + def with_raw_response(self) -> AsyncEvalsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncEvalsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncEvalsWithStreamingResponse(self) + + async def create( + self, + *, + data_source_config: eval_create_params.DataSourceConfig, + testing_criteria: Iterable[eval_create_params.TestingCriterion], + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalCreateResponse: + """ + Create the structure of an evaluation that can be used to test a model's + performance. An evaluation is a set of testing criteria and the config for a + data source, which dictates the schema of the data used in the evaluation. After + creating an evaluation, you can run it on different models and model parameters. + We support several types of graders and datasources. For more information, see + the [Evals guide](https://platform.openai.com/docs/guides/evals). + + Args: + data_source_config: The configuration for the data source used for the evaluation runs. Dictates the + schema of the data used in the evaluation. + + testing_criteria: A list of graders for all eval runs in this group. Graders can reference + variables in the data source using double curly braces notation, like + `{{item.variable_name}}`. To reference the model's output, use the `sample` + namespace (ie, `{{sample.output_text}}`). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the evaluation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/evals", + body=await async_maybe_transform( + { + "data_source_config": data_source_config, + "testing_criteria": testing_criteria, + "metadata": metadata, + "name": name, + }, + eval_create_params.EvalCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalCreateResponse, + ) + + async def retrieve( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalRetrieveResponse: + """ + Get an evaluation by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._get( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalRetrieveResponse, + ) + + async def update( + self, + eval_id: str, + *, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalUpdateResponse: + """ + Update certain properties of an evaluation. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: Rename the evaluation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._post( + f"/evals/{eval_id}", + body=await async_maybe_transform( + { + "metadata": metadata, + "name": name, + }, + eval_update_params.EvalUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalUpdateResponse, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + order_by: Literal["created_at", "updated_at"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]: + """ + List evaluations for a project. + + Args: + after: Identifier for the last eval from the previous pagination request. + + limit: Number of evals to retrieve. + + order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for + descending order. + + order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for + creation time or `updated_at` for last updated time. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/evals", + page=AsyncCursorPage[EvalListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "order_by": order_by, + }, + eval_list_params.EvalListParams, + ), + ), + model=EvalListResponse, + ) + + async def delete( + self, + eval_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> EvalDeleteResponse: + """ + Delete an evaluation. 
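+
+        A minimal async sketch (placeholder ID; assumes an `AsyncOpenAI` client):
+
+            import asyncio
+
+            from openai import AsyncOpenAI
+
+            client = AsyncOpenAI()
+
+            async def main() -> None:
+                # Deletes the evaluation and returns an EvalDeleteResponse.
+                deleted = await client.evals.delete("eval_67abc")
+
+            asyncio.run(main())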
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._delete( + f"/evals/{eval_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvalDeleteResponse, + ) + + +class EvalsWithRawResponse: + def __init__(self, evals: Evals) -> None: + self._evals = evals + + self.create = _legacy_response.to_raw_response_wrapper( + evals.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + evals.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + evals.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + evals.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> RunsWithRawResponse: + return RunsWithRawResponse(self._evals.runs) + + +class AsyncEvalsWithRawResponse: + def __init__(self, evals: AsyncEvals) -> None: + self._evals = evals + + self.create = _legacy_response.async_to_raw_response_wrapper( + evals.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + evals.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + evals.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + evals.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> AsyncRunsWithRawResponse: + return AsyncRunsWithRawResponse(self._evals.runs) + + +class EvalsWithStreamingResponse: + def __init__(self, evals: Evals) -> None: + self._evals = evals + + self.create = to_streamed_response_wrapper( + evals.create, + ) + self.retrieve = to_streamed_response_wrapper( + evals.retrieve, + ) + self.update = to_streamed_response_wrapper( + evals.update, + ) + self.list = to_streamed_response_wrapper( + evals.list, + ) + self.delete = to_streamed_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> RunsWithStreamingResponse: + return RunsWithStreamingResponse(self._evals.runs) + + +class AsyncEvalsWithStreamingResponse: + def __init__(self, evals: AsyncEvals) -> None: + self._evals = evals + + self.create = async_to_streamed_response_wrapper( + evals.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + evals.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + evals.update, + ) + self.list = async_to_streamed_response_wrapper( + evals.list, + ) + self.delete = async_to_streamed_response_wrapper( + evals.delete, + ) + + @cached_property + def runs(self) -> AsyncRunsWithStreamingResponse: + return AsyncRunsWithStreamingResponse(self._evals.runs) diff --git a/src/openai/resources/evals/runs/__init__.py b/src/openai/resources/evals/runs/__init__.py new file mode 100644 index 0000000000..d189f16fb7 --- /dev/null +++ b/src/openai/resources/evals/runs/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) +from .output_items import ( + OutputItems, + AsyncOutputItems, + OutputItemsWithRawResponse, + AsyncOutputItemsWithRawResponse, + OutputItemsWithStreamingResponse, + AsyncOutputItemsWithStreamingResponse, +) + +__all__ = [ + "OutputItems", + "AsyncOutputItems", + "OutputItemsWithRawResponse", + "AsyncOutputItemsWithRawResponse", + "OutputItemsWithStreamingResponse", + "AsyncOutputItemsWithStreamingResponse", + "Runs", + "AsyncRuns", + "RunsWithRawResponse", + "AsyncRunsWithRawResponse", + "RunsWithStreamingResponse", + "AsyncRunsWithStreamingResponse", +] diff --git a/src/openai/resources/evals/runs/output_items.py b/src/openai/resources/evals/runs/output_items.py new file mode 100644 index 0000000000..c2dee72122 --- /dev/null +++ b/src/openai/resources/evals/runs/output_items.py @@ -0,0 +1,315 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.evals.runs import output_item_list_params +from ....types.evals.runs.output_item_list_response import OutputItemListResponse +from ....types.evals.runs.output_item_retrieve_response import OutputItemRetrieveResponse + +__all__ = ["OutputItems", "AsyncOutputItems"] + + +class OutputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> OutputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return OutputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> OutputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return OutputItemsWithStreamingResponse(self) + + def retrieve( + self, + output_item_id: str, + *, + eval_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> OutputItemRetrieveResponse: + """ + Get an evaluation run output item by ID. 
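+
+        Illustrative sketch (all IDs are placeholders; the nested
+        `client.evals.runs.output_items` accessor path is assumed from the resource layout):
+
+            output_item = client.evals.runs.output_items.retrieve(
+                "outputitem_abc123",
+                eval_id="eval_67abc",
+                run_id="evalrun_67abc",
+            )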
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not output_item_id: + raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}") + return self._get( + f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=OutputItemRetrieveResponse, + ) + + def list( + self, + run_id: str, + *, + eval_id: str, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + status: Literal["fail", "pass"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[OutputItemListResponse]: + """ + Get a list of output items for an evaluation run. + + Args: + after: Identifier for the last output item from the previous pagination request. + + limit: Number of output items to retrieve. + + order: Sort order for output items by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + status: Filter output items by status. Use `failed` to filter by failed output items or + `pass` to filter by passed output items. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs/{run_id}/output_items", + page=SyncCursorPage[OutputItemListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + output_item_list_params.OutputItemListParams, + ), + ), + model=OutputItemListResponse, + ) + + +class AsyncOutputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncOutputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
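+
+        A hedged sketch of raw-response access (placeholder IDs; `client` is assumed to be
+        an `AsyncOpenAI` instance used inside an async function):
+
+            response = await client.evals.runs.output_items.with_raw_response.retrieve(
+                "outputitem_abc123",
+                eval_id="eval_67abc",
+                run_id="evalrun_67abc",
+            )
+            # Inspect headers, then parse the body into the typed response model.
+            print(response.headers.get("x-request-id"))
+            output_item = response.parse()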
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncOutputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncOutputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncOutputItemsWithStreamingResponse(self) + + async def retrieve( + self, + output_item_id: str, + *, + eval_id: str, + run_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> OutputItemRetrieveResponse: + """ + Get an evaluation run output item by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + if not output_item_id: + raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}") + return await self._get( + f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=OutputItemRetrieveResponse, + ) + + def list( + self, + run_id: str, + *, + eval_id: str, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + status: Literal["fail", "pass"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[OutputItemListResponse, AsyncCursorPage[OutputItemListResponse]]: + """ + Get a list of output items for an evaluation run. + + Args: + after: Identifier for the last output item from the previous pagination request. + + limit: Number of output items to retrieve. + + order: Sort order for output items by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + status: Filter output items by status. Use `failed` to filter by failed output items or + `pass` to filter by passed output items. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs/{run_id}/output_items", + page=AsyncCursorPage[OutputItemListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + output_item_list_params.OutputItemListParams, + ), + ), + model=OutputItemListResponse, + ) + + +class OutputItemsWithRawResponse: + def __init__(self, output_items: OutputItems) -> None: + self._output_items = output_items + + self.retrieve = _legacy_response.to_raw_response_wrapper( + output_items.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + output_items.list, + ) + + +class AsyncOutputItemsWithRawResponse: + def __init__(self, output_items: AsyncOutputItems) -> None: + self._output_items = output_items + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + output_items.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + output_items.list, + ) + + +class OutputItemsWithStreamingResponse: + def __init__(self, output_items: OutputItems) -> None: + self._output_items = output_items + + self.retrieve = to_streamed_response_wrapper( + output_items.retrieve, + ) + self.list = to_streamed_response_wrapper( + output_items.list, + ) + + +class AsyncOutputItemsWithStreamingResponse: + def __init__(self, output_items: AsyncOutputItems) -> None: + self._output_items = output_items + + self.retrieve = async_to_streamed_response_wrapper( + output_items.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + output_items.list, + ) diff --git a/src/openai/resources/evals/runs/runs.py b/src/openai/resources/evals/runs/runs.py new file mode 100644 index 0000000000..b747b198f8 --- /dev/null +++ b/src/openai/resources/evals/runs/runs.py @@ -0,0 +1,634 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .output_items import ( + OutputItems, + AsyncOutputItems, + OutputItemsWithRawResponse, + AsyncOutputItemsWithRawResponse, + OutputItemsWithStreamingResponse, + AsyncOutputItemsWithStreamingResponse, +) +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.evals import run_list_params, run_create_params +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared_params.metadata import Metadata +from ....types.evals.run_list_response import RunListResponse +from ....types.evals.run_cancel_response import RunCancelResponse +from ....types.evals.run_create_response import RunCreateResponse +from ....types.evals.run_delete_response import RunDeleteResponse +from ....types.evals.run_retrieve_response import RunRetrieveResponse + +__all__ = ["Runs", "AsyncRuns"] + + +class Runs(SyncAPIResource): + @cached_property + def output_items(self) -> OutputItems: + return OutputItems(self._client) + + @cached_property + def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RunsWithStreamingResponse(self) + + def create( + self, + eval_id: str, + *, + data_source: run_create_params.DataSource, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunCreateResponse: + """ + Kicks off a new run for a given evaluation, specifying the data source, and what + model configuration to use to test. The datasource will be validated against the + schema specified in the config of the evaluation. + + Args: + data_source: Details about the run's data source. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._post( + f"/evals/{eval_id}/runs", + body=maybe_transform( + { + "data_source": data_source, + "metadata": metadata, + "name": name, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCreateResponse, + ) + + def retrieve( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunRetrieveResponse: + """ + Get an evaluation run by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._get( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunRetrieveResponse, + ) + + def list( + self, + eval_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[RunListResponse]: + """ + Get a list of runs for an evaluation. + + Args: + after: Identifier for the last run from the previous pagination request. + + limit: Number of runs to retrieve. + + order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for + descending order. Defaults to `asc`. + + status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` + | `canceled`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs", + page=SyncCursorPage[RunListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + run_list_params.RunListParams, + ), + ), + model=RunListResponse, + ) + + def delete( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunDeleteResponse: + """ + Delete an eval run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._delete( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunDeleteResponse, + ) + + def cancel( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunCancelResponse: + """ + Cancel an ongoing evaluation run. 
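For orientation, a minimal sketch of the synchronous run methods defined above — the IDs are placeholders, the `client.evals.runs` path is assumed from how the parent `evals` resource is wired elsewhere in this patch, and `create()` is omitted because the `data_source` payload shape lives in `run_create_params.DataSource` rather than in this file:

from openai import OpenAI

client = OpenAI()

# list() auto-paginates over SyncCursorPage[RunListResponse].
for run in client.evals.runs.list("eval_abc123", status="completed"):
    print(run.id)

# retrieve/cancel/delete all take the run ID positionally plus eval_id.
run = client.evals.runs.retrieve("run_abc123", eval_id="eval_abc123")
client.evals.runs.cancel("run_abc123", eval_id="eval_abc123")
client.evals.runs.delete("run_abc123", eval_id="eval_abc123")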
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return self._post( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCancelResponse, + ) + + +class AsyncRuns(AsyncAPIResource): + @cached_property + def output_items(self) -> AsyncOutputItems: + return AsyncOutputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRunsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRunsWithStreamingResponse(self) + + async def create( + self, + eval_id: str, + *, + data_source: run_create_params.DataSource, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunCreateResponse: + """ + Kicks off a new run for a given evaluation, specifying the data source, and what + model configuration to use to test. The datasource will be validated against the + schema specified in the config of the evaluation. + + Args: + data_source: Details about the run's data source. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + name: The name of the run. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return await self._post( + f"/evals/{eval_id}/runs", + body=await async_maybe_transform( + { + "data_source": data_source, + "metadata": metadata, + "name": name, + }, + run_create_params.RunCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCreateResponse, + ) + + async def retrieve( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunRetrieveResponse: + """ + Get an evaluation run by ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._get( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunRetrieveResponse, + ) + + def list( + self, + eval_id: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[RunListResponse, AsyncCursorPage[RunListResponse]]: + """ + Get a list of runs for an evaluation. + + Args: + after: Identifier for the last run from the previous pagination request. + + limit: Number of runs to retrieve. + + order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for + descending order. Defaults to `asc`. + + status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` + | `canceled`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + return self._get_api_list( + f"/evals/{eval_id}/runs", + page=AsyncCursorPage[RunListResponse], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "status": status, + }, + run_list_params.RunListParams, + ), + ), + model=RunListResponse, + ) + + async def delete( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunDeleteResponse: + """ + Delete an eval run. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._delete( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunDeleteResponse, + ) + + async def cancel( + self, + run_id: str, + *, + eval_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> RunCancelResponse: + """ + Cancel an ongoing evaluation run. 
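The async class mirrors the sync one; a brief sketch under the same placeholder assumptions, using `await` for single calls and `async for` for cursor pagination:

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    run = await client.evals.runs.retrieve("run_abc123", eval_id="eval_abc123")
    print(run.status)

    # AsyncCursorPage[RunListResponse] is consumed with `async for`.
    async for r in client.evals.runs.list("eval_abc123", order="desc", limit=20):
        print(r.id)


asyncio.run(main())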
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not eval_id: + raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + return await self._post( + f"/evals/{eval_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RunCancelResponse, + ) + + +class RunsWithRawResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = _legacy_response.to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + runs.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + runs.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + runs.delete, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> OutputItemsWithRawResponse: + return OutputItemsWithRawResponse(self._runs.output_items) + + +class AsyncRunsWithRawResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = _legacy_response.async_to_raw_response_wrapper( + runs.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + runs.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + runs.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + runs.delete, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> AsyncOutputItemsWithRawResponse: + return AsyncOutputItemsWithRawResponse(self._runs.output_items) + + +class RunsWithStreamingResponse: + def __init__(self, runs: Runs) -> None: + self._runs = runs + + self.create = to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = to_streamed_response_wrapper( + runs.retrieve, + ) + self.list = to_streamed_response_wrapper( + runs.list, + ) + self.delete = to_streamed_response_wrapper( + runs.delete, + ) + self.cancel = to_streamed_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> OutputItemsWithStreamingResponse: + return OutputItemsWithStreamingResponse(self._runs.output_items) + + +class AsyncRunsWithStreamingResponse: + def __init__(self, runs: AsyncRuns) -> None: + self._runs = runs + + self.create = async_to_streamed_response_wrapper( + runs.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + runs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + runs.list, + ) + self.delete = async_to_streamed_response_wrapper( + runs.delete, + ) + self.cancel = async_to_streamed_response_wrapper( + runs.cancel, + ) + + @cached_property + def output_items(self) -> AsyncOutputItemsWithStreamingResponse: + return AsyncOutputItemsWithStreamingResponse(self._runs.output_items) diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py index aed0829dfe..0e9671e23e 100644 --- a/src/openai/resources/files.py +++ b/src/openai/resources/files.py @@ -2,7 +2,6 @@ from __future__ import annotations -import time import typing_extensions from typing import Mapping, cast from typing_extensions import Literal @@ -10,14 +9,9 @@ 
import httpx from .. import _legacy_response -from ..types import file_list_params, file_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import ( - extract_files, - maybe_transform, - deepcopy_minimal, - async_maybe_transform, -) +from ..types import FilePurpose, file_list_params, file_create_params +from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given +from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -28,13 +22,11 @@ to_custom_streamed_response_wrapper, async_to_custom_streamed_response_wrapper, ) -from ..pagination import SyncPage, AsyncPage -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from ..pagination import SyncCursorPage, AsyncCursorPage +from .._base_client import AsyncPaginator, make_request_options from ..types.file_object import FileObject from ..types.file_deleted import FileDeleted +from ..types.file_purpose import FilePurpose __all__ = ["Files", "AsyncFiles"] @@ -42,38 +34,55 @@ class Files(SyncAPIResource): @cached_property def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FilesWithStreamingResponse(self) def create( self, *, file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], + purpose: FilePurpose, + expires_after: file_create_params.ExpiresAfter | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileObject: """Upload a file that can be used across various endpoints. Individual files can be up to 512 MB, and the size of all files uploaded by one organization can be up - to 100 GB. - - The Assistants API supports files up to 2 million tokens and of specific file - types. See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for - details. - - The Fine-tuning API only supports `.jsonl` files. - - The Batch API only supports `.jsonl` files up to 100 MB in size. + to 1 TB. + + - The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) + for details. + - The Fine-tuning API only supports `.jsonl` files. 
The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) + or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. + - The Batch API only supports `.jsonl` files up to 200 MB in size. The input + also has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -81,14 +90,13 @@ def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. + purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets - Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire + after 30 days and all other files are persisted until they are manually deleted. extra_headers: Send extra headers @@ -102,14 +110,14 @@ def create( { "file": file, "purpose": purpose, + "expires_after": expires_after, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/files", body=maybe_transform(body, file_create_params.FileCreateParams), @@ -129,7 +137,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileObject: """ Returns information about a specific file. @@ -156,18 +164,33 @@ def retrieve( def list( self, *, - purpose: str | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + purpose: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncPage[FileObject]: - """ - Returns a list of files that belong to the user's organization. + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[FileObject]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. extra_headers: Send extra headers @@ -180,13 +203,21 @@ def list( """ return self._get_api_list( "/files", - page=SyncPage[FileObject], + page=SyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -200,10 +231,10 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileDeleted: """ - Delete a file. + Delete a file and remove it from all vector stores. Args: extra_headers: Send extra headers @@ -233,7 +264,7 @@ def content( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> _legacy_response.HttpxBinaryResponseContent: """ Returns the contents of the specified file. @@ -268,7 +299,7 @@ def retrieve_content( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> str: """ Returns the contents of the specified file. @@ -292,65 +323,59 @@ def retrieve_content( cast_to=str, ) - def wait_for_processing( - self, - id: str, - *, - poll_interval: float = 5.0, - max_wait_seconds: float = 30 * 60, - ) -> FileObject: - """Waits for the given file to be processed, default timeout is 30 mins.""" - TERMINAL_STATES = {"processed", "error", "deleted"} - - start = time.time() - file = self.retrieve(id) - while file.status not in TERMINAL_STATES: - self._sleep(poll_interval) - - file = self.retrieve(id) - if time.time() - start > max_wait_seconds: - raise RuntimeError( - f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." 
- ) - - return file - class AsyncFiles(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFilesWithStreamingResponse(self) async def create( self, *, file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], + purpose: FilePurpose, + expires_after: file_create_params.ExpiresAfter | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileObject: """Upload a file that can be used across various endpoints. Individual files can be up to 512 MB, and the size of all files uploaded by one organization can be up - to 100 GB. - - The Assistants API supports files up to 2 million tokens and of specific file - types. See the - [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for - details. - - The Fine-tuning API only supports `.jsonl` files. - - The Batch API only supports `.jsonl` files up to 100 MB in size. + to 1 TB. + + - The Assistants API supports files up to 2 million tokens and of specific file + types. See the + [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) + for details. + - The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) + or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. + - The Batch API only supports `.jsonl` files up to 200 MB in size. The input + also has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -358,14 +383,13 @@ async def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. + purpose: The intended purpose of the uploaded file. 
One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets - Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire + after 30 days and all other files are persisted until they are manually deleted. extra_headers: Send extra headers @@ -379,14 +403,14 @@ async def create( { "file": file, "purpose": purpose, + "expires_after": expires_after, } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/files", body=await async_maybe_transform(body, file_create_params.FileCreateParams), @@ -406,7 +430,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileObject: """ Returns information about a specific file. @@ -433,18 +457,33 @@ async def retrieve( def list( self, *, - purpose: str | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + purpose: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[FileObject, AsyncPage[FileObject]]: - """ - Returns a list of files that belong to the user's organization. + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. 
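Taken together, the files changes in this patch widen `purpose` to the `FilePurpose` set, switch `list()` to cursor pagination, and drop the `wait_for_processing()` helper. A minimal sync sketch — the filename is a placeholder, and the polling loop simply re-creates what the removed helper did:

import time

from openai import OpenAI

client = OpenAI()

# `purpose` now accepts the wider set: assistants, batch, fine-tune, vision, user_data, evals.
uploaded = client.files.create(file=open("data.jsonl", "rb"), purpose="evals")

# Listing is now cursor-paginated (SyncCursorPage[FileObject]) with after/limit/order.
for f in client.files.list(limit=100, order="desc"):
    print(f.id, f.purpose)

# wait_for_processing() was removed in this change; poll manually if you relied on it.
file = client.files.retrieve(uploaded.id)
while file.status not in {"processed", "error", "deleted"}:
    time.sleep(5)
    file = client.files.retrieve(file.id)

The async variants in this same hunk mirror these calls with `await` and `async for`.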
`asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. extra_headers: Send extra headers @@ -457,13 +496,21 @@ def list( """ return self._get_api_list( "/files", - page=AsyncPage[FileObject], + page=AsyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -477,10 +524,10 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileDeleted: """ - Delete a file. + Delete a file and remove it from all vector stores. Args: extra_headers: Send extra headers @@ -510,7 +557,7 @@ async def content( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> _legacy_response.HttpxBinaryResponseContent: """ Returns the contents of the specified file. @@ -545,7 +592,7 @@ async def retrieve_content( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> str: """ Returns the contents of the specified file. @@ -569,29 +616,6 @@ async def retrieve_content( cast_to=str, ) - async def wait_for_processing( - self, - id: str, - *, - poll_interval: float = 5.0, - max_wait_seconds: float = 30 * 60, - ) -> FileObject: - """Waits for the given file to be processed, default timeout is 30 mins.""" - TERMINAL_STATES = {"processed", "error", "deleted"} - - start = time.time() - file = await self.retrieve(id) - while file.status not in TERMINAL_STATES: - await self._sleep(poll_interval) - - file = await self.retrieve(id) - if time.time() - start > max_wait_seconds: - raise RuntimeError( - f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." 
- ) - - return file - class FilesWithRawResponse: def __init__(self, files: Files) -> None: @@ -614,7 +638,7 @@ def __init__(self, files: Files) -> None: ) self.retrieve_content = ( # pyright: ignore[reportDeprecated] _legacy_response.to_raw_response_wrapper( - files.retrieve_content # pyright: ignore[reportDeprecated], + files.retrieve_content, # pyright: ignore[reportDeprecated], ) ) @@ -640,7 +664,7 @@ def __init__(self, files: AsyncFiles) -> None: ) self.retrieve_content = ( # pyright: ignore[reportDeprecated] _legacy_response.async_to_raw_response_wrapper( - files.retrieve_content # pyright: ignore[reportDeprecated], + files.retrieve_content, # pyright: ignore[reportDeprecated], ) ) @@ -667,7 +691,7 @@ def __init__(self, files: Files) -> None: ) self.retrieve_content = ( # pyright: ignore[reportDeprecated] to_streamed_response_wrapper( - files.retrieve_content # pyright: ignore[reportDeprecated], + files.retrieve_content, # pyright: ignore[reportDeprecated], ) ) @@ -694,6 +718,6 @@ def __init__(self, files: AsyncFiles) -> None: ) self.retrieve_content = ( # pyright: ignore[reportDeprecated] async_to_streamed_response_wrapper( - files.retrieve_content # pyright: ignore[reportDeprecated], + files.retrieve_content, # pyright: ignore[reportDeprecated], ) ) diff --git a/src/openai/resources/fine_tuning/__init__.py b/src/openai/resources/fine_tuning/__init__.py index 7765231fee..c76af83deb 100644 --- a/src/openai/resources/fine_tuning/__init__.py +++ b/src/openai/resources/fine_tuning/__init__.py @@ -8,6 +8,22 @@ JobsWithStreamingResponse, AsyncJobsWithStreamingResponse, ) +from .alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) from .fine_tuning import ( FineTuning, AsyncFineTuning, @@ -24,6 +40,18 @@ "AsyncJobsWithRawResponse", "JobsWithStreamingResponse", "AsyncJobsWithStreamingResponse", + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", + "Alpha", + "AsyncAlpha", + "AlphaWithRawResponse", + "AsyncAlphaWithRawResponse", + "AlphaWithStreamingResponse", + "AsyncAlphaWithStreamingResponse", "FineTuning", "AsyncFineTuning", "FineTuningWithRawResponse", diff --git a/src/openai/resources/fine_tuning/alpha/__init__.py b/src/openai/resources/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..8bed8af4fd --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .graders import ( + Graders, + AsyncGraders, + GradersWithRawResponse, + AsyncGradersWithRawResponse, + GradersWithStreamingResponse, + AsyncGradersWithStreamingResponse, +) + +__all__ = [ + "Graders", + "AsyncGraders", + "GradersWithRawResponse", + "AsyncGradersWithRawResponse", + "GradersWithStreamingResponse", + "AsyncGradersWithStreamingResponse", + "Alpha", + "AsyncAlpha", + "AlphaWithRawResponse", + "AsyncAlphaWithRawResponse", + "AlphaWithStreamingResponse", + "AsyncAlphaWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/alpha/alpha.py b/src/openai/resources/fine_tuning/alpha/alpha.py new file mode 100644 index 0000000000..54c05fab69 --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/alpha.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .graders import ( + Graders, + AsyncGraders, + GradersWithRawResponse, + AsyncGradersWithRawResponse, + GradersWithStreamingResponse, + AsyncGradersWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Alpha", "AsyncAlpha"] + + +class Alpha(SyncAPIResource): + @cached_property + def graders(self) -> Graders: + return Graders(self._client) + + @cached_property + def with_raw_response(self) -> AlphaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AlphaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AlphaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AlphaWithStreamingResponse(self) + + +class AsyncAlpha(AsyncAPIResource): + @cached_property + def graders(self) -> AsyncGraders: + return AsyncGraders(self._client) + + @cached_property + def with_raw_response(self) -> AsyncAlphaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncAlphaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncAlphaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncAlphaWithStreamingResponse(self) + + +class AlphaWithRawResponse: + def __init__(self, alpha: Alpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> GradersWithRawResponse: + return GradersWithRawResponse(self._alpha.graders) + + +class AsyncAlphaWithRawResponse: + def __init__(self, alpha: AsyncAlpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> AsyncGradersWithRawResponse: + return AsyncGradersWithRawResponse(self._alpha.graders) + + +class AlphaWithStreamingResponse: + def __init__(self, alpha: Alpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> GradersWithStreamingResponse: + return GradersWithStreamingResponse(self._alpha.graders) + + +class AsyncAlphaWithStreamingResponse: + def __init__(self, alpha: AsyncAlpha) -> None: + self._alpha = alpha + + @cached_property + def graders(self) -> AsyncGradersWithStreamingResponse: + return AsyncGradersWithStreamingResponse(self._alpha.graders) diff --git a/src/openai/resources/fine_tuning/alpha/graders.py b/src/openai/resources/fine_tuning/alpha/graders.py new file mode 100644 index 0000000000..e7a9b925ea --- /dev/null +++ b/src/openai/resources/fine_tuning/alpha/graders.py @@ -0,0 +1,282 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .... import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.fine_tuning.alpha import grader_run_params, grader_validate_params +from ....types.fine_tuning.alpha.grader_run_response import GraderRunResponse +from ....types.fine_tuning.alpha.grader_validate_response import GraderValidateResponse + +__all__ = ["Graders", "AsyncGraders"] + + +class Graders(SyncAPIResource): + @cached_property + def with_raw_response(self) -> GradersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return GradersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> GradersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return GradersWithStreamingResponse(self) + + def run( + self, + *, + grader: grader_run_params.Grader, + model_sample: str, + item: object | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> GraderRunResponse: + """ + Run a grader. + + Args: + grader: The grader used for the fine-tuning job. + + model_sample: The model sample to be evaluated. This value will be used to populate the + `sample` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + The `output_json` variable will be populated if the model sample is a valid JSON + string. + + item: The dataset item provided to the grader. This will be used to populate the + `item` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/alpha/graders/run", + body=maybe_transform( + { + "grader": grader, + "model_sample": model_sample, + "item": item, + }, + grader_run_params.GraderRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderRunResponse, + ) + + def validate( + self, + *, + grader: grader_validate_params.Grader, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> GraderValidateResponse: + """ + Validate a grader. + + Args: + grader: The grader used for the fine-tuning job. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/fine_tuning/alpha/graders/validate", + body=maybe_transform({"grader": grader}, grader_validate_params.GraderValidateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderValidateResponse, + ) + + +class AsyncGraders(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncGradersWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncGradersWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncGradersWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncGradersWithStreamingResponse(self) + + async def run( + self, + *, + grader: grader_run_params.Grader, + model_sample: str, + item: object | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> GraderRunResponse: + """ + Run a grader. + + Args: + grader: The grader used for the fine-tuning job. + + model_sample: The model sample to be evaluated. This value will be used to populate the + `sample` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + The `output_json` variable will be populated if the model sample is a valid JSON + string. + + item: The dataset item provided to the grader. This will be used to populate the + `item` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/alpha/graders/run", + body=await async_maybe_transform( + { + "grader": grader, + "model_sample": model_sample, + "item": item, + }, + grader_run_params.GraderRunParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderRunResponse, + ) + + async def validate( + self, + *, + grader: grader_validate_params.Grader, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> GraderValidateResponse: + """ + Validate a grader. + + Args: + grader: The grader used for the fine-tuning job. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/fine_tuning/alpha/graders/validate", + body=await async_maybe_transform({"grader": grader}, grader_validate_params.GraderValidateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GraderValidateResponse, + ) + + +class GradersWithRawResponse: + def __init__(self, graders: Graders) -> None: + self._graders = graders + + self.run = _legacy_response.to_raw_response_wrapper( + graders.run, + ) + self.validate = _legacy_response.to_raw_response_wrapper( + graders.validate, + ) + + +class AsyncGradersWithRawResponse: + def __init__(self, graders: AsyncGraders) -> None: + self._graders = graders + + self.run = _legacy_response.async_to_raw_response_wrapper( + graders.run, + ) + self.validate = _legacy_response.async_to_raw_response_wrapper( + graders.validate, + ) + + +class GradersWithStreamingResponse: + def __init__(self, graders: Graders) -> None: + self._graders = graders + + self.run = to_streamed_response_wrapper( + graders.run, + ) + self.validate = to_streamed_response_wrapper( + graders.validate, + ) + + +class AsyncGradersWithStreamingResponse: + def __init__(self, graders: AsyncGraders) -> None: + self._graders = graders + + self.run = async_to_streamed_response_wrapper( + graders.run, + ) + self.validate = async_to_streamed_response_wrapper( + graders.validate, + ) diff --git a/src/openai/resources/fine_tuning/checkpoints/__init__.py b/src/openai/resources/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..fdc37940f9 --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) +from .permissions import ( + Permissions, + AsyncPermissions, + PermissionsWithRawResponse, + AsyncPermissionsWithRawResponse, + PermissionsWithStreamingResponse, + AsyncPermissionsWithStreamingResponse, +) + +__all__ = [ + "Permissions", + "AsyncPermissions", + "PermissionsWithRawResponse", + "AsyncPermissionsWithRawResponse", + "PermissionsWithStreamingResponse", + "AsyncPermissionsWithStreamingResponse", + "Checkpoints", + "AsyncCheckpoints", + "CheckpointsWithRawResponse", + "AsyncCheckpointsWithRawResponse", + "CheckpointsWithStreamingResponse", + "AsyncCheckpointsWithStreamingResponse", +] diff --git a/src/openai/resources/fine_tuning/checkpoints/checkpoints.py b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py new file mode 100644 index 0000000000..f59976a264 --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
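A hedged sketch of exercising the new grader endpoints — the `client.fine_tuning.alpha.graders` path follows from how `Alpha` wires in `Graders` above, but the grader payload below is an assumed `string_check` shape used only for illustration; the real schema lives in `grader_run_params.Grader` / `grader_validate_params.Grader`, not in this diff:

from openai import OpenAI

client = OpenAI()

# Assumed string_check grader payload (not taken from this patch).
grader = {
    "type": "string_check",
    "name": "exact_match",
    "input": "{{ sample.output_text }}",
    "reference": "{{ item.reference }}",
    "operation": "eq",
}

# Validate the grader definition, then run it against a single sample.
client.fine_tuning.alpha.graders.validate(grader=grader)
result = client.fine_tuning.alpha.graders.run(
    grader=grader,
    model_sample="Paris",
    item={"reference": "Paris"},
)
print(result)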
+ +from __future__ import annotations + +from ...._compat import cached_property +from .permissions import ( + Permissions, + AsyncPermissions, + PermissionsWithRawResponse, + AsyncPermissionsWithRawResponse, + PermissionsWithStreamingResponse, + AsyncPermissionsWithStreamingResponse, +) +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Checkpoints", "AsyncCheckpoints"] + + +class Checkpoints(SyncAPIResource): + @cached_property + def permissions(self) -> Permissions: + return Permissions(self._client) + + @cached_property + def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CheckpointsWithStreamingResponse(self) + + +class AsyncCheckpoints(AsyncAPIResource): + @cached_property + def permissions(self) -> AsyncPermissions: + return AsyncPermissions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCheckpointsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCheckpointsWithStreamingResponse(self) + + +class CheckpointsWithRawResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> PermissionsWithRawResponse: + return PermissionsWithRawResponse(self._checkpoints.permissions) + + +class AsyncCheckpointsWithRawResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> AsyncPermissionsWithRawResponse: + return AsyncPermissionsWithRawResponse(self._checkpoints.permissions) + + +class CheckpointsWithStreamingResponse: + def __init__(self, checkpoints: Checkpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> PermissionsWithStreamingResponse: + return PermissionsWithStreamingResponse(self._checkpoints.permissions) + + +class AsyncCheckpointsWithStreamingResponse: + def __init__(self, checkpoints: AsyncCheckpoints) -> None: + self._checkpoints = checkpoints + + @cached_property + def permissions(self) -> AsyncPermissionsWithStreamingResponse: + return AsyncPermissionsWithStreamingResponse(self._checkpoints.permissions) diff --git a/src/openai/resources/fine_tuning/checkpoints/permissions.py b/src/openai/resources/fine_tuning/checkpoints/permissions.py new file mode 100644 index 0000000000..e7f55b82d9 --- /dev/null +++ b/src/openai/resources/fine_tuning/checkpoints/permissions.py @@ -0,0 +1,418 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncPage, AsyncPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.fine_tuning.checkpoints import permission_create_params, permission_retrieve_params +from ....types.fine_tuning.checkpoints.permission_create_response import PermissionCreateResponse +from ....types.fine_tuning.checkpoints.permission_delete_response import PermissionDeleteResponse +from ....types.fine_tuning.checkpoints.permission_retrieve_response import PermissionRetrieveResponse + +__all__ = ["Permissions", "AsyncPermissions"] + + +class Permissions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PermissionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return PermissionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PermissionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return PermissionsWithStreamingResponse(self) + + def create( + self, + fine_tuned_model_checkpoint: str, + *, + project_ids: SequenceNotStr[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncPage[PermissionCreateResponse]: + """ + **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys). + + This enables organization owners to share fine-tuned models with other projects + in their organization. + + Args: + project_ids: The project identifiers to grant access to. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get_api_list( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + page=SyncPage[PermissionCreateResponse], + body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=PermissionCreateResponse, + method="post", + ) + + def retrieve( + self, + fine_tuned_model_checkpoint: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["ascending", "descending"] | Omit = omit, + project_id: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> PermissionRetrieveResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to view all permissions for a + fine-tuned model checkpoint. + + Args: + after: Identifier for the last permission ID from the previous pagination request. + + limit: Number of permissions to retrieve. + + order: The order in which to retrieve permissions. + + project_id: The ID of the project to get permissions for. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "project_id": project_id, + }, + permission_retrieve_params.PermissionRetrieveParams, + ), + ), + cast_to=PermissionRetrieveResponse, + ) + + def delete( + self, + permission_id: str, + *, + fine_tuned_model_checkpoint: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> PermissionDeleteResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to delete a permission for a + fine-tuned model checkpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + if not permission_id: + raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}") + return self._delete( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PermissionDeleteResponse, + ) + + +class AsyncPermissions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPermissionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncPermissionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPermissionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncPermissionsWithStreamingResponse(self) + + def create( + self, + fine_tuned_model_checkpoint: str, + *, + project_ids: SequenceNotStr[str], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[PermissionCreateResponse, AsyncPage[PermissionCreateResponse]]: + """ + **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys). + + This enables organization owners to share fine-tuned models with other projects + in their organization. + + Args: + project_ids: The project identifiers to grant access to. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return self._get_api_list( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + page=AsyncPage[PermissionCreateResponse], + body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=PermissionCreateResponse, + method="post", + ) + + async def retrieve( + self, + fine_tuned_model_checkpoint: str, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["ascending", "descending"] | Omit = omit, + project_id: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> PermissionRetrieveResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to view all permissions for a + fine-tuned model checkpoint. + + Args: + after: Identifier for the last permission ID from the previous pagination request. + + limit: Number of permissions to retrieve. + + order: The order in which to retrieve permissions. + + project_id: The ID of the project to get permissions for. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + return await self._get( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "project_id": project_id, + }, + permission_retrieve_params.PermissionRetrieveParams, + ), + ), + cast_to=PermissionRetrieveResponse, + ) + + async def delete( + self, + permission_id: str, + *, + fine_tuned_model_checkpoint: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> PermissionDeleteResponse: + """ + **NOTE:** This endpoint requires an [admin API key](../admin-api-keys). + + Organization owners can use this endpoint to delete a permission for a + fine-tuned model checkpoint. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuned_model_checkpoint: + raise ValueError( + f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}" + ) + if not permission_id: + raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}") + return await self._delete( + f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PermissionDeleteResponse, + ) + + +class PermissionsWithRawResponse: + def __init__(self, permissions: Permissions) -> None: + self._permissions = permissions + + self.create = _legacy_response.to_raw_response_wrapper( + permissions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + permissions.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + permissions.delete, + ) + + +class AsyncPermissionsWithRawResponse: + def __init__(self, permissions: AsyncPermissions) -> None: + self._permissions = permissions + + self.create = _legacy_response.async_to_raw_response_wrapper( + permissions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + permissions.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + permissions.delete, + ) + + +class PermissionsWithStreamingResponse: + def __init__(self, permissions: Permissions) -> None: + self._permissions = permissions + + self.create = to_streamed_response_wrapper( + permissions.create, + ) + self.retrieve = to_streamed_response_wrapper( + permissions.retrieve, + ) + 
self.delete = to_streamed_response_wrapper( + permissions.delete, + ) + + +class AsyncPermissionsWithStreamingResponse: + def __init__(self, permissions: AsyncPermissions) -> None: + self._permissions = permissions + + self.create = async_to_streamed_response_wrapper( + permissions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + permissions.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + permissions.delete, + ) diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py index 0404fed6ec..25ae3e8cf4 100644 --- a/src/openai/resources/fine_tuning/fine_tuning.py +++ b/src/openai/resources/fine_tuning/fine_tuning.py @@ -2,7 +2,8 @@ from __future__ import annotations -from .jobs import ( +from ..._compat import cached_property +from .jobs.jobs import ( Jobs, AsyncJobs, JobsWithRawResponse, @@ -10,9 +11,23 @@ JobsWithStreamingResponse, AsyncJobsWithStreamingResponse, ) -from ..._compat import cached_property -from .jobs.jobs import Jobs, AsyncJobs from ..._resource import SyncAPIResource, AsyncAPIResource +from .alpha.alpha import ( + Alpha, + AsyncAlpha, + AlphaWithRawResponse, + AsyncAlphaWithRawResponse, + AlphaWithStreamingResponse, + AsyncAlphaWithStreamingResponse, +) +from .checkpoints.checkpoints import ( + Checkpoints, + AsyncCheckpoints, + CheckpointsWithRawResponse, + AsyncCheckpointsWithRawResponse, + CheckpointsWithStreamingResponse, + AsyncCheckpointsWithStreamingResponse, +) __all__ = ["FineTuning", "AsyncFineTuning"] @@ -22,12 +37,31 @@ class FineTuning(SyncAPIResource): def jobs(self) -> Jobs: return Jobs(self._client) + @cached_property + def checkpoints(self) -> Checkpoints: + return Checkpoints(self._client) + + @cached_property + def alpha(self) -> Alpha: + return Alpha(self._client) + @cached_property def with_raw_response(self) -> FineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FineTuningWithRawResponse(self) @cached_property def with_streaming_response(self) -> FineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FineTuningWithStreamingResponse(self) @@ -36,12 +70,31 @@ class AsyncFineTuning(AsyncAPIResource): def jobs(self) -> AsyncJobs: return AsyncJobs(self._client) + @cached_property + def checkpoints(self) -> AsyncCheckpoints: + return AsyncCheckpoints(self._client) + + @cached_property + def alpha(self) -> AsyncAlpha: + return AsyncAlpha(self._client) + @cached_property def with_raw_response(self) -> AsyncFineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFineTuningWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFineTuningWithStreamingResponse(self) @@ -53,6 +106,14 @@ def __init__(self, fine_tuning: FineTuning) -> None: def jobs(self) -> JobsWithRawResponse: return JobsWithRawResponse(self._fine_tuning.jobs) + @cached_property + def checkpoints(self) -> CheckpointsWithRawResponse: + return CheckpointsWithRawResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AlphaWithRawResponse: + return AlphaWithRawResponse(self._fine_tuning.alpha) + class AsyncFineTuningWithRawResponse: def __init__(self, fine_tuning: AsyncFineTuning) -> None: @@ -62,6 +123,14 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None: def jobs(self) -> AsyncJobsWithRawResponse: return AsyncJobsWithRawResponse(self._fine_tuning.jobs) + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithRawResponse: + return AsyncCheckpointsWithRawResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AsyncAlphaWithRawResponse: + return AsyncAlphaWithRawResponse(self._fine_tuning.alpha) + class FineTuningWithStreamingResponse: def __init__(self, fine_tuning: FineTuning) -> None: @@ -71,6 +140,14 @@ def __init__(self, fine_tuning: FineTuning) -> None: def jobs(self) -> JobsWithStreamingResponse: return JobsWithStreamingResponse(self._fine_tuning.jobs) + @cached_property + def checkpoints(self) -> CheckpointsWithStreamingResponse: + return CheckpointsWithStreamingResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AlphaWithStreamingResponse: + return AlphaWithStreamingResponse(self._fine_tuning.alpha) + class AsyncFineTuningWithStreamingResponse: def __init__(self, fine_tuning: AsyncFineTuning) -> None: @@ -79,3 +156,11 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None: @cached_property def jobs(self) -> AsyncJobsWithStreamingResponse: return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs) + + @cached_property + def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: + return AsyncCheckpointsWithStreamingResponse(self._fine_tuning.checkpoints) + + @cached_property + def alpha(self) -> AsyncAlphaWithStreamingResponse: + return AsyncAlphaWithStreamingResponse(self._fine_tuning.alpha) diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py index 67f5739a02..4e1e56cbee 100644 --- a/src/openai/resources/fine_tuning/jobs/checkpoints.py +++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py @@ -5,16 +5,13 @@ import httpx from .... 
import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given from ...._utils import maybe_transform from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.fine_tuning.jobs import checkpoint_list_params from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint @@ -24,24 +21,35 @@ class Checkpoints(SyncAPIResource): @cached_property def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return CheckpointsWithRawResponse(self) @cached_property def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return CheckpointsWithStreamingResponse(self) def list( self, fine_tuning_job_id: str, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[FineTuningJobCheckpoint]: """ List checkpoints for a fine-tuning job. @@ -84,24 +92,35 @@ def list( class AsyncCheckpoints(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncCheckpointsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncCheckpointsWithStreamingResponse(self) def list( self, fine_tuning_job_id: str, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[FineTuningJobCheckpoint, AsyncCursorPage[FineTuningJobCheckpoint]]: """ List checkpoints for a fine-tuning job. diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index f38956e6be..d7166bd756 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -2,17 +2,14 @@ from __future__ import annotations -from typing import Union, Iterable, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Literal import httpx from .... import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( - maybe_transform, - async_maybe_transform, -) +from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ...._utils import maybe_transform, async_maybe_transform from ...._compat import cached_property from .checkpoints import ( Checkpoints, @@ -25,11 +22,9 @@ from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params +from ....types.shared_params.metadata import Metadata from ....types.fine_tuning.fine_tuning_job import FineTuningJob from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent @@ -43,28 +38,41 @@ def checkpoints(self) -> Checkpoints: @cached_property def with_raw_response(self) -> JobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return JobsWithRawResponse(self) @cached_property def with_streaming_response(self) -> JobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return JobsWithStreamingResponse(self) def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, - hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, - integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - validation_file: Optional[str] | NotGiven = NOT_GIVEN, + hyperparameters: job_create_params.Hyperparameters | Omit = omit, + integrations: Optional[Iterable[job_create_params.Integration]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + method: job_create_params.Method | Omit = omit, + seed: Optional[int] | Omit = omit, + suffix: Optional[str] | Omit = omit, + validation_file: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Creates a fine-tuning job which begins the process of creating a new model from @@ -73,11 +81,11 @@ def create( Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). training_file: The ID of an uploaded file that contains training data. @@ -87,22 +95,40 @@ def create( Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. 
integrations: A list of integrations to enable for your fine-tuning job. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. @@ -114,7 +140,8 @@ def create( Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. extra_headers: Send extra headers @@ -133,6 +160,8 @@ def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "metadata": metadata, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -154,12 +183,12 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Get info about a fine-tuning job. - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) Args: extra_headers: Send extra headers @@ -183,14 +212,15 @@ def retrieve( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[FineTuningJob]: """ List your organization's fine-tuning jobs @@ -200,6 +230,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -220,6 +253,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), @@ -236,7 +270,7 @@ def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Immediately cancel a fine-tune job. @@ -264,14 +298,14 @@ def list_events( self, fine_tuning_job_id: str, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[FineTuningJobEvent]: """ Get status updates for a fine-tuning job. @@ -310,6 +344,72 @@ def list_events( model=FineTuningJobEvent, ) + def pause( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FineTuningJob: + """ + Pause a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/pause", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + def resume( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FineTuningJob: + """ + Resume a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/resume", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + class AsyncJobs(AsyncAPIResource): @cached_property @@ -318,28 +418,41 @@ def checkpoints(self) -> AsyncCheckpoints: @cached_property def with_raw_response(self) -> AsyncJobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncJobsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncJobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncJobsWithStreamingResponse(self) async def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, - hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, - integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - suffix: Optional[str] | NotGiven = NOT_GIVEN, - validation_file: Optional[str] | NotGiven = NOT_GIVEN, + hyperparameters: job_create_params.Hyperparameters | Omit = omit, + integrations: Optional[Iterable[job_create_params.Integration]] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + method: job_create_params.Method | Omit = omit, + seed: Optional[int] | Omit = omit, + suffix: Optional[str] | Omit = omit, + validation_file: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Creates a fine-tuning job which begins the process of creating a new model from @@ -348,11 +461,11 @@ async def create( Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. - [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). 
+ [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). training_file: The ID of an uploaded file that contains training data. @@ -362,22 +475,40 @@ async def create( Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. @@ -389,7 +520,8 @@ async def create( Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. extra_headers: Send extra headers @@ -408,6 +540,8 @@ async def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "metadata": metadata, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -429,12 +563,12 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Get info about a fine-tuning job. 
- [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) Args: extra_headers: Send extra headers @@ -458,14 +592,15 @@ async def retrieve( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, + metadata: Optional[Dict[str, str]] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[FineTuningJob, AsyncCursorPage[FineTuningJob]]: """ List your organization's fine-tuning jobs @@ -475,6 +610,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -495,6 +633,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), @@ -511,7 +650,7 @@ async def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FineTuningJob: """ Immediately cancel a fine-tune job. @@ -539,14 +678,14 @@ def list_events( self, fine_tuning_job_id: str, *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + limit: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[FineTuningJobEvent, AsyncCursorPage[FineTuningJobEvent]]: """ Get status updates for a fine-tuning job. @@ -585,6 +724,72 @@ def list_events( model=FineTuningJobEvent, ) + async def pause( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FineTuningJob: + """ + Pause a fine-tune job. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/pause", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + + async def resume( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FineTuningJob: + """ + Resume a fine-tune job. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return await self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/resume", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FineTuningJob, + ) + class JobsWithRawResponse: def __init__(self, jobs: Jobs) -> None: @@ -605,6 +810,12 @@ def __init__(self, jobs: Jobs) -> None: self.list_events = _legacy_response.to_raw_response_wrapper( jobs.list_events, ) + self.pause = _legacy_response.to_raw_response_wrapper( + jobs.pause, + ) + self.resume = _legacy_response.to_raw_response_wrapper( + jobs.resume, + ) @cached_property def checkpoints(self) -> CheckpointsWithRawResponse: @@ -630,6 +841,12 @@ def __init__(self, jobs: AsyncJobs) -> None: self.list_events = _legacy_response.async_to_raw_response_wrapper( jobs.list_events, ) + self.pause = _legacy_response.async_to_raw_response_wrapper( + jobs.pause, + ) + self.resume = _legacy_response.async_to_raw_response_wrapper( + jobs.resume, + ) @cached_property def checkpoints(self) -> AsyncCheckpointsWithRawResponse: @@ -655,6 +872,12 @@ def __init__(self, jobs: Jobs) -> None: self.list_events = to_streamed_response_wrapper( jobs.list_events, ) + self.pause = to_streamed_response_wrapper( + jobs.pause, + ) + self.resume = to_streamed_response_wrapper( + jobs.resume, + ) @cached_property def checkpoints(self) -> CheckpointsWithStreamingResponse: @@ -680,6 +903,12 @@ def __init__(self, jobs: AsyncJobs) -> None: self.list_events = async_to_streamed_response_wrapper( jobs.list_events, ) + self.pause = async_to_streamed_response_wrapper( + jobs.pause, + ) + self.resume = async_to_streamed_response_wrapper( + jobs.resume, + ) @cached_property def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse: diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 74b2a46a3f..265be6f743 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -3,26 +3,23 @@ from __future__ import 
annotations from typing import Union, Mapping, Optional, cast -from typing_extensions import Literal +from typing_extensions import Literal, overload import httpx from .. import _legacy_response from ..types import image_edit_params, image_generate_params, image_create_variation_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import ( - extract_files, - maybe_transform, - deepcopy_minimal, - async_maybe_transform, -) +from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, SequenceNotStr, omit, not_given +from .._utils import extract_files, required_args, maybe_transform, deepcopy_minimal, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._streaming import Stream, AsyncStream +from .._base_client import make_request_options +from ..types.image_model import ImageModel from ..types.images_response import ImagesResponse +from ..types.image_gen_stream_event import ImageGenStreamEvent +from ..types.image_edit_stream_event import ImageEditStreamEvent __all__ = ["Images", "AsyncImages"] @@ -30,30 +27,42 @@ class Images(SyncAPIResource): @cached_property def with_raw_response(self) -> ImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ImagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> ImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ImagesWithStreamingResponse(self) def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: - """ - Creates a variation of a given image. + """Creates a variation of a given image. + + This endpoint only supports `dall-e-2`. Args: image: The image to use as the basis for the variation(s). Must be a valid PNG file, @@ -62,8 +71,7 @@ def create_variation( model: The model to use for image generation. Only `dall-e-2` is supported at this time. - n: The number of images to generate. 
Must be between 1 and 10. For `dall-e-3`, only - `n=1` is supported. + n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been @@ -74,7 +82,7 @@ def create_variation( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -95,11 +103,10 @@ def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/variations", body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), @@ -110,53 +117,334 @@ def create_variation( cast_to=ImagesResponse, ) + @overload def edit( self, *, - image: FileTypes, + image: Union[FileTypes, SequenceNotStr[FileTypes]], prompt: str, - mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. + + Args: + image: The image(s) to edit. Must be a supported image file or an array of images. 
+ + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. + + n: The number of images to generate. Must be between 1 and 10. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. 
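As a rough illustration of the non-streaming overload documented above, here is a minimal sketch of an edit call that passes a list of input files. The file names are hypothetical, and the snippet assumes a client configured via `OPENAI_API_KEY`; it is not the canonical example from this repository.

```python
import base64

from openai import OpenAI

client = OpenAI()

# Hypothetical local files; `image` accepts a single file or a sequence of files.
with open("living_room.png", "rb") as base, open("lamp.png", "rb") as lamp:
    result = client.images.edit(
        model="gpt-image-1",
        image=[base, lamp],
        prompt="Add the lamp from the second image to the scene in the first",
        size="1024x1024",
    )

# gpt-image-1 returns base64-encoded image data rather than URLs.
with open("edited.png", "wb") as out:
    out.write(base64.b64decode(result.data[0].b64_json))
```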
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + stream: Literal[True], + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[ImageEditStreamEvent]: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. + + Args: + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. 
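For the `stream: Literal[True]` overload introduced here, a hedged sketch of what consuming the returned `Stream[ImageEditStreamEvent]` might look like; the input path is hypothetical and the event handling is deliberately generic (dispatch on `event.type`):

```python
from openai import OpenAI

client = OpenAI()

# Hypothetical input file; partial_images requests intermediate frames.
with open("sketch.png", "rb") as f:
    stream = client.images.edit(
        model="gpt-image-1",
        image=f,
        prompt="Turn this sketch into a watercolor painting",
        stream=True,
        partial_images=2,
    )
    for event in stream:
        # Partial-image and completion events arrive as they are generated.
        print(event.type)
```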
+ + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. + + n: The number of images to generate. Must be between 1 and 10. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds """ - Creates an edited or extended image given an original image and a prompt. + ... + + @overload + def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + stream: bool, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | Stream[ImageEditStreamEvent]: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. Args: - image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask - is not provided, image must have transparency, which will be used as the mask. + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. prompt: A text description of the desired image(s). The maximum length is 1000 - characters. + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) - indicate where `image` should be edited. Must be a valid PNG file, less than + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. - model: The model to use for image generation. Only `dall-e-2` is supported at this - time. + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. n: The number of images to generate. Must be between 1 and 10. + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. 
`high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + response_format: The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been - generated. + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024`. + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -166,84 +454,392 @@ def edit( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @required_args(["image", "prompt"], ["image", "prompt", "stream"]) + def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | Stream[ImageEditStreamEvent]: body = deepcopy_minimal( { "image": image, "prompt": prompt, + "background": background, + "input_fidelity": input_fidelity, "mask": mask, "model": model, "n": n, + "output_compression": output_compression, + "output_format": output_format, + "partial_images": partial_images, + "quality": quality, "response_format": response_format, "size": size, + "stream": stream, "user": user, } ) - files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. 
- # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/edits", - body=maybe_transform(body, image_edit_params.ImageEditParams), + body=maybe_transform( + body, + image_edit_params.ImageEditParamsStreaming if stream else image_edit_params.ImageEditParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ImagesResponse, + stream=stream or False, + stream_cls=Stream[ImageEditStreamEvent], ) + @overload def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, - style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: """ Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). Args: - prompt: A text description of the desired image(s). The maximum length is 1000 - characters for `dall-e-2` and 4000 characters for `dall-e-3`. + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). 
When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. - model: The model to use for image generation. + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - quality: The quality of the image that will be generated. `hd` creates images with finer - details and greater consistency across the image. This param is only supported + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
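To make the non-streaming `generate` overload concrete, a small sketch using the `dall-e-3` specific parameters documented above (`quality="hd"`, `style`, URL response format). The prompt and output handling are illustrative only and assume `OPENAI_API_KEY` is set:

```python
from openai import OpenAI

client = OpenAI()

result = client.images.generate(
    model="dall-e-3",
    prompt="A low-poly fox sitting on a rock at sunset",
    size="1792x1024",
    quality="hd",
    style="natural",
    response_format="url",
)

# dall-e-3 can return URLs; they are only valid for about an hour.
print(result.data[0].url)
```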
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def generate( + self, + *, + prompt: str, + stream: Literal[True], + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[ImageGenStreamEvent]: + """ + Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). + + Args: + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`. - response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. URLs are only valid for 60 minutes after the image has been - generated. + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or - `1024x1792` for `dall-e-3` models. + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. - style: The style of the generated images. Must be one of `vivid` or `natural`. 
Vivid - causes the model to lean towards generating hyper-real and dramatic images. - Natural causes the model to produce more natural, less hyper-real looking - images. This param is only supported for `dall-e-3`. + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
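The streaming overload above returns `Stream[ImageGenStreamEvent]`. The following is a minimal sketch of consuming that stream, assuming the event types follow the `image_generation.partial_image` / `image_generation.completed` naming used by this SDK's image stream events and carry `b64_json` payloads; output file names are hypothetical:

```python
import base64

from openai import OpenAI

client = OpenAI()

events = client.images.generate(
    model="gpt-image-1",
    prompt="A watercolor map of an imaginary archipelago",
    stream=True,
    partial_images=3,
)

for event in events:
    # Intermediate renders arrive first; the final event carries the finished image.
    if event.type == "image_generation.partial_image":
        with open(f"partial_{event.partial_image_index}.png", "wb") as f:
            f.write(base64.b64decode(event.b64_json))
    elif event.type == "image_generation.completed":
        with open("final.png", "wb") as f:
            f.write(base64.b64decode(event.b64_json))
```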
+ + @overload + def generate( + self, + *, + prompt: str, + stream: bool, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | Stream[ImageGenStreamEvent]: + """ + Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). + + Args: + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. 
+ + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -253,55 +849,107 @@ def generate( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @required_args(["prompt"], ["prompt", "stream"]) + def generate( + self, + *, + prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | Stream[ImageGenStreamEvent]: return self._post( "/images/generations", body=maybe_transform( { "prompt": prompt, + "background": background, "model": model, + "moderation": moderation, "n": n, + "output_compression": output_compression, + "output_format": output_format, + "partial_images": partial_images, "quality": quality, "response_format": response_format, "size": size, + "stream": stream, "style": style, "user": user, }, - image_generate_params.ImageGenerateParams, + image_generate_params.ImageGenerateParamsStreaming + if stream + else image_generate_params.ImageGenerateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ImagesResponse, + stream=stream or False, + stream_cls=Stream[ImageGenStreamEvent], ) class AsyncImages(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncImagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncImagesWithStreamingResponse(self) async def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024"]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: - """ - Creates a variation of a given image. + """Creates a variation of a given image. + + This endpoint only supports `dall-e-2`. Args: image: The image to use as the basis for the variation(s). Must be a valid PNG file, @@ -310,8 +958,7 @@ async def create_variation( model: The model to use for image generation. Only `dall-e-2` is supported at this time. - n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only - `n=1` is supported. + n: The number of images to generate. Must be between 1 and 10. response_format: The format in which the generated images are returned. 
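For the async variant of `create_variation` documented here, a brief sketch using `AsyncOpenAI`; the input file is hypothetical and `dall-e-2` is the only model this endpoint accepts:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # Hypothetical source image; the endpoint produces n variations of it.
    with open("logo.png", "rb") as f:
        result = await client.images.create_variation(image=f, n=2, size="512x512")
    for image in result.data:
        print(image.url)


asyncio.run(main())
```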
Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been @@ -322,7 +969,7 @@ async def create_variation( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -343,11 +990,10 @@ async def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/variations", body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), @@ -358,53 +1004,334 @@ async def create_variation( cast_to=ImagesResponse, ) + @overload async def edit( self, *, - image: FileTypes, + image: Union[FileTypes, SequenceNotStr[FileTypes]], prompt: str, - mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. + + Args: + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. 
+ + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. + + n: The number of images to generate. Must be between 1 and 10. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
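A hedged sketch of the async, non-streaming edit overload described above, this time using a `dall-e-2` mask-based edit; both file paths are hypothetical and the mask's transparent pixels mark the region to repaint:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # Hypothetical files; the mask must be a PNG with the same dimensions as the image.
    with open("photo.png", "rb") as image, open("mask.png", "rb") as mask:
        result = await client.images.edit(
            model="dall-e-2",
            image=image,
            mask=mask,
            prompt="Replace the sky with a dramatic sunset",
            n=1,
            size="1024x1024",
            response_format="url",
        )
    print(result.data[0].url)


asyncio.run(main())
```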
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + stream: Literal[True], + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[ImageEditStreamEvent]: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. + + Args: + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. + + prompt: A text description of the desired image(s). The maximum length is 1000 + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + + mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than + 4MB, and have the same dimensions as `image`. + + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. 
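The async `stream: Literal[True]` overload returns an `AsyncStream[ImageEditStreamEvent]`, so the call is awaited and the events are consumed with `async for`. A minimal sketch under the same assumptions as the earlier examples (hypothetical input file, configured API key):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    with open("portrait.png", "rb") as f:  # hypothetical input image
        stream = await client.images.edit(
            model="gpt-image-1",
            image=f,
            prompt="Restyle this portrait as a pencil sketch",
            stream=True,
            partial_images=1,
        )
        async for event in stream:
            # Inspect event.type to distinguish partial images from the final result.
            print(event.type)


asyncio.run(main())
```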
Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. + + n: The number of images to generate. Must be between 1 and 10. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. `high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + + response_format: The format in which the generated images are returned. Must be one of `url` or + `b64_json`. URLs are only valid for 60 minutes after the image has been + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds """ - Creates an edited or extended image given an original image and a prompt. + ... + + @overload + async def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + stream: bool, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | AsyncStream[ImageEditStreamEvent]: + """Creates an edited or extended image given one or more source images and a + prompt. + + This endpoint only supports `gpt-image-1` and `dall-e-2`. Args: - image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask - is not provided, image must have transparency, which will be used as the mask. + image: The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. prompt: A text description of the desired image(s). The maximum length is 1000 - characters. + characters for `dall-e-2`, and 32000 characters for `gpt-image-1`. + + stream: Edit the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) - indicate where `image` should be edited. Must be a valid PNG file, less than + indicate where `image` should be edited. If there are multiple images provided, + the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. - model: The model to use for image generation. Only `dall-e-2` is supported at this - time. + model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are + supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` + is used. n: The number of images to generate. Must be between 1 and 10. + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The + default value is `png`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. 
`high`, `medium` and `low` are + only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality. + Defaults to `auto`. + response_format: The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been - generated. + generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1` + will always return base64-encoded images. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024`. + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -414,84 +1341,392 @@ async def edit( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @required_args(["image", "prompt"], ["image", "prompt", "stream"]) + async def edit( + self, + *, + image: Union[FileTypes, SequenceNotStr[FileTypes]], + prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + input_fidelity: Optional[Literal["high", "low"]] | Omit = omit, + mask: FileTypes | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | AsyncStream[ImageEditStreamEvent]: body = deepcopy_minimal( { "image": image, "prompt": prompt, + "background": background, + "input_fidelity": input_fidelity, "mask": mask, "model": model, "n": n, + "output_compression": output_compression, + "output_format": output_format, + "partial_images": partial_images, + "quality": quality, "response_format": response_format, "size": size, + "stream": stream, "user": user, } ) - files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. 
- # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/edits", - body=await async_maybe_transform(body, image_edit_params.ImageEditParams), + body=await async_maybe_transform( + body, + image_edit_params.ImageEditParamsStreaming if stream else image_edit_params.ImageEditParamsNonStreaming, + ), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ImagesResponse, + stream=stream or False, + stream_cls=AsyncStream[ImageEditStreamEvent], ) + @overload async def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, - response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN, - style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ImagesResponse: """ Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). Args: - prompt: A text description of the desired image(s). The maximum length is 1000 - characters for `dall-e-2` and 4000 characters for `dall-e-3`. + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). 
When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. - model: The model to use for image generation. + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - quality: The quality of the image that will be generated. `hd` creates images with finer - details and greater consistency across the image. This param is only supported + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def generate( + self, + *, + prompt: str, + stream: Literal[True], + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[ImageGenStreamEvent]: + """ + Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). + + Args: + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`. - response_format: The format in which the generated images are returned. Must be one of `url` or - `b64_json`. URLs are only valid for 60 minutes after the image has been - generated. + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. - size: The size of the generated images. Must be one of `256x256`, `512x512`, or - `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or - `1024x1792` for `dall-e-3` models. + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. - style: The style of the generated images. Must be one of `vivid` or `natural`. 
Vivid - causes the model to lean towards generating hyper-real and dramatic images. - Natural causes the model to produce more natural, less hyper-real looking - images. This param is only supported for `dall-e-3`. + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
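As a minimal sketch of how the streaming overload above might be called (assuming an `AsyncOpenAI` client from this SDK; the `event.type` attribute read in the loop and the parameter values are illustrative assumptions, not taken from this patch):

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    # Passing stream=True selects the overload that returns AsyncStream[ImageGenStreamEvent].
    stream = await client.images.generate(
        model="gpt-image-1",
        prompt="a watercolor lighthouse at dusk",
        stream=True,
        partial_images=2,  # request up to 2 partial frames before the final image
    )
    async for event in stream:
        # Each ImageGenStreamEvent is assumed to carry a `type` discriminator
        # (e.g. partial-image vs. completion); inspect it to decide how to handle the payload.
        print(event.type)


asyncio.run(main())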
+ + @overload + async def generate( + self, + *, + prompt: str, + stream: bool, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | AsyncStream[ImageGenStreamEvent]: + """ + Creates an image given a prompt. + [Learn more](https://platform.openai.com/docs/guides/images). + + Args: + prompt: A text description of the desired image(s). The maximum length is 32000 + characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters + for `dall-e-3`. + + stream: Generate the image in streaming mode. Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. + + background: Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + + model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or + `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to + `gpt-image-1` is used. + + moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must + be either `low` for less restrictive filtering or `auto` (default value). + + n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only + `n=1` is supported. + + output_compression: The compression level (0-100%) for the generated images. This parameter is only + supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and + defaults to 100. + + output_format: The format in which the generated images are returned. This parameter is only + supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + + partial_images: The number of partial images to generate. This parameter is used for streaming + responses that return partial images. Value must be between 0 and 3. When set to + 0, the response will be a single image sent in one streaming event. 
+ + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + + quality: The quality of the image that will be generated. + + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + + response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes + after the image has been generated. This parameter isn't supported for + `gpt-image-1` which will always return base64-encoded images. + + size: The size of the generated images. Must be one of `1024x1024`, `1536x1024` + (landscape), `1024x1536` (portrait), or `auto` (default value) for + `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and + one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + + style: The style of the generated images. This parameter is only supported for + `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean + towards generating hyper-real and dramatic images. Natural causes the model to + produce more natural, less hyper-real looking images. user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -501,25 +1736,65 @@ async def generate( timeout: Override the client-level default timeout for this request, in seconds """ + ... + + @required_args(["prompt"], ["prompt", "stream"]) + async def generate( + self, + *, + prompt: str, + background: Optional[Literal["transparent", "opaque", "auto"]] | Omit = omit, + model: Union[str, ImageModel, None] | Omit = omit, + moderation: Optional[Literal["low", "auto"]] | Omit = omit, + n: Optional[int] | Omit = omit, + output_compression: Optional[int] | Omit = omit, + output_format: Optional[Literal["png", "jpeg", "webp"]] | Omit = omit, + partial_images: Optional[int] | Omit = omit, + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | Omit = omit, + response_format: Optional[Literal["url", "b64_json"]] | Omit = omit, + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] + | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + style: Optional[Literal["vivid", "natural"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ImagesResponse | AsyncStream[ImageGenStreamEvent]: return await self._post( "/images/generations", body=await async_maybe_transform( { "prompt": prompt, + "background": background, "model": model, + "moderation": moderation, "n": n, + "output_compression": output_compression, + "output_format": output_format, + "partial_images": partial_images, "quality": quality, "response_format": response_format, "size": size, + "stream": stream, "style": style, "user": user, }, - image_generate_params.ImageGenerateParams, + image_generate_params.ImageGenerateParamsStreaming + if stream + else image_generate_params.ImageGenerateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ImagesResponse, + stream=stream or False, + stream_cls=AsyncStream[ImageGenStreamEvent], ) diff --git a/src/openai/resources/models.py b/src/openai/resources/models.py index e76c496ffa..3f685e2eab 100644 --- a/src/openai/resources/models.py +++ b/src/openai/resources/models.py @@ -5,16 +5,13 @@ import httpx from .. import _legacy_response -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._types import Body, Query, Headers, NotGiven, not_given from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncPage, AsyncPage from ..types.model import Model -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from .._base_client import AsyncPaginator, make_request_options from ..types.model_deleted import ModelDeleted __all__ = ["Models", "AsyncModels"] @@ -23,10 +20,21 @@ class Models(SyncAPIResource): @cached_property def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ModelsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ModelsWithStreamingResponse(self) def retrieve( @@ -38,7 +46,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Model: """ Retrieves a model instance, providing basic information about the model such as @@ -71,7 +79,7 @@ def list( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncPage[Model]: """ Lists the currently available models, and provides basic information about each @@ -95,7 +103,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ModelDeleted: """Delete a fine-tuned model. @@ -125,10 +133,21 @@ def delete( class AsyncModels(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncModelsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncModelsWithStreamingResponse(self) async def retrieve( @@ -140,7 +159,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Model: """ Retrieves a model instance, providing basic information about the model such as @@ -173,7 +192,7 @@ def list( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[Model, AsyncPage[Model]]: """ Lists the currently available models, and provides basic information about each @@ -197,7 +216,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ModelDeleted: """Delete a fine-tuned model. diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py index 9386e50dae..5f378f71e7 100644 --- a/src/openai/resources/moderations.py +++ b/src/openai/resources/moderations.py @@ -2,25 +2,21 @@ from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal +from typing import Union, Iterable import httpx from .. 
import _legacy_response from ..types import moderation_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options +from ..types.moderation_model import ModerationModel from ..types.moderation_create_response import ModerationCreateResponse +from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["Moderations", "AsyncModerations"] @@ -28,38 +24,48 @@ class Moderations(SyncAPIResource): @cached_property def with_raw_response(self) -> ModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ModerationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ModerationsWithStreamingResponse(self) def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, SequenceNotStr[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ModerationCreateResponse: - """ - Classifies if text is potentially harmful. + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. 
Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). extra_headers: Send extra headers @@ -88,38 +94,48 @@ def create( class AsyncModerations(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncModerationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncModerationsWithStreamingResponse(self) async def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, SequenceNotStr[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ModerationCreateResponse: - """ - Classifies if text is potentially harmful. + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). extra_headers: Send extra headers diff --git a/src/openai/resources/realtime/__init__.py b/src/openai/resources/realtime/__init__.py new file mode 100644 index 0000000000..c11841017f --- /dev/null +++ b/src/openai/resources/realtime/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .calls import ( + Calls, + AsyncCalls, + CallsWithRawResponse, + AsyncCallsWithRawResponse, + CallsWithStreamingResponse, + AsyncCallsWithStreamingResponse, +) +from .realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) +from .client_secrets import ( + ClientSecrets, + AsyncClientSecrets, + ClientSecretsWithRawResponse, + AsyncClientSecretsWithRawResponse, + ClientSecretsWithStreamingResponse, + AsyncClientSecretsWithStreamingResponse, +) + +__all__ = [ + "ClientSecrets", + "AsyncClientSecrets", + "ClientSecretsWithRawResponse", + "AsyncClientSecretsWithRawResponse", + "ClientSecretsWithStreamingResponse", + "AsyncClientSecretsWithStreamingResponse", + "Calls", + "AsyncCalls", + "CallsWithRawResponse", + "AsyncCallsWithRawResponse", + "CallsWithStreamingResponse", + "AsyncCallsWithStreamingResponse", + "Realtime", + "AsyncRealtime", + "RealtimeWithRawResponse", + "AsyncRealtimeWithRawResponse", + "RealtimeWithStreamingResponse", + "AsyncRealtimeWithStreamingResponse", +] diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py new file mode 100644 index 0000000000..cdea492d95 --- /dev/null +++ b/src/openai/resources/realtime/calls.py @@ -0,0 +1,774 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.realtime import ( + call_refer_params, + call_accept_params, + call_create_params, + call_reject_params, +) +from ...types.responses.response_prompt_param import ResponsePromptParam +from ...types.realtime.realtime_truncation_param import RealtimeTruncationParam +from ...types.realtime.realtime_audio_config_param import RealtimeAudioConfigParam +from ...types.realtime.realtime_tools_config_param import RealtimeToolsConfigParam +from ...types.realtime.realtime_tracing_config_param import RealtimeTracingConfigParam +from ...types.realtime.realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam +from ...types.realtime.realtime_session_create_request_param import RealtimeSessionCreateRequestParam + +__all__ = ["Calls", "AsyncCalls"] + + +class Calls(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CallsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CallsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CallsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CallsWithStreamingResponse(self) + + def create( + self, + *, + sdp: str, + session: RealtimeSessionCreateRequestParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Create a new Realtime API call over WebRTC and receive the SDP answer needed to + complete the peer connection. + + Args: + sdp: WebRTC Session Description Protocol (SDP) offer generated by the caller. + + session: Realtime session object configuration. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/sdp", **(extra_headers or {})} + return self._post( + "/realtime/calls", + body=maybe_transform( + { + "sdp": sdp, + "session": session, + }, + call_create_params.CallCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + def accept( + self, + call_id: str, + *, + type: Literal["realtime"], + audio: RealtimeAudioConfigParam | Omit = omit, + include: List[Literal["item.input_audio_transcription.logprobs"]] | Omit = omit, + instructions: str | Omit = omit, + max_output_tokens: Union[int, Literal["inf"]] | Omit = omit, + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + ] + | Omit = omit, + output_modalities: List[Literal["text", "audio"]] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + tool_choice: RealtimeToolChoiceConfigParam | Omit = omit, + tools: RealtimeToolsConfigParam | Omit = omit, + tracing: Optional[RealtimeTracingConfigParam] | Omit = omit, + truncation: RealtimeTruncationParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Accept an incoming SIP call and configure the realtime session that will handle + it. + + Args: + type: The type of session to create. Always `realtime` for the Realtime API. + + audio: Configuration for input and output audio. + + include: Additional fields to include in server outputs. 
+ + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format, (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + + model: The Realtime model used for this session. + + output_modalities: The set of modalities the model can respond with. It defaults to `["audio"]`, + indicating that the model will respond with audio plus a transcript. `["text"]` + can be used to make the model respond with text only. It is not possible to + request both `text` and `audio` at the same time. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + tool_choice: How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. + + tools: Tools available to the model. + + tracing: Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._post( + f"/realtime/calls/{call_id}/accept", + body=maybe_transform( + { + "type": type, + "audio": audio, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "model": model, + "output_modalities": output_modalities, + "prompt": prompt, + "tool_choice": tool_choice, + "tools": tools, + "tracing": tracing, + "truncation": truncation, + }, + call_accept_params.CallAcceptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def hangup( + self, + call_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + End an active Realtime API call, whether it was initiated over SIP or WebRTC. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._post( + f"/realtime/calls/{call_id}/hangup", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def refer( + self, + call_id: str, + *, + target_uri: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Transfer an active SIP call to a new destination using the SIP REFER verb. + + Args: + target_uri: URI that should appear in the SIP Refer-To header. Supports values like + `tel:+14155550123` or `sip:agent@example.com`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._post( + f"/realtime/calls/{call_id}/refer", + body=maybe_transform({"target_uri": target_uri}, call_refer_params.CallReferParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def reject( + self, + call_id: str, + *, + status_code: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Decline an incoming SIP call by returning a SIP status code to the caller. + + Args: + status_code: SIP response code to send back to the caller. Defaults to `603` (Decline) when + omitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._post( + f"/realtime/calls/{call_id}/reject", + body=maybe_transform({"status_code": status_code}, call_reject_params.CallRejectParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncCalls(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCallsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCallsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCallsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCallsWithStreamingResponse(self) + + async def create( + self, + *, + sdp: str, + session: RealtimeSessionCreateRequestParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Create a new Realtime API call over WebRTC and receive the SDP answer needed to + complete the peer connection. + + Args: + sdp: WebRTC Session Description Protocol (SDP) offer generated by the caller. + + session: Realtime session object configuration. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/sdp", **(extra_headers or {})} + return await self._post( + "/realtime/calls", + body=await async_maybe_transform( + { + "sdp": sdp, + "session": session, + }, + call_create_params.CallCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + async def accept( + self, + call_id: str, + *, + type: Literal["realtime"], + audio: RealtimeAudioConfigParam | Omit = omit, + include: List[Literal["item.input_audio_transcription.logprobs"]] | Omit = omit, + instructions: str | Omit = omit, + max_output_tokens: Union[int, Literal["inf"]] | Omit = omit, + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + ] + | Omit = omit, + output_modalities: List[Literal["text", "audio"]] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + tool_choice: RealtimeToolChoiceConfigParam | Omit = omit, + tools: RealtimeToolsConfigParam | Omit = omit, + tracing: Optional[RealtimeTracingConfigParam] | Omit = omit, + truncation: RealtimeTruncationParam | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Accept an incoming SIP call and configure the realtime session that will handle + it. + + Args: + type: The type of session to create. Always `realtime` for the Realtime API. + + audio: Configuration for input and output audio. + + include: Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format, (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). 
The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + + model: The Realtime model used for this session. + + output_modalities: The set of modalities the model can respond with. It defaults to `["audio"]`, + indicating that the model will respond with audio plus a transcript. `["text"]` + can be used to make the model respond with text only. It is not possible to + request both `text` and `audio` at the same time. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + tool_choice: How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. + + tools: Tools available to the model. + + tracing: Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation will be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit.
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._post( + f"/realtime/calls/{call_id}/accept", + body=await async_maybe_transform( + { + "type": type, + "audio": audio, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "model": model, + "output_modalities": output_modalities, + "prompt": prompt, + "tool_choice": tool_choice, + "tools": tools, + "tracing": tracing, + "truncation": truncation, + }, + call_accept_params.CallAcceptParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def hangup( + self, + call_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + End an active Realtime API call, whether it was initiated over SIP or WebRTC. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._post( + f"/realtime/calls/{call_id}/hangup", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def refer( + self, + call_id: str, + *, + target_uri: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Transfer an active SIP call to a new destination using the SIP REFER verb. + + Args: + target_uri: URI that should appear in the SIP Refer-To header. Supports values like + `tel:+14155550123` or `sip:agent@example.com`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._post( + f"/realtime/calls/{call_id}/refer", + body=await async_maybe_transform({"target_uri": target_uri}, call_refer_params.CallReferParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def reject( + self, + call_id: str, + *, + status_code: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Decline an incoming SIP call by returning a SIP status code to the caller. + + Args: + status_code: SIP response code to send back to the caller. Defaults to `603` (Decline) when + omitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not call_id: + raise ValueError(f"Expected a non-empty value for `call_id` but received {call_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._post( + f"/realtime/calls/{call_id}/reject", + body=await async_maybe_transform({"status_code": status_code}, call_reject_params.CallRejectParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class CallsWithRawResponse: + def __init__(self, calls: Calls) -> None: + self._calls = calls + + self.create = _legacy_response.to_raw_response_wrapper( + calls.create, + ) + self.accept = _legacy_response.to_raw_response_wrapper( + calls.accept, + ) + self.hangup = _legacy_response.to_raw_response_wrapper( + calls.hangup, + ) + self.refer = _legacy_response.to_raw_response_wrapper( + calls.refer, + ) + self.reject = _legacy_response.to_raw_response_wrapper( + calls.reject, + ) + + +class AsyncCallsWithRawResponse: + def __init__(self, calls: AsyncCalls) -> None: + self._calls = calls + + self.create = _legacy_response.async_to_raw_response_wrapper( + calls.create, + ) + self.accept = _legacy_response.async_to_raw_response_wrapper( + calls.accept, + ) + self.hangup = _legacy_response.async_to_raw_response_wrapper( + calls.hangup, + ) + self.refer = _legacy_response.async_to_raw_response_wrapper( + calls.refer, + ) + self.reject = _legacy_response.async_to_raw_response_wrapper( + calls.reject, + ) + + +class CallsWithStreamingResponse: + def __init__(self, calls: Calls) -> None: + self._calls = calls + + self.create = to_custom_streamed_response_wrapper( + calls.create, + StreamedBinaryAPIResponse, + ) + self.accept = to_streamed_response_wrapper( + calls.accept, + ) + self.hangup = to_streamed_response_wrapper( + calls.hangup, + ) + 
self.refer = to_streamed_response_wrapper( + calls.refer, + ) + self.reject = to_streamed_response_wrapper( + calls.reject, + ) + + +class AsyncCallsWithStreamingResponse: + def __init__(self, calls: AsyncCalls) -> None: + self._calls = calls + + self.create = async_to_custom_streamed_response_wrapper( + calls.create, + AsyncStreamedBinaryAPIResponse, + ) + self.accept = async_to_streamed_response_wrapper( + calls.accept, + ) + self.hangup = async_to_streamed_response_wrapper( + calls.hangup, + ) + self.refer = async_to_streamed_response_wrapper( + calls.refer, + ) + self.reject = async_to_streamed_response_wrapper( + calls.reject, + ) diff --git a/src/openai/resources/realtime/client_secrets.py b/src/openai/resources/realtime/client_secrets.py new file mode 100644 index 0000000000..5ceba7bef1 --- /dev/null +++ b/src/openai/resources/realtime/client_secrets.py @@ -0,0 +1,189 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ... import _legacy_response +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.realtime import client_secret_create_params +from ...types.realtime.client_secret_create_response import ClientSecretCreateResponse + +__all__ = ["ClientSecrets", "AsyncClientSecrets"] + + +class ClientSecrets(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ClientSecretsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ClientSecretsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ClientSecretsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ClientSecretsWithStreamingResponse(self) + + def create( + self, + *, + expires_after: client_secret_create_params.ExpiresAfter | Omit = omit, + session: client_secret_create_params.Session | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ClientSecretCreateResponse: + """ + Create a Realtime client secret with an associated session configuration. + + Args: + expires_after: Configuration for the client secret expiration. Expiration refers to the time + after which a client secret will no longer be valid for creating sessions. The + session itself may continue after that time once started. A secret can be used + to create multiple sessions until it expires. + + session: Session configuration to use for the client secret. 
Choose either a realtime + session or a transcription session. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/realtime/client_secrets", + body=maybe_transform( + { + "expires_after": expires_after, + "session": session, + }, + client_secret_create_params.ClientSecretCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ClientSecretCreateResponse, + ) + + +class AsyncClientSecrets(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncClientSecretsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncClientSecretsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncClientSecretsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncClientSecretsWithStreamingResponse(self) + + async def create( + self, + *, + expires_after: client_secret_create_params.ExpiresAfter | Omit = omit, + session: client_secret_create_params.Session | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> ClientSecretCreateResponse: + """ + Create a Realtime client secret with an associated session configuration. + + Args: + expires_after: Configuration for the client secret expiration. Expiration refers to the time + after which a client secret will no longer be valid for creating sessions. The + session itself may continue after that time once started. A secret can be used + to create multiple sessions until it expires. + + session: Session configuration to use for the client secret. Choose either a realtime + session or a transcription session. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/realtime/client_secrets", + body=await async_maybe_transform( + { + "expires_after": expires_after, + "session": session, + }, + client_secret_create_params.ClientSecretCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ClientSecretCreateResponse, + ) + + +class ClientSecretsWithRawResponse: + def __init__(self, client_secrets: ClientSecrets) -> None: + self._client_secrets = client_secrets + + self.create = _legacy_response.to_raw_response_wrapper( + client_secrets.create, + ) + + +class AsyncClientSecretsWithRawResponse: + def __init__(self, client_secrets: AsyncClientSecrets) -> None: + self._client_secrets = client_secrets + + self.create = _legacy_response.async_to_raw_response_wrapper( + client_secrets.create, + ) + + +class ClientSecretsWithStreamingResponse: + def __init__(self, client_secrets: ClientSecrets) -> None: + self._client_secrets = client_secrets + + self.create = to_streamed_response_wrapper( + client_secrets.create, + ) + + +class AsyncClientSecretsWithStreamingResponse: + def __init__(self, client_secrets: AsyncClientSecrets) -> None: + self._client_secrets = client_secrets + + self.create = async_to_streamed_response_wrapper( + client_secrets.create, + ) diff --git a/src/openai/resources/realtime/realtime.py b/src/openai/resources/realtime/realtime.py new file mode 100644 index 0000000000..0d04dc5725 --- /dev/null +++ b/src/openai/resources/realtime/realtime.py @@ -0,0 +1,1045 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
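+
+# Illustrative usage sketch (not part of the generated code): mint an ephemeral client secret
+# for a browser/WebRTC client, or open a server-side WebSocket connection directly. The model
+# name and session fields below are assumptions chosen for the example only.
+#
+#     client = OpenAI()
+#     secret = client.realtime.client_secrets.create(
+#         session={"type": "realtime", "model": "gpt-realtime"},
+#     )
+#     with client.realtime.connect(model="gpt-realtime") as connection:
+#         for event in connection:
+#             ...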
+ +from __future__ import annotations + +import json +import logging +from types import TracebackType +from typing import TYPE_CHECKING, Any, Iterator, cast +from typing_extensions import AsyncIterator + +import httpx +from pydantic import BaseModel + +from .calls import ( + Calls, + AsyncCalls, + CallsWithRawResponse, + AsyncCallsWithRawResponse, + CallsWithStreamingResponse, + AsyncCallsWithStreamingResponse, +) +from ..._types import Omit, Query, Headers, omit +from ..._utils import maybe_transform, strip_not_given, async_maybe_transform +from ..._compat import cached_property +from ..._models import construct_type_unchecked +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._exceptions import OpenAIError +from ..._base_client import _merge_mappings +from .client_secrets import ( + ClientSecrets, + AsyncClientSecrets, + ClientSecretsWithRawResponse, + AsyncClientSecretsWithRawResponse, + ClientSecretsWithStreamingResponse, + AsyncClientSecretsWithStreamingResponse, +) +from ...types.realtime import session_update_event_param +from ...types.websocket_connection_options import WebsocketConnectionOptions +from ...types.realtime.realtime_client_event import RealtimeClientEvent +from ...types.realtime.realtime_server_event import RealtimeServerEvent +from ...types.realtime.conversation_item_param import ConversationItemParam +from ...types.realtime.realtime_client_event_param import RealtimeClientEventParam +from ...types.realtime.realtime_response_create_params_param import RealtimeResponseCreateParamsParam + +if TYPE_CHECKING: + from websockets.sync.client import ClientConnection as WebsocketConnection + from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection + + from ..._client import OpenAI, AsyncOpenAI + +__all__ = ["Realtime", "AsyncRealtime"] + +log: logging.Logger = logging.getLogger(__name__) + + +class Realtime(SyncAPIResource): + @cached_property + def client_secrets(self) -> ClientSecrets: + return ClientSecrets(self._client) + + @cached_property + def calls(self) -> Calls: + return Calls(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + def connect( + self, + *, + call_id: str | Omit = omit, + model: str | Omit = omit, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> RealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. 
+ - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return RealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + call_id=call_id, + model=model, + ) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def client_secrets(self) -> AsyncClientSecrets: + return AsyncClientSecrets(self._client) + + @cached_property + def calls(self) -> AsyncCalls: + return AsyncCalls(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + def connect( + self, + *, + call_id: str | Omit = omit, + model: str | Omit = omit, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> AsyncRealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. 
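+
+        A minimal usage sketch (the model name, session fields, and event handling below are
+        illustrative only):
+
+        ```py
+        async with client.realtime.connect(model="gpt-realtime") as connection:
+            await connection.session.update(session={"type": "realtime", "output_modalities": ["text"]})
+            async for event in connection:
+                if event.type == "response.done":
+                    break
+        ```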
+ """ + return AsyncRealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + call_id=call_id, + model=model, + ) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def client_secrets(self) -> ClientSecretsWithRawResponse: + return ClientSecretsWithRawResponse(self._realtime.client_secrets) + + @cached_property + def calls(self) -> CallsWithRawResponse: + return CallsWithRawResponse(self._realtime.calls) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def client_secrets(self) -> AsyncClientSecretsWithRawResponse: + return AsyncClientSecretsWithRawResponse(self._realtime.client_secrets) + + @cached_property + def calls(self) -> AsyncCallsWithRawResponse: + return AsyncCallsWithRawResponse(self._realtime.calls) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def client_secrets(self) -> ClientSecretsWithStreamingResponse: + return ClientSecretsWithStreamingResponse(self._realtime.client_secrets) + + @cached_property + def calls(self) -> CallsWithStreamingResponse: + return CallsWithStreamingResponse(self._realtime.calls) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def client_secrets(self) -> AsyncClientSecretsWithStreamingResponse: + return AsyncClientSecretsWithStreamingResponse(self._realtime.client_secrets) + + @cached_property + def calls(self) -> AsyncCallsWithStreamingResponse: + return AsyncCallsWithStreamingResponse(self._realtime.calls) + + +class AsyncRealtimeConnection: + """Represents a live WebSocket connection to the Realtime API""" + + session: AsyncRealtimeSessionResource + response: AsyncRealtimeResponseResource + input_audio_buffer: AsyncRealtimeInputAudioBufferResource + conversation: AsyncRealtimeConversationResource + output_audio_buffer: AsyncRealtimeOutputAudioBufferResource + + _connection: AsyncWebsocketConnection + + def __init__(self, connection: AsyncWebsocketConnection) -> None: + self._connection = connection + + self.session = AsyncRealtimeSessionResource(self) + self.response = AsyncRealtimeResponseResource(self) + self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) + self.conversation = AsyncRealtimeConversationResource(self) + self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self) + + async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield await self.recv() + except ConnectionClosedOK: + return + + async def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(await self.recv_bytes()) + + async def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. 
+ + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = await self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + return message + + async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam)) + ) + await self._connection.send(data) + + async def close(self, *, code: int = 1000, reason: str = "") -> None: + await self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class AsyncRealtimeConnectionManager: + """ + Context manager over a `AsyncRealtimeConnection` that is returned by `realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + + def __init__( + self, + *, + client: AsyncOpenAI, + call_id: str | Omit = omit, + model: str | Omit = omit, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__call_id = call_id + self.__model = model + self.__connection: AsyncRealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + async def __aenter__(self) -> AsyncRealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.realtime.connect(...).enter() + # ... 
+ await connection.close() + ``` + """ + try: + from websockets.asyncio.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "call_id": self.__call_id, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = AsyncRealtimeConnection( + await connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __aenter__ + + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + async def __aexit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + await self.__connection.close() + + +class RealtimeConnection: + """Represents a live WebSocket connection to the Realtime API""" + + session: RealtimeSessionResource + response: RealtimeResponseResource + input_audio_buffer: RealtimeInputAudioBufferResource + conversation: RealtimeConversationResource + output_audio_buffer: RealtimeOutputAudioBufferResource + + _connection: WebsocketConnection + + def __init__(self, connection: WebsocketConnection) -> None: + self._connection = connection + + self.session = RealtimeSessionResource(self) + self.response = RealtimeResponseResource(self) + self.input_audio_buffer = RealtimeInputAudioBufferResource(self) + self.conversation = RealtimeConversationResource(self) + self.output_audio_buffer = RealtimeOutputAudioBufferResource(self) + + def __iter__(self) -> Iterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield self.recv() + except ConnectionClosedOK: + return + + def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(self.recv_bytes()) + + def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. 
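+
+        For example (a sketch; `connection` is assumed to be an open `RealtimeConnection`):
+
+        ```py
+        data = connection.recv_bytes()
+        event = connection.parse_event(data)
+        ```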
+ """ + message = self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + return message + + def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(maybe_transform(event, RealtimeClientEventParam)) + ) + self._connection.send(data) + + def close(self, *, code: int = 1000, reason: str = "") -> None: + self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class RealtimeConnectionManager: + """ + Context manager over a `RealtimeConnection` that is returned by `realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + + def __init__( + self, + *, + client: OpenAI, + call_id: str | Omit = omit, + model: str | Omit = omit, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__call_id = call_id + self.__model = model + self.__connection: RealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + def __enter__(self) -> RealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.realtime.connect(...).enter() + # ... 
+ connection.close() + ``` + """ + try: + from websockets.sync.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "call_id": self.__call_id, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = RealtimeConnection( + connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __enter__ + + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + def __exit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + self.__connection.close() + + +class BaseRealtimeConnectionResource: + def __init__(self, connection: RealtimeConnection) -> None: + self._connection = connection + + +class RealtimeSessionResource(BaseRealtimeConnectionResource): + def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None: + """ + Send this event to update the session’s configuration. + The client may send this event at any time to update any field + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present in the `session.update` are updated. To clear a field like + `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + To clear a field like `turn_detection`, pass `null`. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class RealtimeResponseResource(BaseRealtimeConnectionResource): + def create(self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history by default. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions` and `tools`. If these are set, they will override the Session's + configuration for this Response only. 
+ + Responses can be created out-of-band of the default Conversation, meaning that they can + have arbitrary input, and it's possible to disable writing the output to the Conversation. + Only one Response can write to the default Conversation at a time, but otherwise multiple + Responses can be created in parallel. The `metadata` field is a good way to disambiguate + multiple simultaneous Responses. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.done` event with a status of `response.status=cancelled`. If + there is no response to cancel, the server will respond with an error. It's safe + to call `response.cancel` even if no response is in progress, an error will be + returned and the session will remain unaffected. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | Omit = omit) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + def commit(self, *, event_id: str | Omit = omit) -> None: + """ + Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + + Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + def append(self, *, audio: str, event_id: str | Omit = omit) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. A "commit" will create a new + user message item in the conversation history from the buffer content and clear the buffer. + Input audio transcription (if enabled) will be generated when the buffer is committed. + + If VAD is enabled the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. Input audio noise reduction operates on writes to the audio buffer. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive.
Unlike most other client events, the server will + not send a confirmation response to this event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class RealtimeConversationResource(BaseRealtimeConnectionResource): + @cached_property + def item(self) -> RealtimeConversationItemResource: + return RealtimeConversationItemResource(self._connection) + + +class RealtimeConversationItemResource(BaseRealtimeConnectionResource): + def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + def create( + self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + def truncate(self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. 
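+
+        For example (the item ID below is a placeholder):
+
+        ```py
+        connection.conversation.item.retrieve(item_id="item_ABC123")
+        ```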
+ """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | Omit = omit) -> None: + """**WebRTC/SIP Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id})) + ) + + +class BaseAsyncRealtimeConnectionResource: + def __init__(self, connection: AsyncRealtimeConnection) -> None: + self._connection = connection + + +class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource): + async def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None: + """ + Send this event to update the session’s configuration. + The client may send this event at any time to update any field + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present in the `session.update` are updated. To clear a field like + `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + To clear a field like `turn_detection`, pass `null`. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource): + async def create( + self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history by default. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions` and `tools`. If these are set, they will override the Session's + configuration for this Response only. + + Responses can be created out-of-band of the default Conversation, meaning that they can + have arbitrary input, and it's possible to disable writing the output to the Conversation. + Only one Response can write to the default Conversation at a time, but otherwise multiple + Responses can be created in parallel. The `metadata` field is a good way to disambiguate + multiple simultaneous Responses. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. 
Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + async def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.done` event with a status of `response.status=cancelled`. If + there is no response to cancel, the server will respond with an error. It's safe + to call `response.cancel` even if no response is in progress, an error will be + returned and the session will remain unaffected. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + +class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | Omit = omit) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + async def commit(self, *, event_id: str | Omit = omit) -> None: + """ + Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + + Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + async def append(self, *, audio: str, event_id: str | Omit = omit) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. A "commit" will create a new + user message item in the conversation history from the buffer content and clear the buffer. + Input audio transcription (if enabled) will be generated when the buffer is committed. + + If VAD is enabled the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. Input audio noise reduction operates on writes to the audio buffer. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event.
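+
+        For example, appending one base64-encoded audio chunk (a sketch; `pcm_chunk` is assumed
+        to be raw audio bytes in the session's configured input format):
+
+        ```py
+        import base64
+
+        await connection.input_audio_buffer.append(audio=base64.b64encode(pcm_chunk).decode("ascii"))
+        ```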
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource): + @cached_property + def item(self) -> AsyncRealtimeConversationItemResource: + return AsyncRealtimeConversationItemResource(self._connection) + + +class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource): + async def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + async def create( + self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + async def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None: + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. 
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | Omit = omit) -> None: + """**WebRTC/SIP Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id})) + ) diff --git a/src/openai/resources/responses/__init__.py b/src/openai/resources/responses/__init__.py new file mode 100644 index 0000000000..51d318ad8d --- /dev/null +++ b/src/openai/resources/responses/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from .input_tokens import ( + InputTokens, + AsyncInputTokens, + InputTokensWithRawResponse, + AsyncInputTokensWithRawResponse, + InputTokensWithStreamingResponse, + AsyncInputTokensWithStreamingResponse, +) + +__all__ = [ + "InputItems", + "AsyncInputItems", + "InputItemsWithRawResponse", + "AsyncInputItemsWithRawResponse", + "InputItemsWithStreamingResponse", + "AsyncInputItemsWithStreamingResponse", + "InputTokens", + "AsyncInputTokens", + "InputTokensWithRawResponse", + "AsyncInputTokensWithRawResponse", + "InputTokensWithStreamingResponse", + "AsyncInputTokensWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", +] diff --git a/src/openai/resources/responses/input_items.py b/src/openai/resources/responses/input_items.py new file mode 100644 index 0000000000..3311bfe10a --- /dev/null +++ b/src/openai/resources/responses/input_items.py @@ -0,0 +1,226 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, List, cast +from typing_extensions import Literal + +import httpx + +from ... 
import _legacy_response +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.responses import input_item_list_params +from ...types.responses.response_item import ResponseItem +from ...types.responses.response_includable import ResponseIncludable + +__all__ = ["InputItems", "AsyncInputItems"] + + +class InputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> InputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return InputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> InputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return InputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | Omit = omit, + include: List[ResponseIncludable] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncCursorPage[ResponseItem]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=SyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "include": include, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class AsyncInputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncInputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncInputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncInputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncInputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | Omit = omit, + include: List[ResponseIncludable] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[ResponseItem, AsyncCursorPage[ResponseItem]]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=AsyncCursorPage[ResponseItem], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "include": include, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, ResponseItem), # Union types cannot be passed in as arguments in the type system + ) + + +class InputItemsWithRawResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.to_raw_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithRawResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.async_to_raw_response_wrapper( + input_items.list, + ) + + +class InputItemsWithStreamingResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = to_streamed_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithStreamingResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = async_to_streamed_response_wrapper( + input_items.list, + ) diff --git a/src/openai/resources/responses/input_tokens.py b/src/openai/resources/responses/input_tokens.py new file mode 100644 index 0000000000..0f47955fe4 --- /dev/null +++ b/src/openai/resources/responses/input_tokens.py @@ -0,0 +1,309 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.responses import input_token_count_params +from ...types.responses.tool_param import ToolParam +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.response_input_item_param import ResponseInputItemParam +from ...types.responses.input_token_count_response import InputTokenCountResponse + +__all__ = ["InputTokens", "AsyncInputTokens"] + + +class InputTokens(SyncAPIResource): + @cached_property + def with_raw_response(self) -> InputTokensWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return InputTokensWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> InputTokensWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return InputTokensWithStreamingResponse(self) + + def count( + self, + *, + conversation: Optional[input_token_count_params.Conversation] | Omit = omit, + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + model: Optional[str] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + text: Optional[input_token_count_params.Text] | Omit = omit, + tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[ToolParam]] | Omit = omit, + truncation: Literal["auto", "disabled"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> InputTokenCountResponse: + """ + Get input token counts + + Args: + conversation: The conversation that this response belongs to. Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + reasoning: **gpt-5 and o-series models only** Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + truncation: The truncation strategy to use for the model response. - `auto`: If the input to + this Response exceeds the model's context window size, the model will truncate + the response to fit the context window by dropping items from the beginning of + the conversation. - `disabled` (default): If the input size will exceed the + context window size for a model, the request will fail with a 400 error. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/responses/input_tokens", + body=maybe_transform( + { + "conversation": conversation, + "input": input, + "instructions": instructions, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "truncation": truncation, + }, + input_token_count_params.InputTokenCountParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=InputTokenCountResponse, + ) + + +class AsyncInputTokens(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncInputTokensWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncInputTokensWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncInputTokensWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncInputTokensWithStreamingResponse(self) + + async def count( + self, + *, + conversation: Optional[input_token_count_params.Conversation] | Omit = omit, + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + model: Optional[str] | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + text: Optional[input_token_count_params.Text] | Omit = omit, + tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit, + tools: Optional[Iterable[ToolParam]] | Omit = omit, + truncation: Literal["auto", "disabled"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> InputTokenCountResponse: + """ + Get input token counts + + Args: + conversation: The conversation that this response belongs to. Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + reasoning: **gpt-5 and o-series models only** Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + truncation: The truncation strategy to use for the model response. - `auto`: If the input to + this Response exceeds the model's context window size, the model will truncate + the response to fit the context window by dropping items from the beginning of + the conversation. - `disabled` (default): If the input size will exceed the + context window size for a model, the request will fail with a 400 error. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/responses/input_tokens", + body=await async_maybe_transform( + { + "conversation": conversation, + "input": input, + "instructions": instructions, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "truncation": truncation, + }, + input_token_count_params.InputTokenCountParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=InputTokenCountResponse, + ) + + +class InputTokensWithRawResponse: + def __init__(self, input_tokens: InputTokens) -> None: + self._input_tokens = input_tokens + + self.count = _legacy_response.to_raw_response_wrapper( + input_tokens.count, + ) + + +class AsyncInputTokensWithRawResponse: + def __init__(self, input_tokens: AsyncInputTokens) -> None: + self._input_tokens = input_tokens + + self.count = _legacy_response.async_to_raw_response_wrapper( + input_tokens.count, + ) + + +class InputTokensWithStreamingResponse: + def __init__(self, input_tokens: InputTokens) -> None: + self._input_tokens = input_tokens + + self.count = to_streamed_response_wrapper( + input_tokens.count, + ) + + +class AsyncInputTokensWithStreamingResponse: + def __init__(self, input_tokens: AsyncInputTokens) -> None: + self._input_tokens = input_tokens + + self.count = async_to_streamed_response_wrapper( + input_tokens.count, + ) diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py new file mode 100644 index 0000000000..f55d79755a --- /dev/null +++ b/src/openai/resources/responses/responses.py @@ -0,0 +1,2678 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from ... 
import _legacy_response +from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from ..._streaming import Stream, AsyncStream +from .input_tokens import ( + InputTokens, + AsyncInputTokens, + InputTokensWithRawResponse, + AsyncInputTokensWithRawResponse, + InputTokensWithStreamingResponse, + AsyncInputTokensWithStreamingResponse, +) +from ..._base_client import make_request_options +from ...types.responses import ( + response_create_params, + response_compact_params, + response_retrieve_params, +) +from ...types.responses.response import Response +from ...types.responses.tool_param import ToolParam +from ...types.shared_params.metadata import Metadata +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.compacted_response import CompactedResponse +from ...types.responses.response_includable import ResponseIncludable +from ...types.shared_params.responses_model import ResponsesModel +from ...types.responses.response_input_param import ResponseInputParam +from ...types.responses.response_prompt_param import ResponsePromptParam +from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_input_item_param import ResponseInputItemParam +from ...types.responses.response_text_config_param import ResponseTextConfigParam + +__all__ = ["Responses", "AsyncResponses"] + + +class Responses(SyncAPIResource): + @cached_property + def input_items(self) -> InputItems: + return InputItems(self._client) + + @cached_property + def input_tokens(self) -> InputTokens: + return InputTokens(self._client) + + @cached_property + def with_raw_response(self) -> ResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ResponsesWithStreamingResponse(self) + + @overload + def create( + self, + *, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. 
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. 
See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
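For orientation outside the generated diff: a minimal, hedged sketch of how the non-streaming `create` overload above is typically called. The model name and prompt are placeholders chosen for illustration, and the client is assumed to read `OPENAI_API_KEY` from the environment.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Non-streaming call: returns a fully parsed Response object.
response = client.responses.create(
    model="gpt-4o",  # placeholder model ID, not part of this diff
    input="Summarize the Responses API in one sentence.",
)

# output_text is the SDK's convenience accessor for the concatenated text output.
print(response.output_text)
```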
+ + @overload + def create( + self, + *, + stream: Literal[True], + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. 
Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. 
Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. 
You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
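Likewise, a hedged sketch of the `stream=True` overload above, which returns a `Stream[ResponseStreamEvent]` to iterate over. The event type string is an example drawn from the documented Responses streaming events; the model name is again a placeholder.

```python
from openai import OpenAI

client = OpenAI()

stream = client.responses.create(
    model="gpt-4o",  # placeholder model ID
    input="Write a short haiku about server-sent events.",
    stream=True,
)

for event in stream:
    # Each event is a typed ResponseStreamEvent; filter on the variants you need.
    if event.type == "response.output_text.delta":
        print(event.delta, end="", flush=True)
```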
+ + @overload + def create( + self, + *, + stream: bool, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. 
Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. 
Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. 
You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
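Taken together, these three `create` overloads determine the synchronous return type: omit `stream` (or pass `stream=False`) and the call returns a parsed `Response`; pass `stream=True` and it returns a `Stream[ResponseStreamEvent]`. A minimal usage sketch, assuming a standard `OpenAI` client with `OPENAI_API_KEY` set in the environment and a model name that is available on your account:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Non-streaming: the default overload returns a parsed Response object.
response = client.responses.create(
    model="gpt-4o",
    input="Summarize the purpose of typed overloads in one sentence.",
)
print(response)

# Streaming: stream=True selects the Stream[ResponseStreamEvent] overload;
# events can be iterated as they arrive over server-sent events.
stream = client.responses.create(
    model="gpt-4o",
    input="Write a short haiku about overload resolution.",
    stream=True,
)
for event in stream:
    print(event)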
+ + def create( + self, + *, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | Stream[ResponseStreamEvent]: + return self._post( + "/responses", + body=maybe_transform( + { + "background": background, + "conversation": conversation, + "include": include, + "input": input, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "max_tool_calls": max_tool_calls, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "prompt": prompt, + "prompt_cache_key": prompt_cache_key, + "prompt_cache_retention": prompt_cache_retention, + "reasoning": reasoning, + "safety_identifier": safety_identifier, + "service_tier": service_tier, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + @overload + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + stream: Literal[False] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't 
available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def retrieve( + self, + response_id: str, + *, + stream: Literal[True], + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Stream[ResponseStreamEvent]: + """ + Retrieves a model response with the given ID. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. 
You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def retrieve( + self, + response_id: str, + *, + stream: bool, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | Stream[ResponseStreamEvent]: + """ + Retrieves a model response with the given ID. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + stream: Literal[False] | Literal[True] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | Stream[ResponseStreamEvent]: + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "include": include, + "include_obfuscation": include_obfuscation, + "starting_after": starting_after, + "stream": stream, + }, + response_retrieve_params.ResponseRetrieveParams, + ), + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Deletes a model response with the given ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def cancel( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """Cancels a model response with the given ID. + + Only responses created with the + `background` parameter set to `true` can be cancelled. + [Learn more](https://platform.openai.com/docs/guides/background). 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._post( + f"/responses/{response_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + ) + + def compact( + self, + *, + model: Union[ + Literal[ + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ], + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CompactedResponse: + """ + Compact conversation + + Args: + model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. 
Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/responses/compact", + body=maybe_transform( + { + "model": model, + "input": input, + "instructions": instructions, + "previous_response_id": previous_response_id, + }, + response_compact_params.ResponseCompactParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompactedResponse, + ) + + +class AsyncResponses(AsyncAPIResource): + @cached_property + def input_items(self) -> AsyncInputItems: + return AsyncInputItems(self._client) + + @cached_property + def input_tokens(self) -> AsyncInputTokens: + return AsyncInputTokens(self._client) + + @cached_property + def with_raw_response(self) -> AsyncResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncResponsesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. 
Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. 
+ [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. 
See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
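As on the synchronous resource, this first async overload (with `stream` omitted or `Literal[False]`) resolves to a parsed `Response`. A minimal sketch, assuming an `AsyncOpenAI` client and `asyncio.run` as the entry point:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment


async def main() -> None:
    # With stream omitted, the awaited call resolves to a Response.
    response = await client.responses.create(
        model="gpt-4o",
        input="Explain the difference between streaming and background responses.",
    )
    print(response)


asyncio.run(main())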
+ + @overload + async def create( + self, + *, + stream: Literal[True], + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. 
Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. 
Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. 
You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
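With `stream=True`, this overload resolves to an `AsyncStream[ResponseStreamEvent]` that is consumed with `async for`. A sketch under the same assumptions as above (an `AsyncOpenAI` client with the API key in the environment):

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    stream = await client.responses.create(
        model="gpt-4o",
        input="Stream a two-sentence summary of server-sent events.",
        stream=True,
    )
    # Each item is a ResponseStreamEvent; printing the raw event keeps the
    # sketch independent of any particular event type.
    async for event in stream:
        print(event)


asyncio.run(main())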
+ + @overload + async def create( + self, + *, + stream: bool, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + background: Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + + conversation: The conversation that this response belongs to. 
Items from this conversation are + prepended to `input_items` for this response request. Input items and output + items from this response are automatically added to this conversation after this + response completes. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + instructions: A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. 
Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + prompt: Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + + prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + + prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended + prompt caching, which keeps cached prefixes active for longer, up to a maximum + of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + + reasoning: **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + safety_identifier: A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + service_tier: Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + + store: Whether to store the generated model response for later retrieval via API. + + stream_options: Options for streaming responses. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. 
You can + specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + + user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use + `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
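+
+    # Illustrative usage sketch (editor's addition, not generated from the
+    # OpenAPI spec): the overloads above encode how the return type follows
+    # `stream`. Assuming an `AsyncOpenAI` client named `client`, usage looks
+    # roughly like:
+    #
+    #     resp = await client.responses.create(model="gpt-4o", input="hi")
+    #     print(resp.output_text)
+    #
+    #     stream = await client.responses.create(model="gpt-4o", input="hi", stream=True)
+    #     async for event in stream:
+    #         print(event.type)
+    #
+    # `output_text` on the parsed Response and `.type` on stream events are
+    # assumptions about the SDK surface, not something this diff adds.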
+ + async def create( + self, + *, + background: Optional[bool] | Omit = omit, + conversation: Optional[response_create_params.Conversation] | Omit = omit, + include: Optional[List[ResponseIncludable]] | Omit = omit, + input: Union[str, ResponseInputParam] | Omit = omit, + instructions: Optional[str] | Omit = omit, + max_output_tokens: Optional[int] | Omit = omit, + max_tool_calls: Optional[int] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + model: ResponsesModel | Omit = omit, + parallel_tool_calls: Optional[bool] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + prompt: Optional[ResponsePromptParam] | Omit = omit, + prompt_cache_key: str | Omit = omit, + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit, + reasoning: Optional[Reasoning] | Omit = omit, + safety_identifier: str | Omit = omit, + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit, + store: Optional[bool] | Omit = omit, + stream: Optional[Literal[False]] | Literal[True] | Omit = omit, + stream_options: Optional[response_create_params.StreamOptions] | Omit = omit, + temperature: Optional[float] | Omit = omit, + text: ResponseTextConfigParam | Omit = omit, + tool_choice: response_create_params.ToolChoice | Omit = omit, + tools: Iterable[ToolParam] | Omit = omit, + top_logprobs: Optional[int] | Omit = omit, + top_p: Optional[float] | Omit = omit, + truncation: Optional[Literal["auto", "disabled"]] | Omit = omit, + user: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | AsyncStream[ResponseStreamEvent]: + return await self._post( + "/responses", + body=await async_maybe_transform( + { + "background": background, + "conversation": conversation, + "include": include, + "input": input, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "max_tool_calls": max_tool_calls, + "metadata": metadata, + "model": model, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "prompt": prompt, + "prompt_cache_key": prompt_cache_key, + "prompt_cache_retention": prompt_cache_retention, + "reasoning": reasoning, + "safety_identifier": safety_identifier, + "service_tier": service_tier, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + @overload + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + stream: Literal[False] | Omit = omit, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def retrieve( + self, + response_id: str, + *, + stream: Literal[True], + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncStream[ResponseStreamEvent]: + """ + Retrieves a model response with the given ID. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. 
You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def retrieve( + self, + response_id: str, + *, + stream: bool, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """ + Retrieves a model response with the given ID. + + Args: + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random + characters to an `obfuscation` field on streaming delta events to normalize + payload sizes as a mitigation to certain side-channel attacks. These obfuscation + fields are included by default, but add a small amount of overhead to the data + stream. You can set `include_obfuscation` to false to optimize for bandwidth if + you trust the network links between your application and the OpenAI API. + + starting_after: The sequence number of the event after which to start streaming. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | Omit = omit, + include_obfuscation: bool | Omit = omit, + starting_after: int | Omit = omit, + stream: Literal[False] | Literal[True] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response | AsyncStream[ResponseStreamEvent]: + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "include": include, + "include_obfuscation": include_obfuscation, + "starting_after": starting_after, + "stream": stream, + }, + response_retrieve_params.ResponseRetrieveParams, + ), + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + async def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Deletes a model response with the given ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def cancel( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Response: + """Cancels a model response with the given ID. + + Only responses created with the + `background` parameter set to `true` can be cancelled. + [Learn more](https://platform.openai.com/docs/guides/background). 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._post( + f"/responses/{response_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + ) + + async def compact( + self, + *, + model: Union[ + Literal[ + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ], + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CompactedResponse: + """ + Compact conversation + + Args: + model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. 
Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/responses/compact", + body=await async_maybe_transform( + { + "model": model, + "input": input, + "instructions": instructions, + "previous_response_id": previous_response_id, + }, + response_compact_params.ResponseCompactParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompactedResponse, + ) + + +class ResponsesWithRawResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = _legacy_response.to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + responses.delete, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + responses.cancel, + ) + self.compact = _legacy_response.to_raw_response_wrapper( + responses.compact, + ) + + @cached_property + def input_items(self) -> InputItemsWithRawResponse: + return InputItemsWithRawResponse(self._responses.input_items) + + @cached_property + def input_tokens(self) -> InputTokensWithRawResponse: + return InputTokensWithRawResponse(self._responses.input_tokens) + + +class AsyncResponsesWithRawResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = _legacy_response.async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + responses.delete, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + responses.cancel, + ) + self.compact = _legacy_response.async_to_raw_response_wrapper( + responses.compact, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithRawResponse: + return AsyncInputItemsWithRawResponse(self._responses.input_items) + + @cached_property + def input_tokens(self) -> AsyncInputTokensWithRawResponse: + return AsyncInputTokensWithRawResponse(self._responses.input_tokens) + + +class ResponsesWithStreamingResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = to_streamed_response_wrapper( + responses.delete, + ) + 
self.cancel = to_streamed_response_wrapper( + responses.cancel, + ) + self.compact = to_streamed_response_wrapper( + responses.compact, + ) + + @cached_property + def input_items(self) -> InputItemsWithStreamingResponse: + return InputItemsWithStreamingResponse(self._responses.input_items) + + @cached_property + def input_tokens(self) -> InputTokensWithStreamingResponse: + return InputTokensWithStreamingResponse(self._responses.input_tokens) + + +class AsyncResponsesWithStreamingResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + responses.delete, + ) + self.cancel = async_to_streamed_response_wrapper( + responses.cancel, + ) + self.compact = async_to_streamed_response_wrapper( + responses.compact, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithStreamingResponse: + return AsyncInputItemsWithStreamingResponse(self._responses.input_items) + + @cached_property + def input_tokens(self) -> AsyncInputTokensWithStreamingResponse: + return AsyncInputTokensWithStreamingResponse(self._responses.input_tokens) diff --git a/src/openai/resources/uploads/__init__.py b/src/openai/resources/uploads/__init__.py new file mode 100644 index 0000000000..12d1056f9e --- /dev/null +++ b/src/openai/resources/uploads/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) + +__all__ = [ + "Parts", + "AsyncParts", + "PartsWithRawResponse", + "AsyncPartsWithRawResponse", + "PartsWithStreamingResponse", + "AsyncPartsWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", +] diff --git a/src/openai/resources/uploads/parts.py b/src/openai/resources/uploads/parts.py new file mode 100644 index 0000000000..73eabd4083 --- /dev/null +++ b/src/openai/resources/uploads/parts.py @@ -0,0 +1,205 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast + +import httpx + +from ... import _legacy_response +from ..._types import Body, Query, Headers, NotGiven, FileTypes, not_given +from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.uploads import part_create_params +from ...types.uploads.upload_part import UploadPart + +__all__ = ["Parts", "AsyncParts"] + + +class Parts(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return PartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return PartsWithStreamingResponse(self) + + def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + f"/uploads/{upload_id}/parts", + body=maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class AsyncParts(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncPartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncPartsWithStreamingResponse(self) + + async def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + f"/uploads/{upload_id}/parts", + body=await async_maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class PartsWithRawResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = _legacy_response.to_raw_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithRawResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = _legacy_response.async_to_raw_response_wrapper( + parts.create, + ) + + +class PartsWithStreamingResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = to_streamed_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithStreamingResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = async_to_streamed_response_wrapper( + parts.create, + ) diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py new file mode 100644 index 0000000000..fb474778f1 --- /dev/null +++ b/src/openai/resources/uploads/uploads.py @@ -0,0 +1,498 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ... 
import _legacy_response +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from ...types import FilePurpose, upload_create_params, upload_complete_params +from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.upload import Upload +from ...types.file_purpose import FilePurpose + +__all__ = ["Uploads", "AsyncUploads"] + + +class Uploads(SyncAPIResource): + @cached_property + def parts(self) -> Parts: + return Parts(self._client) + + @cached_property + def with_raw_response(self) -> UploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return UploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return UploadsWithStreamingResponse(self) + + def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + expires_after: upload_create_params.ExpiresAfter | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. 
+ + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + + expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire + after 30 days and all other files are persisted until they are manually deleted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/uploads", + body=maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + "expires_after": expires_after, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def complete( + self, + upload_id: str, + *, + part_ids: SequenceNotStr[str], + md5: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. 
+ + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/complete", + body=maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class AsyncUploads(AsyncAPIResource): + @cached_property + def parts(self) -> AsyncParts: + return AsyncParts(self._client) + + @cached_property + def with_raw_response(self) -> AsyncUploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncUploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncUploadsWithStreamingResponse(self) + + async def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + expires_after: upload_create_params.ExpiresAfter | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. 
+ + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + + expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire + after 30 days and all other files are persisted until they are manually deleted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/uploads", + body=await async_maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + "expires_after": expires_after, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def complete( + self, + upload_id: str, + *, + part_ids: SequenceNotStr[str], + md5: str | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. 
+ + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/complete", + body=await async_maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class UploadsWithRawResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithRawResponse: + return PartsWithRawResponse(self._uploads.parts) + + +class AsyncUploadsWithRawResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.async_to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.async_to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithRawResponse: + return AsyncPartsWithRawResponse(self._uploads.parts) + + +class UploadsWithStreamingResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithStreamingResponse: + return PartsWithStreamingResponse(self._uploads.parts) + + +class AsyncUploadsWithStreamingResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = async_to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = async_to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = async_to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithStreamingResponse: + return AsyncPartsWithStreamingResponse(self._uploads.parts) diff --git a/src/openai/resources/beta/vector_stores/__init__.py b/src/openai/resources/vector_stores/__init__.py similarity index 100% rename from src/openai/resources/beta/vector_stores/__init__.py rename to src/openai/resources/vector_stores/__init__.py diff --git a/src/openai/resources/beta/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py similarity index 63% rename from src/openai/resources/beta/vector_stores/file_batches.py rename to src/openai/resources/vector_stores/file_batches.py index f1ced51700..30d110a325 100644 --- a/src/openai/resources/beta/vector_stores/file_batches.py +++ b/src/openai/resources/vector_stores/file_batches.py @@ -2,33 +2,24 @@ from __future__ import annotations -import 
asyncio -from typing import List, Iterable +from typing import Dict, Union, Iterable, Optional from typing_extensions import Literal -from concurrent.futures import Future, ThreadPoolExecutor, as_completed import httpx -import sniffio - -from .... import _legacy_response -from ....types import FileObject -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ...._utils import ( - is_given, - maybe_transform, - async_maybe_transform, -) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params -from ....types.beta.vector_stores.vector_store_file import VectorStoreFile -from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch + +from ... import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch __all__ = ["FileBatches", "AsyncFileBatches"] @@ -36,31 +27,60 @@ class FileBatches(SyncAPIResource): @cached_property def with_raw_response(self) -> FileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FileBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FileBatchesWithStreamingResponse(self) def create( self, vector_store_id: str, *, - file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """ Create a vector store file batch. Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access - files. + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. extra_headers: Send extra headers @@ -75,7 +95,15 @@ def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/vector_stores/{vector_store_id}/file_batches", - body=maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + body=maybe_transform( + { + "attributes": attributes, + "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, + }, + file_batch_create_params.FileBatchCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -92,7 +120,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """ Retrieves a vector store file batch. @@ -129,7 +157,7 @@ def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """Cancel a vector store file batch. @@ -158,41 +186,22 @@ def cancel( cast_to=VectorStoreFileBatch, ) - def create_and_poll( - self, - vector_store_id: str, - *, - file_ids: List[str], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Create a vector store batch and poll until all files have been processed.""" - batch = self.create( - vector_store_id=vector_store_id, - file_ids=file_ids, - ) - # TODO: don't poll unless necessary?? 
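The reworked `FileBatches.create` above now accepts `attributes`, `chunking_strategy`, and a `files` list that is mutually exclusive with `file_ids`. A minimal usage sketch, assuming the resource is exposed as `client.vector_stores.file_batches` after the move out of beta; the IDs (`vs_123`, `file-abc`, `file-def`) are placeholders:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Attach several already-uploaded files in one batch; the shared `attributes`
# and `chunking_strategy` apply to every file listed in `file_ids`.
batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_123",  # placeholder vector store ID
    file_ids=["file-abc", "file-def"],  # placeholder file IDs
    attributes={"project": "docs", "reviewed": False},
    chunking_strategy={"type": "auto"},
)
print(batch.status, batch.file_counts)
```

To override metadata per file, pass `files=[{"file_id": ..., "attributes": {...}}]` instead of `file_ids`; per the docstring above, the two parameters are mutually exclusive.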
- return self.poll( - batch.id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) - def list_files( self, batch_id: str, *, vector_store_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[VectorStoreFile]: """ Returns a list of vector store files in a batch. @@ -205,8 +214,8 @@ def list_files( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -251,120 +260,64 @@ def list_files( model=VectorStoreFile, ) - def poll( - self, - batch_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Wait for the given file batch to be processed. - - Note: this will return even if one of the files failed to process, you need to - check batch.file_counts.failed_count to handle this case. - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = self.with_raw_response.retrieve( - batch_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - batch = response.parse() - if batch.file_counts.in_progress > 0: - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - self._sleep(poll_interval_ms / 1000) - continue - - return batch - - def upload_and_poll( - self, - vector_store_id: str, - *, - files: Iterable[FileTypes], - max_concurrency: int = 5, - file_ids: List[str] = [], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Uploads the given files concurrently and then creates a vector store file batch. - - If you've already uploaded certain files that you want to include in this batch - then you can pass their IDs through the `file_ids` argument. - - By default, if any file upload fails then an exception will be eagerly raised. - - The number of concurrency uploads is configurable using the `max_concurrency` - parameter. - - Note: this method only supports `asyncio` or `trio` as the backing async - runtime. 
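This hunk drops the `create_and_poll`/`poll` convenience helpers from the synchronous resource. If equivalent helpers are not reintroduced elsewhere in the PR, a caller can poll manually with `retrieve`, along these lines (a rough sketch, not the SDK's API; `wait_for_file_batch` and the one-second interval are illustrative choices):

```python
import time

from openai import OpenAI


def wait_for_file_batch(client: OpenAI, vector_store_id: str, batch_id: str, interval: float = 1.0):
    """Poll a vector store file batch until no files are still in progress (illustrative only)."""
    while True:
        batch = client.vector_stores.file_batches.retrieve(batch_id, vector_store_id=vector_store_id)
        if batch.file_counts.in_progress == 0:
            # Note: failed files also stop being "in progress"; check
            # batch.file_counts.failed before treating the batch as done.
            return batch
        time.sleep(interval)
```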
- """ - results: list[FileObject] = [] - - with ThreadPoolExecutor(max_workers=max_concurrency) as executor: - futures: list[Future[FileObject]] = [ - executor.submit( - self._client.files.create, - file=file, - purpose="assistants", - ) - for file in files - ] - - for future in as_completed(futures): - exc = future.exception() - if exc: - raise exc - - results.append(future.result()) - - batch = self.create_and_poll( - vector_store_id=vector_store_id, - file_ids=[*file_ids, *(f.id for f in results)], - poll_interval_ms=poll_interval_ms, - ) - return batch - class AsyncFileBatches(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFileBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFileBatchesWithStreamingResponse(self) async def create( self, vector_store_id: str, *, - file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """ Create a vector store file batch. Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access - files. + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. 
extra_headers: Send extra headers @@ -379,7 +332,15 @@ async def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/vector_stores/{vector_store_id}/file_batches", - body=await async_maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + body=await async_maybe_transform( + { + "attributes": attributes, + "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, + }, + file_batch_create_params.FileBatchCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -396,7 +357,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """ Retrieves a vector store file batch. @@ -433,7 +394,7 @@ async def cancel( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileBatch: """Cancel a vector store file batch. @@ -462,41 +423,22 @@ async def cancel( cast_to=VectorStoreFileBatch, ) - async def create_and_poll( - self, - vector_store_id: str, - *, - file_ids: List[str], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Create a vector store batch and poll until all files have been processed.""" - batch = await self.create( - vector_store_id=vector_store_id, - file_ids=file_ids, - ) - # TODO: don't poll unless necessary?? - return await self.poll( - batch.id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) - def list_files( self, batch_id: str, *, vector_store_id: str, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: """ Returns a list of vector store files in a batch. @@ -509,8 +451,8 @@ def list_files( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. 
One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -555,114 +497,6 @@ def list_files( model=VectorStoreFile, ) - async def poll( - self, - batch_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Wait for the given file batch to be processed. - - Note: this will return even if one of the files failed to process, you need to - check batch.file_counts.failed_count to handle this case. - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = await self.with_raw_response.retrieve( - batch_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - batch = response.parse() - if batch.file_counts.in_progress > 0: - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - await self._sleep(poll_interval_ms / 1000) - continue - - return batch - - async def upload_and_poll( - self, - vector_store_id: str, - *, - files: Iterable[FileTypes], - max_concurrency: int = 5, - file_ids: List[str] = [], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Uploads the given files concurrently and then creates a vector store file batch. - - If you've already uploaded certain files that you want to include in this batch - then you can pass their IDs through the `file_ids` argument. - - By default, if any file upload fails then an exception will be eagerly raised. - - The number of concurrency uploads is configurable using the `max_concurrency` - parameter. - - Note: this method only supports `asyncio` or `trio` as the backing async - runtime. - """ - uploaded_files: list[FileObject] = [] - - async_library = sniffio.current_async_library() - - if async_library == "asyncio": - - async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None: - async with semaphore: - file_obj = await self._client.files.create( - file=file, - purpose="assistants", - ) - uploaded_files.append(file_obj) - - semaphore = asyncio.Semaphore(max_concurrency) - - tasks = [asyncio_upload_file(semaphore, file) for file in files] - - await asyncio.gather(*tasks) - elif async_library == "trio": - # We only import if the library is being used. - # We support Python 3.7 so are using an older version of trio that does not have type information - import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] - - async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None: - async with limiter: - file_obj = await self._client.files.create( - file=file, - purpose="assistants", - ) - uploaded_files.append(file_obj) - - limiter = trio.CapacityLimiter(max_concurrency) - - async with trio.open_nursery() as nursery: - for file in files: - nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType] - else: - raise RuntimeError( - f"Async runtime {async_library} is not supported yet. 
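The async `upload_and_poll` helper removed above handled concurrent uploads for both asyncio and trio. A bare-bones, asyncio-only replacement can be sketched from the endpoints shown in this diff; `upload_and_batch`, the paths, and the IDs below are illustrative, not part of the SDK:

```python
import asyncio

from openai import AsyncOpenAI


async def upload_and_batch(paths: list[str], vector_store_id: str):
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment
    # Upload the files concurrently, then attach them to the store in a single batch.
    # Real code should close the file handles it opens here.
    uploaded = await asyncio.gather(
        *(client.files.create(file=open(path, "rb"), purpose="assistants") for path in paths)
    )
    return await client.vector_stores.file_batches.create(
        vector_store_id=vector_store_id,
        file_ids=[f.id for f in uploaded],
    )


# asyncio.run(upload_and_batch(["notes.txt"], "vs_123"))
```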
Only asyncio or trio is supported", - ) - - batch = await self.create_and_poll( - vector_store_id=vector_store_id, - file_ids=[*file_ids, *(f.id for f in uploaded_files)], - poll_interval_ms=poll_interval_ms, - ) - return batch - class FileBatchesWithRawResponse: def __init__(self, file_batches: FileBatches) -> None: diff --git a/src/openai/resources/beta/vector_stores/files.py b/src/openai/resources/vector_stores/files.py similarity index 58% rename from src/openai/resources/beta/vector_stores/files.py rename to src/openai/resources/vector_stores/files.py index 5c3db27619..de795eb7f8 100644 --- a/src/openai/resources/beta/vector_stores/files.py +++ b/src/openai/resources/vector_stores/files.py @@ -2,29 +2,25 @@ from __future__ import annotations -from typing import TYPE_CHECKING -from typing_extensions import Literal, assert_never +from typing import Dict, Union, Optional +from typing_extensions import Literal import httpx -from .... import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ...._utils import ( - is_given, - maybe_transform, - async_maybe_transform, -) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_stores import file_list_params, file_create_params -from ....types.beta.vector_stores.vector_store_file import VectorStoreFile -from ....types.beta.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted +from ... import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_list_params, file_create_params, file_update_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.file_content_response import FileContentResponse +from ...types.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted __all__ = ["Files", "AsyncFiles"] @@ -32,10 +28,21 @@ class Files(SyncAPIResource): @cached_property def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FilesWithStreamingResponse(self) def create( @@ -43,12 +50,14 @@ def create( vector_store_id: str, *, file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFile: """ Create a vector store file by attaching a @@ -60,6 +69,15 @@ def create( vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -73,7 +91,14 @@ def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/vector_stores/{vector_store_id}/files", - body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), + body=maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -90,7 +115,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFile: """ Retrieves a vector store file. @@ -117,21 +142,66 @@ def retrieve( cast_to=VectorStoreFile, ) + def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. 
Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + def list( self, vector_store_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[VectorStoreFile]: """ Returns a list of vector store files. @@ -144,8 +214,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -198,7 +268,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileDeleted: """Delete a vector store file. @@ -229,100 +299,63 @@ def delete( cast_to=VectorStoreFileDeleted, ) - def create_and_poll( + def content( self, file_id: str, *, vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Attach a file to the given vector store and wait for it to be processed.""" - self.create(vector_store_id=vector_store_id, file_id=file_id) + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncPage[FileContentResponse]: + """ + Retrieve the parsed contents of a vector store file. - return self.poll( - file_id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) + Args: + extra_headers: Send extra headers - def poll( - self, - file_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Wait for the vector store file to finish processing. + extra_query: Add additional query parameters to the request - Note: this will return even if the file failed to process, you need to check - file.last_error and file.status to handle these cases - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = self.with_raw_response.retrieve( - file_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - file = response.parse() - if file.status == "in_progress": - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - self._sleep(poll_interval_ms / 1000) - elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": - return file - else: - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(file.status) - else: - return file - - def upload( - self, - *, - vector_store_id: str, - file: FileTypes, - ) -> VectorStoreFile: - """Upload a file to the `files` API and then attach it to the given vector store. + extra_body: Add additional JSON properties to the request - Note the file will be asynchronously processed (you can use the alternative - polling helper method to wait for processing to complete). + timeout: Override the client-level default timeout for this request, in seconds """ - file_obj = self._client.files.create(file=file, purpose="assistants") - return self.create(vector_store_id=vector_store_id, file_id=file_obj.id) - - def upload_and_poll( - self, - *, - vector_store_id: str, - file: FileTypes, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Add a file to a vector store and poll until processing is complete.""" - file_obj = self._client.files.create(file=file, purpose="assistants") - return self.create_and_poll( - vector_store_id=vector_store_id, - file_id=file_obj.id, - poll_interval_ms=poll_interval_ms, + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=SyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, ) class AsyncFiles(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
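The synchronous `Files` resource above gains `update` (for mutating `attributes`) and `content` (for reading parsed chunks). A short sketch of both, with placeholder IDs and assuming the resource lives at `client.vector_stores.files`:

```python
from openai import OpenAI

client = OpenAI()

# Attach a file with metadata, then adjust that metadata later with `update`.
vs_file = client.vector_stores.files.create(
    "vs_123",  # placeholder vector store ID
    file_id="file-abc",  # placeholder file ID
    attributes={"author": "jane", "reviewed": False},
)
vs_file = client.vector_stores.files.update(
    "file-abc",
    vector_store_id="vs_123",
    attributes={"author": "jane", "reviewed": True},
)

# `content` returns a page of parsed chunks; the `.text` field name follows the
# FileContentResponse model and is assumed rather than shown in this diff.
for part in client.vector_stores.files.content("file-abc", vector_store_id="vs_123"):
    print(part.text)
```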
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFilesWithStreamingResponse(self) async def create( @@ -330,12 +363,14 @@ async def create( vector_store_id: str, *, file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFile: """ Create a vector store file by attaching a @@ -347,6 +382,15 @@ async def create( vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -360,7 +404,14 @@ async def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/vector_stores/{vector_store_id}/files", - body=await async_maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), + body=await async_maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -377,7 +428,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFile: """ Retrieves a vector store file. @@ -404,21 +455,66 @@ async def retrieve( cast_to=VectorStoreFile, ) + async def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=await async_maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + def list( self, vector_store_id: str, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]: """ Returns a list of vector store files. @@ -431,8 +527,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -485,7 +581,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreFileDeleted: """Delete a vector store file. 
@@ -516,90 +612,42 @@ async def delete( cast_to=VectorStoreFileDeleted, ) - async def create_and_poll( + def content( self, file_id: str, *, vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Attach a file to the given vector store and wait for it to be processed.""" - await self.create(vector_store_id=vector_store_id, file_id=file_id) + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[FileContentResponse, AsyncPage[FileContentResponse]]: + """ + Retrieve the parsed contents of a vector store file. - return await self.poll( - file_id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) + Args: + extra_headers: Send extra headers - async def poll( - self, - file_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Wait for the vector store file to finish processing. + extra_query: Add additional query parameters to the request - Note: this will return even if the file failed to process, you need to check - file.last_error and file.status to handle these cases - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = await self.with_raw_response.retrieve( - file_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - file = response.parse() - if file.status == "in_progress": - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - await self._sleep(poll_interval_ms / 1000) - elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": - return file - else: - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(file.status) - else: - return file - - async def upload( - self, - *, - vector_store_id: str, - file: FileTypes, - ) -> VectorStoreFile: - """Upload a file to the `files` API and then attach it to the given vector store. + extra_body: Add additional JSON properties to the request - Note the file will be asynchronously processed (you can use the alternative - polling helper method to wait for processing to complete). 
+ timeout: Override the client-level default timeout for this request, in seconds """ - file_obj = await self._client.files.create(file=file, purpose="assistants") - return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id) - - async def upload_and_poll( - self, - *, - vector_store_id: str, - file: FileTypes, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Add a file to a vector store and poll until processing is complete.""" - file_obj = await self._client.files.create(file=file, purpose="assistants") - return await self.create_and_poll( - vector_store_id=vector_store_id, - file_id=file_obj.id, - poll_interval_ms=poll_interval_ms, + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=AsyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, ) @@ -613,12 +661,18 @@ def __init__(self, files: Files) -> None: self.retrieve = _legacy_response.to_raw_response_wrapper( files.retrieve, ) + self.update = _legacy_response.to_raw_response_wrapper( + files.update, + ) self.list = _legacy_response.to_raw_response_wrapper( files.list, ) self.delete = _legacy_response.to_raw_response_wrapper( files.delete, ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) class AsyncFilesWithRawResponse: @@ -631,12 +685,18 @@ def __init__(self, files: AsyncFiles) -> None: self.retrieve = _legacy_response.async_to_raw_response_wrapper( files.retrieve, ) + self.update = _legacy_response.async_to_raw_response_wrapper( + files.update, + ) self.list = _legacy_response.async_to_raw_response_wrapper( files.list, ) self.delete = _legacy_response.async_to_raw_response_wrapper( files.delete, ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) class FilesWithStreamingResponse: @@ -649,12 +709,18 @@ def __init__(self, files: Files) -> None: self.retrieve = to_streamed_response_wrapper( files.retrieve, ) + self.update = to_streamed_response_wrapper( + files.update, + ) self.list = to_streamed_response_wrapper( files.list, ) self.delete = to_streamed_response_wrapper( files.delete, ) + self.content = to_streamed_response_wrapper( + files.content, + ) class AsyncFilesWithStreamingResponse: @@ -667,9 +733,15 @@ def __init__(self, files: AsyncFiles) -> None: self.retrieve = async_to_streamed_response_wrapper( files.retrieve, ) + self.update = async_to_streamed_response_wrapper( + files.update, + ) self.list = async_to_streamed_response_wrapper( files.list, ) self.delete = async_to_streamed_response_wrapper( files.delete, ) + self.content = async_to_streamed_response_wrapper( + files.content, + ) diff --git a/src/openai/resources/beta/vector_stores/vector_stores.py b/src/openai/resources/vector_stores/vector_stores.py similarity index 67% rename from src/openai/resources/beta/vector_stores/vector_stores.py rename to src/openai/resources/vector_stores/vector_stores.py index 8a177c2864..490e3e7fdb 100644 --- a/src/openai/resources/beta/vector_stores/vector_stores.py +++ b/src/openai/resources/vector_stores/vector_stores.py @@ -2,12 +2,12 
@@ from __future__ import annotations -from typing import List, Optional +from typing import Union, Optional from typing_extensions import Literal import httpx -from .... import _legacy_response +from ... import _legacy_response from .files import ( Files, AsyncFiles, @@ -16,14 +16,19 @@ FilesWithStreamingResponse, AsyncFilesWithStreamingResponse, ) -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( - maybe_transform, - async_maybe_transform, +from ...types import ( + FileChunkingStrategyParam, + vector_store_list_params, + vector_store_create_params, + vector_store_search_params, + vector_store_update_params, ) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage from .file_batches import ( FileBatches, AsyncFileBatches, @@ -32,14 +37,12 @@ FileBatchesWithStreamingResponse, AsyncFileBatchesWithStreamingResponse, ) -from ....pagination import SyncCursorPage, AsyncCursorPage -from ....types.beta import vector_store_list_params, vector_store_create_params, vector_store_update_params -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_store import VectorStore -from ....types.beta.vector_store_deleted import VectorStoreDeleted +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_store import VectorStore +from ...types.vector_store_deleted import VectorStoreDeleted +from ...types.shared_params.metadata import Metadata +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_store_search_response import VectorStoreSearchResponse __all__ = ["VectorStores", "AsyncVectorStores"] @@ -55,30 +58,49 @@ def file_batches(self) -> FileBatches: @cached_property def with_raw_response(self) -> VectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return VectorStoresWithRawResponse(self) @cached_property def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
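Each resource in this diff now documents `with_raw_response` and `with_streaming_response`. For readers unfamiliar with those wrappers, a typical call looks roughly like this (placeholder ID; the pattern follows the README sections linked from the docstrings):

```python
from openai import OpenAI

client = OpenAI()

# Raw response: inspect headers, then parse back into the typed model.
response = client.vector_stores.with_raw_response.retrieve("vs_123")
print(response.headers.get("x-request-id"))
vector_store = response.parse()

# Streaming response: the body is not read until you ask for it.
with client.vector_stores.with_streaming_response.retrieve("vs_123") as streamed:
    print(streamed.headers.get("content-type"))
```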
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return VectorStoresWithStreamingResponse(self) def create( self, *, - expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: str | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, + description: str | Omit = omit, + expires_after: vector_store_create_params.ExpiresAfter | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Create a vector store. Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + description: A description for the vector store. Can be used to describe the vector store's + purpose. + expires_after: The expiration policy for a vector store. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that @@ -86,9 +108,11 @@ def create( files. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -105,6 +129,8 @@ def create( "/vector_stores", body=maybe_transform( { + "chunking_strategy": chunking_strategy, + "description": description, "expires_after": expires_after, "file_ids": file_ids, "metadata": metadata, @@ -127,7 +153,7 @@ def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Retrieves a vector store. @@ -156,15 +182,15 @@ def update( self, vector_store_id: str, *, - expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
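`VectorStores.create` above now takes `chunking_strategy`, `description`, and typed `metadata`. An illustrative call, assuming a placeholder file ID and the static chunking shape documented in the API reference (not shown in this diff):

```python
from openai import OpenAI

client = OpenAI()

vector_store = client.vector_stores.create(
    name="support-docs",
    description="Help-center articles indexed for file_search",
    file_ids=["file-abc"],  # placeholder file ID
    # Static chunking shape as in the API reference; omit to use the `auto` strategy.
    chunking_strategy={
        "type": "static",
        "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400},
    },
    metadata={"team": "support"},
)
print(vector_store.id, vector_store.status)
```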
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Modifies a vector store. @@ -173,9 +199,11 @@ def update( expires_after: The expiration policy for a vector store. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -209,16 +237,16 @@ def update( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> SyncCursorPage[VectorStore]: """Returns a list of vector stores. @@ -232,8 +260,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -280,7 +308,7 @@ def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreDeleted: """ Delete a vector store. @@ -305,6 +333,69 @@ def delete( cast_to=VectorStoreDeleted, ) + def search( + self, + vector_store_id: str, + *, + query: Union[str, SequenceNotStr[str]], + filters: vector_store_search_params.Filters | Omit = omit, + max_num_results: int | Omit = omit, + ranking_options: vector_store_search_params.RankingOptions | Omit = omit, + rewrite_query: bool | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncPage[VectorStoreSearchResponse]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=SyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + class AsyncVectorStores(AsyncAPIResource): @cached_property @@ -317,30 +408,49 @@ def file_batches(self) -> AsyncFileBatches: @cached_property def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncVectorStoresWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncVectorStoresWithStreamingResponse(self) async def create( self, *, - expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: str | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | Omit = omit, + description: str | Omit = omit, + expires_after: vector_store_create_params.ExpiresAfter | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
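The new `search` method defined above posts to `/vector_stores/{vector_store_id}/search` and pages through `VectorStoreSearchResponse` items. A minimal sketch; the filter shape and result field names follow the public API reference rather than this diff, and the IDs are placeholders:

```python
from openai import OpenAI

client = OpenAI()

results = client.vector_stores.search(
    "vs_123",  # placeholder vector store ID
    query="How do I rotate my API key?",
    max_num_results=5,
    filters={"type": "eq", "key": "team", "value": "support"},  # assumed comparison-filter shape
    rewrite_query=True,
)
for hit in results:  # the returned page is iterable
    print(hit.filename, hit.score)  # field names assumed from the search response model
```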
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Create a vector store. Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + + description: A description for the vector store. Can be used to describe the vector store's + purpose. + expires_after: The expiration policy for a vector store. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that @@ -348,9 +458,11 @@ async def create( files. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -367,6 +479,8 @@ async def create( "/vector_stores", body=await async_maybe_transform( { + "chunking_strategy": chunking_strategy, + "description": description, "expires_after": expires_after, "file_ids": file_ids, "metadata": metadata, @@ -389,7 +503,7 @@ async def retrieve( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Retrieves a vector store. @@ -418,15 +532,15 @@ async def update( self, vector_store_id: str, *, - expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - name: Optional[str] | NotGiven = NOT_GIVEN, + expires_after: Optional[vector_store_update_params.ExpiresAfter] | Omit = omit, + metadata: Optional[Metadata] | Omit = omit, + name: Optional[str] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStore: """ Modifies a vector store. @@ -435,9 +549,11 @@ async def update( expires_after: The expiration policy for a vector store. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. 
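The async resource mirrors the sync one; in practice the only differences for callers are `AsyncOpenAI` and `await`. A brief sketch with placeholder values:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment
    store = await client.vector_stores.create(
        name="release-notes",
        description="Indexed release notes",
        metadata={"owner": "docs-team"},
    )
    store = await client.vector_stores.update(store.id, name="release-notes-v2")
    print(store.name)


# asyncio.run(main())
```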
@@ -471,16 +587,16 @@ async def update( def list( self, *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + after: str | Omit = omit, + before: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncPaginator[VectorStore, AsyncCursorPage[VectorStore]]: """Returns a list of vector stores. @@ -494,8 +610,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -542,7 +658,7 @@ async def delete( extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> VectorStoreDeleted: """ Delete a vector store. @@ -567,6 +683,69 @@ async def delete( cast_to=VectorStoreDeleted, ) + def search( + self, + vector_store_id: str, + *, + query: Union[str, SequenceNotStr[str]], + filters: vector_store_search_params.Filters | Omit = omit, + max_num_results: int | Omit = omit, + ranking_options: vector_store_search_params.RankingOptions | Omit = omit, + rewrite_query: bool | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[VectorStoreSearchResponse, AsyncPage[VectorStoreSearchResponse]]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=AsyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + class VectorStoresWithRawResponse: def __init__(self, vector_stores: VectorStores) -> None: @@ -587,6 +766,9 @@ def __init__(self, vector_stores: VectorStores) -> None: self.delete = _legacy_response.to_raw_response_wrapper( vector_stores.delete, ) + self.search = _legacy_response.to_raw_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> FilesWithRawResponse: @@ -616,6 +798,9 @@ def __init__(self, vector_stores: AsyncVectorStores) -> None: self.delete = _legacy_response.async_to_raw_response_wrapper( vector_stores.delete, ) + self.search = _legacy_response.async_to_raw_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> AsyncFilesWithRawResponse: @@ -645,6 +830,9 @@ def __init__(self, vector_stores: VectorStores) -> None: self.delete = to_streamed_response_wrapper( vector_stores.delete, ) + self.search = to_streamed_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> FilesWithStreamingResponse: @@ -674,6 +862,9 @@ def __init__(self, vector_stores: AsyncVectorStores) -> None: self.delete = async_to_streamed_response_wrapper( vector_stores.delete, ) + self.search = async_to_streamed_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> AsyncFilesWithStreamingResponse: diff --git a/src/openai/resources/videos.py b/src/openai/resources/videos.py new file mode 100644 index 0000000000..0705831910 --- /dev/null +++ b/src/openai/resources/videos.py @@ -0,0 +1,704 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast +from typing_extensions import Literal + +import httpx + +from .. 
import _legacy_response +from ..types import ( + VideoSize, + VideoModel, + VideoSeconds, + video_list_params, + video_remix_params, + video_create_params, + video_download_content_params, +) +from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given +from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ..pagination import SyncConversationCursorPage, AsyncConversationCursorPage +from ..types.video import Video +from .._base_client import AsyncPaginator, make_request_options +from ..types.video_size import VideoSize +from ..types.video_model import VideoModel +from ..types.video_seconds import VideoSeconds +from ..types.video_delete_response import VideoDeleteResponse + +__all__ = ["Videos", "AsyncVideos"] + + +class Videos(SyncAPIResource): + @cached_property + def with_raw_response(self) -> VideosWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return VideosWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VideosWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return VideosWithStreamingResponse(self) + + def create( + self, + *, + prompt: str, + input_reference: FileTypes | Omit = omit, + model: VideoModel | Omit = omit, + seconds: VideoSeconds | Omit = omit, + size: VideoSize | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Create a video + + Args: + prompt: Text prompt that describes the video to generate. + + input_reference: Optional image reference that guides generation. + + model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults + to `sora-2`. + + seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds. + + size: Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "prompt": prompt, + "input_reference": input_reference, + "model": model, + "seconds": seconds, + "size": size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["input_reference"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/videos", + body=maybe_transform(body, video_create_params.VideoCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + def retrieve( + self, + video_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Retrieve a video + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return self._get( + f"/videos/{video_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncConversationCursorPage[Video]: + """ + List videos + + Args: + after: Identifier for the last item from the previous pagination request + + limit: Number of items to retrieve + + order: Sort order of results by timestamp. Use `asc` for ascending order or `desc` for + descending order. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/videos", + page=SyncConversationCursorPage[Video], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + video_list_params.VideoListParams, + ), + ), + model=Video, + ) + + def delete( + self, + video_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> VideoDeleteResponse: + """ + Delete a video + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return self._delete( + f"/videos/{video_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VideoDeleteResponse, + ) + + def download_content( + self, + video_id: str, + *, + variant: Literal["video", "thumbnail", "spritesheet"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """Download video content + + Args: + variant: Which downloadable asset to return. + + Defaults to the MP4 video. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/videos/{video_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"variant": variant}, video_download_content_params.VideoDownloadContentParams), + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + def remix( + self, + video_id: str, + *, + prompt: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Create a video remix + + Args: + prompt: Updated text prompt that directs the remix generation. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return self._post( + f"/videos/{video_id}/remix", + body=maybe_transform({"prompt": prompt}, video_remix_params.VideoRemixParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + +class AsyncVideos(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncVideosWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncVideosWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVideosWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncVideosWithStreamingResponse(self) + + async def create( + self, + *, + prompt: str, + input_reference: FileTypes | Omit = omit, + model: VideoModel | Omit = omit, + seconds: VideoSeconds | Omit = omit, + size: VideoSize | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Create a video + + Args: + prompt: Text prompt that describes the video to generate. + + input_reference: Optional image reference that guides generation. + + model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults + to `sora-2`. + + seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds. + + size: Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + body = deepcopy_minimal( + { + "prompt": prompt, + "input_reference": input_reference, + "model": model, + "seconds": seconds, + "size": size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["input_reference"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + "/videos", + body=await async_maybe_transform(body, video_create_params.VideoCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + async def retrieve( + self, + video_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Retrieve a video + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return await self._get( + f"/videos/{video_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + def list( + self, + *, + after: str | Omit = omit, + limit: int | Omit = omit, + order: Literal["asc", "desc"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[Video, AsyncConversationCursorPage[Video]]: + """ + List videos + + Args: + after: Identifier for the last item from the previous pagination request + + limit: Number of items to retrieve + + order: Sort order of results by timestamp. Use `asc` for ascending order or `desc` for + descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/videos", + page=AsyncConversationCursorPage[Video], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + video_list_params.VideoListParams, + ), + ), + model=Video, + ) + + async def delete( + self, + video_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> VideoDeleteResponse: + """ + Delete a video + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return await self._delete( + f"/videos/{video_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VideoDeleteResponse, + ) + + async def download_content( + self, + video_id: str, + *, + variant: Literal["video", "thumbnail", "spritesheet"] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> _legacy_response.HttpxBinaryResponseContent: + """Download video content + + Args: + variant: Which downloadable asset to return. + + Defaults to the MP4 video. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return await self._get( + f"/videos/{video_id}/content", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"variant": variant}, video_download_content_params.VideoDownloadContentParams + ), + ), + cast_to=_legacy_response.HttpxBinaryResponseContent, + ) + + async def remix( + self, + video_id: str, + *, + prompt: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Video: + """ + Create a video remix + + Args: + prompt: Updated text prompt that directs the remix generation. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not video_id: + raise ValueError(f"Expected a non-empty value for `video_id` but received {video_id!r}") + return await self._post( + f"/videos/{video_id}/remix", + body=await async_maybe_transform({"prompt": prompt}, video_remix_params.VideoRemixParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Video, + ) + + +class VideosWithRawResponse: + def __init__(self, videos: Videos) -> None: + self._videos = videos + + self.create = _legacy_response.to_raw_response_wrapper( + videos.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + videos.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + videos.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + videos.delete, + ) + self.download_content = _legacy_response.to_raw_response_wrapper( + videos.download_content, + ) + self.remix = _legacy_response.to_raw_response_wrapper( + videos.remix, + ) + + +class AsyncVideosWithRawResponse: + def __init__(self, videos: AsyncVideos) -> None: + self._videos = videos + + self.create = _legacy_response.async_to_raw_response_wrapper( + videos.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + videos.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + videos.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + videos.delete, + ) + self.download_content = _legacy_response.async_to_raw_response_wrapper( + videos.download_content, + ) + self.remix = _legacy_response.async_to_raw_response_wrapper( + videos.remix, + ) + + +class VideosWithStreamingResponse: + def __init__(self, videos: Videos) -> None: + self._videos = videos + + self.create = to_streamed_response_wrapper( + videos.create, + ) + self.retrieve = to_streamed_response_wrapper( + videos.retrieve, + ) + self.list = to_streamed_response_wrapper( + videos.list, + ) + self.delete = to_streamed_response_wrapper( + videos.delete, + ) + self.download_content = to_custom_streamed_response_wrapper( + videos.download_content, + StreamedBinaryAPIResponse, + ) + self.remix = to_streamed_response_wrapper( + videos.remix, + ) + + +class AsyncVideosWithStreamingResponse: + def __init__(self, videos: AsyncVideos) -> None: + self._videos = videos + + self.create = async_to_streamed_response_wrapper( + videos.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + videos.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + videos.list, + ) + self.delete = async_to_streamed_response_wrapper( + videos.delete, + ) + self.download_content = async_to_custom_streamed_response_wrapper( + videos.download_content, + AsyncStreamedBinaryAPIResponse, + ) + self.remix = async_to_streamed_response_wrapper( + videos.remix, + ) diff --git a/src/openai/resources/webhooks.py b/src/openai/resources/webhooks.py new file mode 100644 index 0000000000..76245997fa --- /dev/null +++ b/src/openai/resources/webhooks.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import json +from typing import cast + +from .._models import construct_type +from .._resource import SyncAPIResource, AsyncAPIResource +from ..types.webhooks.unwrap_webhook_event import UnwrapWebhookEvent + +__all__ = ["Webhooks", "AsyncWebhooks"] + + +class Webhooks(SyncAPIResource): + def unwrap(self, payload: str) -> UnwrapWebhookEvent: + return cast( + UnwrapWebhookEvent, + construct_type( + type_=UnwrapWebhookEvent, + value=json.loads(payload), + ), + ) + + +class AsyncWebhooks(AsyncAPIResource): + def unwrap(self, payload: str) -> UnwrapWebhookEvent: + return cast( + UnwrapWebhookEvent, + construct_type( + type_=UnwrapWebhookEvent, + value=json.loads(payload), + ), + ) diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py index 7873efb34f..a98ca16ee9 100644 --- a/src/openai/types/__init__.py +++ b/src/openai/types/__init__.py @@ -5,33 +5,111 @@ from .batch import Batch as Batch from .image import Image as Image from .model import Model as Model +from .video import Video as Video from .shared import ( + Metadata as Metadata, + AllModels as AllModels, + ChatModel as ChatModel, + Reasoning as Reasoning, ErrorObject as ErrorObject, + CompoundFilter as CompoundFilter, + ResponsesModel as ResponsesModel, + ReasoningEffort as ReasoningEffort, + ComparisonFilter as ComparisonFilter, FunctionDefinition as FunctionDefinition, FunctionParameters as FunctionParameters, + ResponseFormatText as ResponseFormatText, + CustomToolInputFormat as CustomToolInputFormat, + ResponseFormatJSONObject as ResponseFormatJSONObject, + ResponseFormatJSONSchema as ResponseFormatJSONSchema, + ResponseFormatTextPython as ResponseFormatTextPython, + ResponseFormatTextGrammar as ResponseFormatTextGrammar, ) +from .upload import Upload as Upload from .embedding import Embedding as Embedding from .chat_model import ChatModel as ChatModel from .completion import Completion as Completion from .moderation import Moderation as Moderation +from .video_size import VideoSize as VideoSize +from .audio_model import AudioModel as AudioModel from .batch_error import BatchError as BatchError +from .batch_usage import BatchUsage as BatchUsage from .file_object import FileObject as FileObject +from .image_model import ImageModel as ImageModel +from .video_model import VideoModel as VideoModel from .file_content import FileContent as FileContent from .file_deleted import FileDeleted as FileDeleted +from .file_purpose import FilePurpose as FilePurpose +from .vector_store import VectorStore as VectorStore from .model_deleted import ModelDeleted as ModelDeleted +from .video_seconds import VideoSeconds as VideoSeconds +from .embedding_model import EmbeddingModel as EmbeddingModel from .images_response import ImagesResponse as ImagesResponse from .completion_usage import CompletionUsage as CompletionUsage +from .eval_list_params import EvalListParams as EvalListParams from .file_list_params import FileListParams as FileListParams +from .moderation_model import ModerationModel as ModerationModel from .batch_list_params import BatchListParams as BatchListParams from .completion_choice import CompletionChoice as CompletionChoice from .image_edit_params import ImageEditParams as ImageEditParams +from .video_list_params import VideoListParams as VideoListParams +from .eval_create_params import EvalCreateParams as EvalCreateParams +from .eval_list_response import EvalListResponse as EvalListResponse +from .eval_update_params import EvalUpdateParams as EvalUpdateParams from 
.file_create_params import FileCreateParams as FileCreateParams +from .video_create_error import VideoCreateError as VideoCreateError +from .video_remix_params import VideoRemixParams as VideoRemixParams from .batch_create_params import BatchCreateParams as BatchCreateParams +from .video_create_params import VideoCreateParams as VideoCreateParams from .batch_request_counts import BatchRequestCounts as BatchRequestCounts +from .eval_create_response import EvalCreateResponse as EvalCreateResponse +from .eval_delete_response import EvalDeleteResponse as EvalDeleteResponse +from .eval_update_response import EvalUpdateResponse as EvalUpdateResponse +from .upload_create_params import UploadCreateParams as UploadCreateParams +from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted +from .audio_response_format import AudioResponseFormat as AudioResponseFormat +from .container_list_params import ContainerListParams as ContainerListParams from .image_generate_params import ImageGenerateParams as ImageGenerateParams +from .video_delete_response import VideoDeleteResponse as VideoDeleteResponse +from .eval_retrieve_response import EvalRetrieveResponse as EvalRetrieveResponse +from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy +from .image_gen_stream_event import ImageGenStreamEvent as ImageGenStreamEvent +from .upload_complete_params import UploadCompleteParams as UploadCompleteParams +from .container_create_params import ContainerCreateParams as ContainerCreateParams +from .container_list_response import ContainerListResponse as ContainerListResponse from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams +from .image_edit_stream_event import ImageEditStreamEvent as ImageEditStreamEvent from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .moderation_create_params import ModerationCreateParams as ModerationCreateParams +from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams +from .container_create_response import ContainerCreateResponse as ContainerCreateResponse from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse +from .image_gen_completed_event import ImageGenCompletedEvent as ImageGenCompletedEvent +from .image_edit_completed_event import ImageEditCompletedEvent as ImageEditCompletedEvent from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse +from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams +from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams +from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams +from .container_retrieve_response import ContainerRetrieveResponse as ContainerRetrieveResponse +from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam +from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam +from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse +from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams +from .image_gen_partial_image_event import ImageGenPartialImageEvent as ImageGenPartialImageEvent +from .static_file_chunking_strategy import StaticFileChunkingStrategy as 
StaticFileChunkingStrategy +from .video_download_content_params import VideoDownloadContentParams as VideoDownloadContentParams +from .eval_custom_data_source_config import EvalCustomDataSourceConfig as EvalCustomDataSourceConfig +from .image_edit_partial_image_event import ImageEditPartialImageEvent as ImageEditPartialImageEvent +from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam +from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject +from .eval_stored_completions_data_source_config import ( + EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig, +) +from .static_file_chunking_strategy_object_param import ( + StaticFileChunkingStrategyObjectParam as StaticFileChunkingStrategyObjectParam, +) diff --git a/src/openai/types/audio/__init__.py b/src/openai/types/audio/__init__.py index 8d2c44c86a..2ff2b8185d 100644 --- a/src/openai/types/audio/__init__.py +++ b/src/openai/types/audio/__init__.py @@ -3,7 +3,21 @@ from __future__ import annotations from .translation import Translation as Translation +from .speech_model import SpeechModel as SpeechModel from .transcription import Transcription as Transcription +from .transcription_word import TranscriptionWord as TranscriptionWord +from .translation_verbose import TranslationVerbose as TranslationVerbose from .speech_create_params import SpeechCreateParams as SpeechCreateParams +from .transcription_include import TranscriptionInclude as TranscriptionInclude +from .transcription_segment import TranscriptionSegment as TranscriptionSegment +from .transcription_verbose import TranscriptionVerbose as TranscriptionVerbose +from .transcription_diarized import TranscriptionDiarized as TranscriptionDiarized from .translation_create_params import TranslationCreateParams as TranslationCreateParams +from .transcription_stream_event import TranscriptionStreamEvent as TranscriptionStreamEvent from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams +from .translation_create_response import TranslationCreateResponse as TranslationCreateResponse +from .transcription_create_response import TranscriptionCreateResponse as TranscriptionCreateResponse +from .transcription_text_done_event import TranscriptionTextDoneEvent as TranscriptionTextDoneEvent +from .transcription_diarized_segment import TranscriptionDiarizedSegment as TranscriptionDiarizedSegment +from .transcription_text_delta_event import TranscriptionTextDeltaEvent as TranscriptionTextDeltaEvent +from .transcription_text_segment_event import TranscriptionTextSegmentEvent as TranscriptionTextSegmentEvent diff --git a/src/openai/types/audio/speech_create_params.py b/src/openai/types/audio/speech_create_params.py index 8d75ec4ccc..634d788191 100644 --- a/src/openai/types/audio/speech_create_params.py +++ b/src/openai/types/audio/speech_create_params.py @@ -5,6 +5,8 @@ from typing import Union from typing_extensions import Literal, Required, TypedDict +from .speech_model import 
SpeechModel + __all__ = ["SpeechCreateParams"] @@ -12,18 +14,27 @@ class SpeechCreateParams(TypedDict, total=False): input: Required[str] """The text to generate audio for. The maximum length is 4096 characters.""" - model: Required[Union[str, Literal["tts-1", "tts-1-hd"]]] + model: Required[Union[str, SpeechModel]] """ - One of the available [TTS models](https://platform.openai.com/docs/models/tts): - `tts-1` or `tts-1-hd` + One of the available [TTS models](https://platform.openai.com/docs/models#tts): + `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`. """ - voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] + voice: Required[ + Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]] + ] """The voice to use when generating the audio. - Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. - Previews of the voices are available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, + `nova`, `sage`, `shimmer`, and `verse`. Previews of the voices are available in + the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + """ + + instructions: str + """Control the voice of your generated audio with additional instructions. + + Does not work with `tts-1` or `tts-1-hd`. """ response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] @@ -37,3 +48,10 @@ class SpeechCreateParams(TypedDict, total=False): Select a value from `0.25` to `4.0`. `1.0` is the default. """ + + stream_format: Literal["sse", "audio"] + """The format to stream the audio in. + + Supported formats are `sse` and `audio`. `sse` is not supported for `tts-1` or + `tts-1-hd`. + """ diff --git a/src/openai/types/audio/speech_model.py b/src/openai/types/audio/speech_model.py new file mode 100644 index 0000000000..f004f805da --- /dev/null +++ b/src/openai/types/audio/speech_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["SpeechModel"] + +SpeechModel: TypeAlias = Literal["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] diff --git a/src/openai/types/audio/transcription.py b/src/openai/types/audio/transcription.py index 0b6ab39e78..cbae8bf750 100644 --- a/src/openai/types/audio/transcription.py +++ b/src/openai/types/audio/transcription.py @@ -1,12 +1,81 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias - +from ..._utils import PropertyInfo from ..._models import BaseModel -__all__ = ["Transcription"] +__all__ = ["Transcription", "Logprob", "Usage", "UsageTokens", "UsageTokensInputTokenDetails", "UsageDuration"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token in the transcription.""" + + bytes: Optional[List[float]] = None + """The bytes of the token.""" + + logprob: Optional[float] = None + """The log probability of the token.""" + + +class UsageTokensInputTokenDetails(BaseModel): + """Details about the input tokens billed for this request.""" + + audio_tokens: Optional[int] = None + """Number of audio tokens billed for this request.""" + + text_tokens: Optional[int] = None + """Number of text tokens billed for this request.""" + + +class UsageTokens(BaseModel): + """Usage statistics for models billed by token usage.""" + + input_tokens: int + """Number of input tokens billed for this request.""" + + output_tokens: int + """Number of output tokens generated.""" + + total_tokens: int + """Total number of tokens used (input + output).""" + + type: Literal["tokens"] + """The type of the usage object. Always `tokens` for this variant.""" + + input_token_details: Optional[UsageTokensInputTokenDetails] = None + """Details about the input tokens billed for this request.""" + + +class UsageDuration(BaseModel): + """Usage statistics for models billed by audio input duration.""" + + seconds: float + """Duration of the input audio in seconds.""" + + type: Literal["duration"] + """The type of the usage object. Always `duration` for this variant.""" + + +Usage: TypeAlias = Annotated[Union[UsageTokens, UsageDuration], PropertyInfo(discriminator="type")] class Transcription(BaseModel): + """ + Represents a transcription response returned by model, based on the provided input. + """ + text: str """The transcribed text.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the tokens in the transcription. + + Only returned with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` + if `logprobs` is added to the `include` array. 
+ """ + + usage: Optional[Usage] = None + """Token usage statistics for the request.""" diff --git a/src/openai/types/audio/transcription_create_params.py b/src/openai/types/audio/transcription_create_params.py index 6b2d5bae79..adaef9f5fe 100644 --- a/src/openai/types/audio/transcription_create_params.py +++ b/src/openai/types/audio/transcription_create_params.py @@ -2,48 +2,98 @@ from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ..._types import FileTypes +from ..._types import FileTypes, SequenceNotStr +from ..audio_model import AudioModel +from .transcription_include import TranscriptionInclude +from ..audio_response_format import AudioResponseFormat -__all__ = ["TranscriptionCreateParams"] +__all__ = [ + "TranscriptionCreateParamsBase", + "ChunkingStrategy", + "ChunkingStrategyVadConfig", + "TranscriptionCreateParamsNonStreaming", + "TranscriptionCreateParamsStreaming", +] -class TranscriptionCreateParams(TypedDict, total=False): +class TranscriptionCreateParamsBase(TypedDict, total=False): file: Required[FileTypes] """ The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] + model: Required[Union[str, AudioModel]] """ID of the model to use. - Only `whisper-1` (which is powered by our open source Whisper V2 model) is - currently available. + The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `whisper-1` + (which is powered by our open source Whisper V2 model), and + `gpt-4o-transcribe-diarize`. + """ + + chunking_strategy: Optional[ChunkingStrategy] + """Controls how the audio is cut into chunks. + + When set to `"auto"`, the server first normalizes loudness and then uses voice + activity detection (VAD) to choose boundaries. `server_vad` object can be + provided to tweak VAD detection parameters manually. If unset, the audio is + transcribed as a single block. Required when using `gpt-4o-transcribe-diarize` + for inputs longer than 30 seconds. + """ + + include: List[TranscriptionInclude] + """ + Additional information to include in the transcription response. `logprobs` will + return the log probabilities of the tokens in the response to understand the + model's confidence in the transcription. `logprobs` only works with + response_format set to `json` and only with the models `gpt-4o-transcribe` and + `gpt-4o-mini-transcribe`. This field is not supported when using + `gpt-4o-transcribe-diarize`. + """ + + known_speaker_names: SequenceNotStr[str] + """ + Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for + example `customer` or `agent`). Up to 4 speakers are supported. + """ + + known_speaker_references: SequenceNotStr[str] + """ + Optional list of audio samples (as + [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) + that contain known speaker references matching `known_speaker_names[]`. Each + sample must be between 2 and 10 seconds, and can use any of the same input audio + formats supported by `file`. """ language: str """The language of the input audio. 
     Supplying the input language in
-    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
-    improve accuracy and latency.
+    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+    format will improve accuracy and latency.
     """
 
     prompt: str
     """An optional text to guide the model's style or continue a previous audio
     segment.
 
-    The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-    should match the audio language.
+    The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+    should match the audio language. This field is not supported when using
+    `gpt-4o-transcribe-diarize`.
     """
 
-    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"]
+    response_format: AudioResponseFormat
     """
-    The format of the transcript output, in one of these options: `json`, `text`,
-    `srt`, `verbose_json`, or `vtt`.
+    The format of the output, in one of these options: `json`, `text`, `srt`,
+    `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+    `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+    `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+    `diarized_json`, with `diarized_json` required to receive speaker annotations.
     """
 
     temperature: float
@@ -61,5 +111,62 @@ class TranscriptionCreateParams(TypedDict, total=False):
     `response_format` must be set `verbose_json` to use timestamp granularities.
     Either or both of these options are supported: `word`, or `segment`. Note: There
     is no additional latency for segment timestamps, but generating word timestamps
-    incurs additional latency.
+    incurs additional latency. This option is not available for
+    `gpt-4o-transcribe-diarize`.
+    """
+
+
+class ChunkingStrategyVadConfig(TypedDict, total=False):
+    type: Required[Literal["server_vad"]]
+    """Must be set to `server_vad` to enable manual chunking using server side VAD."""
+
+    prefix_padding_ms: int
+    """Amount of audio to include before the VAD detected speech (in milliseconds)."""
+
+    silence_duration_ms: int
+    """
+    Duration of silence to detect speech stop (in milliseconds). With shorter values
+    the model will respond more quickly, but may jump in on short pauses from the
+    user.
+    """
+
+    threshold: float
+    """Sensitivity threshold (0.0 to 1.0) for voice activity detection.
+
+    A higher threshold will require louder audio to activate the model, and thus
+    might perform better in noisy environments.
+    """
+
+
+ChunkingStrategy: TypeAlias = Union[Literal["auto"], ChunkingStrategyVadConfig]
+
+
+class TranscriptionCreateParamsNonStreaming(TranscriptionCreateParamsBase, total=False):
+    stream: Optional[Literal[False]]
+    """
+    If set to true, the model response data will be streamed to the client as it is
+    generated using
+    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+    See the
+    [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+    for more information.
+
+    Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+ """ + + +class TranscriptionCreateParamsStreaming(TranscriptionCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + for more information. + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + """ + + +TranscriptionCreateParams = Union[TranscriptionCreateParamsNonStreaming, TranscriptionCreateParamsStreaming] diff --git a/src/openai/types/audio/transcription_create_response.py b/src/openai/types/audio/transcription_create_response.py new file mode 100644 index 0000000000..5717a3e701 --- /dev/null +++ b/src/openai/types/audio/transcription_create_response.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .transcription import Transcription +from .transcription_verbose import TranscriptionVerbose +from .transcription_diarized import TranscriptionDiarized + +__all__ = ["TranscriptionCreateResponse"] + +TranscriptionCreateResponse: TypeAlias = Union[Transcription, TranscriptionDiarized, TranscriptionVerbose] diff --git a/src/openai/types/audio/transcription_diarized.py b/src/openai/types/audio/transcription_diarized.py new file mode 100644 index 0000000000..07585fe239 --- /dev/null +++ b/src/openai/types/audio/transcription_diarized.py @@ -0,0 +1,73 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .transcription_diarized_segment import TranscriptionDiarizedSegment + +__all__ = ["TranscriptionDiarized", "Usage", "UsageTokens", "UsageTokensInputTokenDetails", "UsageDuration"] + + +class UsageTokensInputTokenDetails(BaseModel): + """Details about the input tokens billed for this request.""" + + audio_tokens: Optional[int] = None + """Number of audio tokens billed for this request.""" + + text_tokens: Optional[int] = None + """Number of text tokens billed for this request.""" + + +class UsageTokens(BaseModel): + """Usage statistics for models billed by token usage.""" + + input_tokens: int + """Number of input tokens billed for this request.""" + + output_tokens: int + """Number of output tokens generated.""" + + total_tokens: int + """Total number of tokens used (input + output).""" + + type: Literal["tokens"] + """The type of the usage object. Always `tokens` for this variant.""" + + input_token_details: Optional[UsageTokensInputTokenDetails] = None + """Details about the input tokens billed for this request.""" + + +class UsageDuration(BaseModel): + """Usage statistics for models billed by audio input duration.""" + + seconds: float + """Duration of the input audio in seconds.""" + + type: Literal["duration"] + """The type of the usage object. 
Always `duration` for this variant.""" + + +Usage: TypeAlias = Annotated[Union[UsageTokens, UsageDuration], PropertyInfo(discriminator="type")] + + +class TranscriptionDiarized(BaseModel): + """ + Represents a diarized transcription response returned by the model, including the combined transcript and speaker-segment annotations. + """ + + duration: float + """Duration of the input audio in seconds.""" + + segments: List[TranscriptionDiarizedSegment] + """Segments of the transcript annotated with timestamps and speaker labels.""" + + task: Literal["transcribe"] + """The type of task that was run. Always `transcribe`.""" + + text: str + """The concatenated transcript text for the entire audio input.""" + + usage: Optional[Usage] = None + """Token or duration usage statistics for the request.""" diff --git a/src/openai/types/audio/transcription_diarized_segment.py b/src/openai/types/audio/transcription_diarized_segment.py new file mode 100644 index 0000000000..fcfdb3634f --- /dev/null +++ b/src/openai/types/audio/transcription_diarized_segment.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionDiarizedSegment"] + + +class TranscriptionDiarizedSegment(BaseModel): + """A segment of diarized transcript text with speaker metadata.""" + + id: str + """Unique identifier for the segment.""" + + end: float + """End timestamp of the segment in seconds.""" + + speaker: str + """Speaker label for this segment. + + When known speakers are provided, the label matches `known_speaker_names[]`. + Otherwise speakers are labeled sequentially using capital letters (`A`, `B`, + ...). + """ + + start: float + """Start timestamp of the segment in seconds.""" + + text: str + """Transcript text for this segment.""" + + type: Literal["transcript.text.segment"] + """The type of the segment. Always `transcript.text.segment`.""" diff --git a/src/openai/types/audio/transcription_include.py b/src/openai/types/audio/transcription_include.py new file mode 100644 index 0000000000..0e464ac934 --- /dev/null +++ b/src/openai/types/audio/transcription_include.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["TranscriptionInclude"] + +TranscriptionInclude: TypeAlias = Literal["logprobs"] diff --git a/src/openai/types/audio/transcription_segment.py b/src/openai/types/audio/transcription_segment.py new file mode 100644 index 0000000000..522c401ebb --- /dev/null +++ b/src/openai/types/audio/transcription_segment.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel + +__all__ = ["TranscriptionSegment"] + + +class TranscriptionSegment(BaseModel): + id: int + """Unique identifier of the segment.""" + + avg_logprob: float + """Average logprob of the segment. + + If the value is lower than -1, consider the logprobs failed. + """ + + compression_ratio: float + """Compression ratio of the segment. + + If the value is greater than 2.4, consider the compression failed. + """ + + end: float + """End time of the segment in seconds.""" + + no_speech_prob: float + """Probability of no speech in the segment. + + If the value is higher than 1.0 and the `avg_logprob` is below -1, consider this + segment silent. 
+ """ + + seek: int + """Seek offset of the segment.""" + + start: float + """Start time of the segment in seconds.""" + + temperature: float + """Temperature parameter used for generating the segment.""" + + text: str + """Text content of the segment.""" + + tokens: List[int] + """Array of token IDs for the text content.""" diff --git a/src/openai/types/audio/transcription_stream_event.py b/src/openai/types/audio/transcription_stream_event.py new file mode 100644 index 0000000000..77d3a3aeec --- /dev/null +++ b/src/openai/types/audio/transcription_stream_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .transcription_text_done_event import TranscriptionTextDoneEvent +from .transcription_text_delta_event import TranscriptionTextDeltaEvent +from .transcription_text_segment_event import TranscriptionTextSegmentEvent + +__all__ = ["TranscriptionStreamEvent"] + +TranscriptionStreamEvent: TypeAlias = Annotated[ + Union[TranscriptionTextSegmentEvent, TranscriptionTextDeltaEvent, TranscriptionTextDoneEvent], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/audio/transcription_text_delta_event.py b/src/openai/types/audio/transcription_text_delta_event.py new file mode 100644 index 0000000000..a6e83133c8 --- /dev/null +++ b/src/openai/types/audio/transcription_text_delta_event.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionTextDeltaEvent", "Logprob"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token that was used to generate the log probability.""" + + bytes: Optional[List[int]] = None + """The bytes that were used to generate the log probability.""" + + logprob: Optional[float] = None + """The log probability of the token.""" + + +class TranscriptionTextDeltaEvent(BaseModel): + """Emitted when there is an additional text delta. + + This is also the first event emitted when the transcription starts. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + """ + + delta: str + """The text delta that was additionally transcribed.""" + + type: Literal["transcript.text.delta"] + """The type of the event. Always `transcript.text.delta`.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the delta. + + Only included if you + [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + with the `include[]` parameter set to `logprobs`. + """ + + segment_id: Optional[str] = None + """Identifier of the diarized segment that this delta belongs to. + + Only present when using `gpt-4o-transcribe-diarize`. + """ diff --git a/src/openai/types/audio/transcription_text_done_event.py b/src/openai/types/audio/transcription_text_done_event.py new file mode 100644 index 0000000000..c8f7fc0769 --- /dev/null +++ b/src/openai/types/audio/transcription_text_done_event.py @@ -0,0 +1,72 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionTextDoneEvent", "Logprob", "Usage", "UsageInputTokenDetails"] + + +class Logprob(BaseModel): + token: Optional[str] = None + """The token that was used to generate the log probability.""" + + bytes: Optional[List[int]] = None + """The bytes that were used to generate the log probability.""" + + logprob: Optional[float] = None + """The log probability of the token.""" + + +class UsageInputTokenDetails(BaseModel): + """Details about the input tokens billed for this request.""" + + audio_tokens: Optional[int] = None + """Number of audio tokens billed for this request.""" + + text_tokens: Optional[int] = None + """Number of text tokens billed for this request.""" + + +class Usage(BaseModel): + """Usage statistics for models billed by token usage.""" + + input_tokens: int + """Number of input tokens billed for this request.""" + + output_tokens: int + """Number of output tokens generated.""" + + total_tokens: int + """Total number of tokens used (input + output).""" + + type: Literal["tokens"] + """The type of the usage object. Always `tokens` for this variant.""" + + input_token_details: Optional[UsageInputTokenDetails] = None + """Details about the input tokens billed for this request.""" + + +class TranscriptionTextDoneEvent(BaseModel): + """Emitted when the transcription is complete. + + Contains the complete transcription text. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the `Stream` parameter set to `true`. + """ + + text: str + """The text that was transcribed.""" + + type: Literal["transcript.text.done"] + """The type of the event. Always `transcript.text.done`.""" + + logprobs: Optional[List[Logprob]] = None + """The log probabilities of the individual tokens in the transcription. + + Only included if you + [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) + with the `include[]` parameter set to `logprobs`. + """ + + usage: Optional[Usage] = None + """Usage statistics for models billed by token usage.""" diff --git a/src/openai/types/audio/transcription_text_segment_event.py b/src/openai/types/audio/transcription_text_segment_event.py new file mode 100644 index 0000000000..e95472e6c6 --- /dev/null +++ b/src/openai/types/audio/transcription_text_segment_event.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TranscriptionTextSegmentEvent"] + + +class TranscriptionTextSegmentEvent(BaseModel): + """ + Emitted when a diarized transcription returns a completed segment with speaker information. Only emitted when you [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with `stream` set to `true` and `response_format` set to `diarized_json`. + """ + + id: str + """Unique identifier for the segment.""" + + end: float + """End timestamp of the segment in seconds.""" + + speaker: str + """Speaker label for this segment.""" + + start: float + """Start timestamp of the segment in seconds.""" + + text: str + """Transcript text for this segment.""" + + type: Literal["transcript.text.segment"] + """The type of the event. 
Always `transcript.text.segment`.""" diff --git a/src/openai/types/audio/transcription_verbose.py b/src/openai/types/audio/transcription_verbose.py new file mode 100644 index 0000000000..b1a95e9c72 --- /dev/null +++ b/src/openai/types/audio/transcription_verbose.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .transcription_word import TranscriptionWord +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranscriptionVerbose", "Usage"] + + +class Usage(BaseModel): + """Usage statistics for models billed by audio input duration.""" + + seconds: float + """Duration of the input audio in seconds.""" + + type: Literal["duration"] + """The type of the usage object. Always `duration` for this variant.""" + + +class TranscriptionVerbose(BaseModel): + """ + Represents a verbose json transcription response returned by model, based on the provided input. + """ + + duration: float + """The duration of the input audio.""" + + language: str + """The language of the input audio.""" + + text: str + """The transcribed text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the transcribed text and their corresponding details.""" + + usage: Optional[Usage] = None + """Usage statistics for models billed by audio input duration.""" + + words: Optional[List[TranscriptionWord]] = None + """Extracted words and their corresponding timestamps.""" diff --git a/src/openai/types/audio/transcription_word.py b/src/openai/types/audio/transcription_word.py new file mode 100644 index 0000000000..2ce682f957 --- /dev/null +++ b/src/openai/types/audio/transcription_word.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["TranscriptionWord"] + + +class TranscriptionWord(BaseModel): + end: float + """End time of the word in seconds.""" + + start: float + """Start time of the word in seconds.""" + + word: str + """The text content of the word.""" diff --git a/src/openai/types/audio/translation.py b/src/openai/types/audio/translation.py index 3d9ede2939..efc56f7f9b 100644 --- a/src/openai/types/audio/translation.py +++ b/src/openai/types/audio/translation.py @@ -1,7 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - - from ..._models import BaseModel __all__ = ["Translation"] diff --git a/src/openai/types/audio/translation_create_params.py b/src/openai/types/audio/translation_create_params.py index f23a41ed5c..b23a185375 100644 --- a/src/openai/types/audio/translation_create_params.py +++ b/src/openai/types/audio/translation_create_params.py @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes +from ..audio_model import AudioModel __all__ = ["TranslationCreateParams"] @@ -17,7 +18,7 @@ class TranslationCreateParams(TypedDict, total=False): mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] + model: Required[Union[str, AudioModel]] """ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is @@ -28,14 +29,14 @@ class TranslationCreateParams(TypedDict, total=False): """An optional text to guide the model's style or continue a previous audio segment. 
- The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. """ - response_format: str + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] """ - The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. """ temperature: float diff --git a/src/openai/types/audio/translation_create_response.py b/src/openai/types/audio/translation_create_response.py new file mode 100644 index 0000000000..9953813c08 --- /dev/null +++ b/src/openai/types/audio/translation_create_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .translation import Translation +from .translation_verbose import TranslationVerbose + +__all__ = ["TranslationCreateResponse"] + +TranslationCreateResponse: TypeAlias = Union[Translation, TranslationVerbose] diff --git a/src/openai/types/audio/translation_verbose.py b/src/openai/types/audio/translation_verbose.py new file mode 100644 index 0000000000..27cb02d64f --- /dev/null +++ b/src/openai/types/audio/translation_verbose.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranslationVerbose"] + + +class TranslationVerbose(BaseModel): + duration: float + """The duration of the input audio.""" + + language: str + """The language of the output translation (always `english`).""" + + text: str + """The translated text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the translated text and their corresponding details.""" diff --git a/src/openai/types/audio_model.py b/src/openai/types/audio_model.py new file mode 100644 index 0000000000..68031a2198 --- /dev/null +++ b/src/openai/types/audio_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioModel"] + +AudioModel: TypeAlias = Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe", "gpt-4o-transcribe-diarize"] diff --git a/src/openai/types/audio_response_format.py b/src/openai/types/audio_response_format.py new file mode 100644 index 0000000000..1897aaf6ed --- /dev/null +++ b/src/openai/types/audio_response_format.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioResponseFormat"] + +AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt", "diarized_json"] diff --git a/src/openai/types/auto_file_chunking_strategy_param.py b/src/openai/types/auto_file_chunking_strategy_param.py new file mode 100644 index 0000000000..db7cbf596d --- /dev/null +++ b/src/openai/types/auto_file_chunking_strategy_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
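For illustration, a hedged sketch that exercises the `diarized_json` response format and the `gpt-4o-transcribe-diarize` model added above; the file path is a placeholder:

from openai import OpenAI

client = OpenAI()

# Request a diarized transcript; the response is a TranscriptionDiarized whose
# segments carry sequential speaker labels (A, B, ...) and timestamps.
with open("meeting.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe-diarize",
        file=audio_file,
        response_format="diarized_json",
    )

for segment in transcript.segments:
    print(f"[{segment.start:.1f}s-{segment.end:.1f}s] {segment.speaker}: {segment.text}")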
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["AutoFileChunkingStrategyParam"] + + +class AutoFileChunkingStrategyParam(TypedDict, total=False): + """The default strategy. + + This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`. + """ + + type: Required[Literal["auto"]] + """Always `auto`.""" diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py index 90f6d79572..ece0513b35 100644 --- a/src/openai/types/batch.py +++ b/src/openai/types/batch.py @@ -1,11 +1,12 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import List, Optional from typing_extensions import Literal from .._models import BaseModel from .batch_error import BatchError +from .batch_usage import BatchUsage +from .shared.metadata import Metadata from .batch_request_counts import BatchRequestCounts __all__ = ["Batch", "Errors"] @@ -70,12 +71,23 @@ class Batch(BaseModel): in_progress_at: Optional[int] = None """The Unix timestamp (in seconds) for when the batch started processing.""" - metadata: Optional[builtins.object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Optional[str] = None + """Model ID used to process the batch, like `gpt-5-2025-08-07`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. """ output_file_id: Optional[str] = None @@ -83,3 +95,10 @@ class Batch(BaseModel): request_counts: Optional[BatchRequestCounts] = None """The request counts for different statuses within the batch.""" + + usage: Optional[BatchUsage] = None + """ + Represents token usage details including input tokens, output tokens, a + breakdown of output tokens, and the total tokens used. Only populated on batches + created after September 7, 2025. + """ diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py index 140380d417..1088aab380 100644 --- a/src/openai/types/batch_create_params.py +++ b/src/openai/types/batch_create_params.py @@ -2,10 +2,12 @@ from __future__ import annotations -from typing import Dict, Optional +from typing import Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["BatchCreateParams"] +from .shared_params.metadata import Metadata + +__all__ = ["BatchCreateParams", "OutputExpiresAfter"] class BatchCreateParams(TypedDict, total=False): @@ -15,12 +17,15 @@ class BatchCreateParams(TypedDict, total=False): Currently only `24h` is supported. """ - endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]] + endpoint: Required[ + Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions", "/v1/moderations"] + ] """The endpoint to be used for all requests in the batch. 
- Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are - supported. Note that `/v1/embeddings` batches are also restricted to a maximum - of 50,000 embedding inputs across all requests in the batch. + Currently `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, + `/v1/completions`, and `/v1/moderations` are supported. Note that + `/v1/embeddings` batches are also restricted to a maximum of 50,000 embedding + inputs across all requests in the batch. """ input_file_id: Required[str] @@ -30,10 +35,42 @@ class BatchCreateParams(TypedDict, total=False): for how to upload a file. Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ - metadata: Optional[Dict[str, str]] - """Optional custom metadata for the batch.""" + output_expires_after: OutputExpiresAfter + """ + The expiration policy for the output and/or error file that are generated for a + batch. + """ + + +class OutputExpiresAfter(TypedDict, total=False): + """ + The expiration policy for the output and/or error file that are generated for a batch. + """ + + anchor: Required[Literal["created_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `created_at`. Note that the anchor is the file creation time, + not the time the batch is created. + """ + + seconds: Required[int] + """The number of seconds after the anchor time that the file will expire. + + Must be between 3600 (1 hour) and 2592000 (30 days). + """ diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py index ef6c84a0a1..64a570747d 100644 --- a/src/openai/types/batch_request_counts.py +++ b/src/openai/types/batch_request_counts.py @@ -1,13 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - - from .._models import BaseModel __all__ = ["BatchRequestCounts"] class BatchRequestCounts(BaseModel): + """The request counts for different statuses within the batch.""" + completed: int """Number of requests that have been completed successfully.""" diff --git a/src/openai/types/batch_usage.py b/src/openai/types/batch_usage.py new file mode 100644 index 0000000000..d68d7110ac --- /dev/null +++ b/src/openai/types/batch_usage.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["BatchUsage", "InputTokensDetails", "OutputTokensDetails"] + + +class InputTokensDetails(BaseModel): + """A detailed breakdown of the input tokens.""" + + cached_tokens: int + """The number of tokens that were retrieved from the cache. + + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). 
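For illustration, a minimal sketch of the batch-creation parameters introduced above; the input file ID and metadata values are placeholders:

from openai import OpenAI

client = OpenAI()

# Create a batch against the Responses endpoint with an explicit retention
# policy for the generated output/error files.
batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder ID of an uploaded JSONL file
    endpoint="/v1/responses",
    completion_window="24h",
    metadata={"project": "nightly-eval"},  # placeholder metadata
    output_expires_after={"anchor": "created_at", "seconds": 86400},
)

print(batch.id, batch.status)
if batch.usage is not None:  # usage is only populated on newer batches
    print(batch.usage.total_tokens)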
+ """ + + +class OutputTokensDetails(BaseModel): + """A detailed breakdown of the output tokens.""" + + reasoning_tokens: int + """The number of reasoning tokens.""" + + +class BatchUsage(BaseModel): + """ + Represents token usage details including input tokens, output tokens, a + breakdown of output tokens, and the total tokens used. Only populated on + batches created after September 7, 2025. + """ + + input_tokens: int + """The number of input tokens.""" + + input_tokens_details: InputTokensDetails + """A detailed breakdown of the input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: OutputTokensDetails + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py index d851a3619c..deb2369677 100644 --- a/src/openai/types/beta/__init__.py +++ b/src/openai/types/beta/__init__.py @@ -4,17 +4,16 @@ from .thread import Thread as Thread from .assistant import Assistant as Assistant -from .vector_store import VectorStore as VectorStore from .function_tool import FunctionTool as FunctionTool from .assistant_tool import AssistantTool as AssistantTool from .thread_deleted import ThreadDeleted as ThreadDeleted +from .chatkit_workflow import ChatKitWorkflow as ChatKitWorkflow from .file_search_tool import FileSearchTool as FileSearchTool from .assistant_deleted import AssistantDeleted as AssistantDeleted from .function_tool_param import FunctionToolParam as FunctionToolParam from .assistant_tool_param import AssistantToolParam as AssistantToolParam from .thread_create_params import ThreadCreateParams as ThreadCreateParams from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams -from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted from .assistant_list_params import AssistantListParams as AssistantListParams from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool @@ -22,16 +21,11 @@ from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam from .assistant_create_params import AssistantCreateParams as AssistantCreateParams from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams -from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams -from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat -from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams -from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction -from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam from 
.assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index 4e5adc766e..61344f85a1 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -5,6 +5,7 @@ from ..._models import BaseModel from .assistant_tool import AssistantTool +from ..shared.metadata import Metadata from .assistant_response_format_option import AssistantResponseFormatOption __all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -30,12 +31,19 @@ class ToolResourcesFileSearch(BaseModel): class ToolResources(BaseModel): + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + """ + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None file_search: Optional[ToolResourcesFileSearch] = None class Assistant(BaseModel): + """Represents an `assistant` that can call the model and use tools.""" + id: str """The identifier, which can be referenced in API endpoints.""" @@ -51,12 +59,14 @@ class Assistant(BaseModel): The maximum length is 256,000 characters. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -65,8 +75,8 @@ class Assistant(BaseModel): You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ name: Optional[str] = None @@ -85,11 +95,16 @@ class Assistant(BaseModel): response_format: Optional[AssistantResponseFormatOption] = None """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index 67e7f7e78c..461d871ab5 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -2,10 +2,14 @@ from __future__ import annotations -from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from ..._types import SequenceNotStr +from ..shared.chat_model import ChatModel from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ @@ -14,44 +18,22 @@ "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", ] class AssistantCreateParams(TypedDict, total=False): - model: Required[ - Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ] - ] + model: Required[Union[str, ChatModel]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ description: Optional[str] @@ -63,25 +45,49 @@ class AssistantCreateParams(TypedDict, total=False): The maximum length is 256,000 characters. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. 
The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -126,7 +132,7 @@ class AssistantCreateParams(TypedDict, total=False): class ToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files @@ -134,25 +140,70 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + """The default strategy. + + This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`. + """ + + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + class ToolResourcesFileSearchVectorStore(TypedDict, total=False): - file_ids: List[str] + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. 
+ """ + + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ class ToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -170,6 +221,11 @@ class ToolResourcesFileSearch(TypedDict, total=False): class ToolResources(TypedDict, total=False): + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + """ + code_interpreter: ToolResourcesCodeInterpreter file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/assistant_list_params.py b/src/openai/types/beta/assistant_list_params.py index f54f63120b..834ffbcaf8 100644 --- a/src/openai/types/beta/assistant_list_params.py +++ b/src/openai/types/beta/assistant_list_params.py @@ -21,7 +21,7 @@ class AssistantListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py deleted file mode 100644 index f53bdaf62a..0000000000 --- a/src/openai/types/beta/assistant_response_format.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["AssistantResponseFormat"] - - -class AssistantResponseFormat(BaseModel): - type: Optional[Literal["text", "json_object"]] = None - """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py index d4e05e0ea9..6f06a3442f 100644 --- a/src/openai/types/beta/assistant_response_format_option.py +++ b/src/openai/types/beta/assistant_response_format_option.py @@ -1,10 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias -from .assistant_response_format import AssistantResponseFormat +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from ..shared.response_format_json_schema import ResponseFormatJSONSchema __all__ = ["AssistantResponseFormatOption"] -AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat] +AssistantResponseFormatOption: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py index 46e04125d1..5e724a4d98 100644 --- a/src/openai/types/beta/assistant_response_format_option_param.py +++ b/src/openai/types/beta/assistant_response_format_option_param.py @@ -3,10 +3,14 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias -from .assistant_response_format_param import AssistantResponseFormatParam +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema __all__ = ["AssistantResponseFormatOptionParam"] -AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam] +AssistantResponseFormatOptionParam: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py deleted file mode 100644 index 96e1d02115..0000000000 --- a/src/openai/types/beta/assistant_response_format_param.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, TypedDict - -__all__ = ["AssistantResponseFormatParam"] - - -class AssistantResponseFormatParam(TypedDict, total=False): - type: Literal["text", "json_object"] - """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py index 91925e93b3..87620a11d0 100644 --- a/src/openai/types/beta/assistant_stream_event.py +++ b/src/openai/types/beta/assistant_stream_event.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Union -from typing_extensions import Literal, Annotated +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias from .thread import Thread from ..._utils import PropertyInfo @@ -21,6 +21,7 @@ "ThreadRunInProgress", "ThreadRunRequiresAction", "ThreadRunCompleted", + "ThreadRunIncomplete", "ThreadRunFailed", "ThreadRunCancelling", "ThreadRunCancelled", @@ -42,6 +43,10 @@ class ThreadCreated(BaseModel): + """ + Occurs when a new [thread](https://platform.openai.com/docs/api-reference/threads/object) is created. 
+ """ + data: Thread """ Represents a thread that contains @@ -50,8 +55,15 @@ class ThreadCreated(BaseModel): event: Literal["thread.created"] + enabled: Optional[bool] = None + """Whether to enable input audio transcription.""" + class ThreadRunCreated(BaseModel): + """ + Occurs when a new [run](https://platform.openai.com/docs/api-reference/runs/object) is created. + """ + data: Run """ Represents an execution run on a @@ -62,6 +74,10 @@ class ThreadRunCreated(BaseModel): class ThreadRunQueued(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `queued` status. + """ + data: Run """ Represents an execution run on a @@ -72,6 +88,10 @@ class ThreadRunQueued(BaseModel): class ThreadRunInProgress(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to an `in_progress` status. + """ + data: Run """ Represents an execution run on a @@ -82,6 +102,10 @@ class ThreadRunInProgress(BaseModel): class ThreadRunRequiresAction(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `requires_action` status. + """ + data: Run """ Represents an execution run on a @@ -92,6 +116,10 @@ class ThreadRunRequiresAction(BaseModel): class ThreadRunCompleted(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is completed. + """ + data: Run """ Represents an execution run on a @@ -101,7 +129,25 @@ class ThreadRunCompleted(BaseModel): event: Literal["thread.run.completed"] +class ThreadRunIncomplete(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) ends with status `incomplete`. + """ + + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.incomplete"] + + class ThreadRunFailed(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) fails. + """ + data: Run """ Represents an execution run on a @@ -112,6 +158,10 @@ class ThreadRunFailed(BaseModel): class ThreadRunCancelling(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `cancelling` status. + """ + data: Run """ Represents an execution run on a @@ -122,6 +172,10 @@ class ThreadRunCancelling(BaseModel): class ThreadRunCancelled(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is cancelled. + """ + data: Run """ Represents an execution run on a @@ -132,6 +186,10 @@ class ThreadRunCancelled(BaseModel): class ThreadRunExpired(BaseModel): + """ + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) expires. + """ + data: Run """ Represents an execution run on a @@ -142,6 +200,10 @@ class ThreadRunExpired(BaseModel): class ThreadRunStepCreated(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is created. 
+ """ + data: RunStep """Represents a step in execution of a run.""" @@ -149,6 +211,10 @@ class ThreadRunStepCreated(BaseModel): class ThreadRunStepInProgress(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) moves to an `in_progress` state. + """ + data: RunStep """Represents a step in execution of a run.""" @@ -156,6 +222,10 @@ class ThreadRunStepInProgress(BaseModel): class ThreadRunStepDelta(BaseModel): + """ + Occurs when parts of a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) are being streamed. + """ + data: RunStepDeltaEvent """Represents a run step delta i.e. @@ -166,6 +236,10 @@ class ThreadRunStepDelta(BaseModel): class ThreadRunStepCompleted(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is completed. + """ + data: RunStep """Represents a step in execution of a run.""" @@ -173,6 +247,10 @@ class ThreadRunStepCompleted(BaseModel): class ThreadRunStepFailed(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) fails. + """ + data: RunStep """Represents a step in execution of a run.""" @@ -180,6 +258,10 @@ class ThreadRunStepFailed(BaseModel): class ThreadRunStepCancelled(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is cancelled. + """ + data: RunStep """Represents a step in execution of a run.""" @@ -187,6 +269,10 @@ class ThreadRunStepCancelled(BaseModel): class ThreadRunStepExpired(BaseModel): + """ + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) expires. + """ + data: RunStep """Represents a step in execution of a run.""" @@ -194,6 +280,10 @@ class ThreadRunStepExpired(BaseModel): class ThreadMessageCreated(BaseModel): + """ + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is created. + """ + data: Message """ Represents a message within a @@ -204,6 +294,10 @@ class ThreadMessageCreated(BaseModel): class ThreadMessageInProgress(BaseModel): + """ + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) moves to an `in_progress` state. + """ + data: Message """ Represents a message within a @@ -214,6 +308,10 @@ class ThreadMessageInProgress(BaseModel): class ThreadMessageDelta(BaseModel): + """ + Occurs when parts of a [Message](https://platform.openai.com/docs/api-reference/messages/object) are being streamed. + """ + data: MessageDeltaEvent """Represents a message delta i.e. @@ -224,6 +322,10 @@ class ThreadMessageDelta(BaseModel): class ThreadMessageCompleted(BaseModel): + """ + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is completed. + """ + data: Message """ Represents a message within a @@ -234,6 +336,10 @@ class ThreadMessageCompleted(BaseModel): class ThreadMessageIncomplete(BaseModel): + """ + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) ends before it is completed. 
+ """ + data: Message """ Represents a message within a @@ -244,12 +350,16 @@ class ThreadMessageIncomplete(BaseModel): class ErrorEvent(BaseModel): + """ + Occurs when an [error](https://platform.openai.com/docs/guides/error-codes#api-errors) occurs. This can happen due to an internal server error or a timeout. + """ + data: ErrorObject event: Literal["error"] -AssistantStreamEvent = Annotated[ +AssistantStreamEvent: TypeAlias = Annotated[ Union[ ThreadCreated, ThreadRunCreated, @@ -257,6 +367,7 @@ class ErrorEvent(BaseModel): ThreadRunInProgress, ThreadRunRequiresAction, ThreadRunCompleted, + ThreadRunIncomplete, ThreadRunFailed, ThreadRunCancelling, ThreadRunCancelled, diff --git a/src/openai/types/beta/assistant_tool.py b/src/openai/types/beta/assistant_tool.py index 7832da48cc..1bde6858b1 100644 --- a/src/openai/types/beta/assistant_tool.py +++ b/src/openai/types/beta/assistant_tool.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ..._utils import PropertyInfo from .function_tool import FunctionTool @@ -10,4 +10,6 @@ __all__ = ["AssistantTool"] -AssistantTool = Annotated[Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type")] +AssistantTool: TypeAlias = Annotated[ + Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/assistant_tool_choice.py b/src/openai/types/beta/assistant_tool_choice.py index d73439f006..cabded0b3c 100644 --- a/src/openai/types/beta/assistant_tool_choice.py +++ b/src/openai/types/beta/assistant_tool_choice.py @@ -10,6 +10,11 @@ class AssistantToolChoice(BaseModel): + """Specifies a tool the model should use. + + Use to force the model to call a specific tool. + """ + type: Literal["function", "code_interpreter", "file_search"] """The type of the tool. If type is `function`, the function name must be set""" diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py index d0d4255357..87f38310ca 100644 --- a/src/openai/types/beta/assistant_tool_choice_function.py +++ b/src/openai/types/beta/assistant_tool_choice_function.py @@ -1,7 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - - from ..._models import BaseModel __all__ = ["AssistantToolChoiceFunction"] diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py index 8958bc8fb0..e57c3278fb 100644 --- a/src/openai/types/beta/assistant_tool_choice_option.py +++ b/src/openai/types/beta/assistant_tool_choice_option.py @@ -1,10 +1,10 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .assistant_tool_choice import AssistantToolChoice __all__ = ["AssistantToolChoiceOption"] -AssistantToolChoiceOption = Union[Literal["none", "auto", "required"], AssistantToolChoice] +AssistantToolChoiceOption: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoice] diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py index 81b7f15136..cc0053d37e 100644 --- a/src/openai/types/beta/assistant_tool_choice_option_param.py +++ b/src/openai/types/beta/assistant_tool_choice_option_param.py @@ -3,10 +3,10 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .assistant_tool_choice_param import AssistantToolChoiceParam __all__ = ["AssistantToolChoiceOptionParam"] -AssistantToolChoiceOptionParam = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] +AssistantToolChoiceOptionParam: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] diff --git a/src/openai/types/beta/assistant_tool_choice_param.py b/src/openai/types/beta/assistant_tool_choice_param.py index 904f489e26..05916bb668 100644 --- a/src/openai/types/beta/assistant_tool_choice_param.py +++ b/src/openai/types/beta/assistant_tool_choice_param.py @@ -10,6 +10,11 @@ class AssistantToolChoiceParam(TypedDict, total=False): + """Specifies a tool the model should use. + + Use to force the model to call a specific tool. + """ + type: Required[Literal["function", "code_interpreter", "file_search"]] """The type of the tool. If type is `function`, the function name must be set""" diff --git a/src/openai/types/beta/assistant_tool_param.py b/src/openai/types/beta/assistant_tool_param.py index 5b1d30ba2f..321c4b1ddb 100644 --- a/src/openai/types/beta/assistant_tool_param.py +++ b/src/openai/types/beta/assistant_tool_param.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .function_tool_param import FunctionToolParam from .file_search_tool_param import FileSearchToolParam @@ -10,4 +11,4 @@ __all__ = ["AssistantToolParam"] -AssistantToolParam = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] +AssistantToolParam: TypeAlias = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index b401e1a891..7896fcd9c6 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -2,10 +2,13 @@ from __future__ import annotations -from typing import List, Iterable, Optional -from typing_extensions import TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, TypedDict +from ..._types import SequenceNotStr from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["AssistantUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -21,35 +24,105 @@ class AssistantUpdateParams(TypedDict, total=False): The maximum length is 256,000 characters. 
""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ - model: str + model: Union[ + str, + Literal[ + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -94,7 +167,7 @@ class AssistantUpdateParams(TypedDict, total=False): class ToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ Overrides the list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available @@ -104,7 +177,7 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): class ToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ Overrides the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -114,6 +187,11 @@ class ToolResourcesFileSearch(TypedDict, total=False): class ToolResources(TypedDict, total=False): + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + """ + code_interpreter: ToolResourcesCodeInterpreter file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/chatkit/__init__.py b/src/openai/types/beta/chatkit/__init__.py new file mode 100644 index 0000000000..eafed9dd99 --- /dev/null +++ b/src/openai/types/beta/chatkit/__init__.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
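The AssistantUpdateParams changes above (typed `Metadata`, the expanded `model` literal, the new `reasoning_effort` field, and Structured Outputs support in `response_format`) can be exercised roughly as follows. This is a minimal usage sketch, assuming an `OPENAI_API_KEY` in the environment; the assistant ID, metadata values, and JSON schema are placeholders.

# Minimal sketch of updating an assistant with the fields described above.
# The assistant ID, metadata values, and JSON schema are placeholders.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

assistant = client.beta.assistants.update(
    "asst_abc123",  # hypothetical assistant ID
    model="gpt-4o",
    metadata={"project": "docs-demo"},  # up to 16 pairs; keys <= 64 chars, values <= 512 chars
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "summary",
            "schema": {
                "type": "object",
                "properties": {"title": {"type": "string"}},
                "required": ["title"],
                "additionalProperties": False,
            },
        },
    },
)
print(assistant.id)

# For reasoning models listed in the `model` literal, `reasoning_effort` can be set as well.
client.beta.assistants.update("asst_abc123", model="o3-mini", reasoning_effort="low")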
+ +from __future__ import annotations + +from .chat_session import ChatSession as ChatSession +from .chatkit_thread import ChatKitThread as ChatKitThread +from .chatkit_attachment import ChatKitAttachment as ChatKitAttachment +from .thread_list_params import ThreadListParams as ThreadListParams +from .chat_session_status import ChatSessionStatus as ChatSessionStatus +from .chatkit_widget_item import ChatKitWidgetItem as ChatKitWidgetItem +from .chat_session_history import ChatSessionHistory as ChatSessionHistory +from .session_create_params import SessionCreateParams as SessionCreateParams +from .thread_delete_response import ThreadDeleteResponse as ThreadDeleteResponse +from .chat_session_file_upload import ChatSessionFileUpload as ChatSessionFileUpload +from .chat_session_rate_limits import ChatSessionRateLimits as ChatSessionRateLimits +from .chatkit_thread_item_list import ChatKitThreadItemList as ChatKitThreadItemList +from .thread_list_items_params import ThreadListItemsParams as ThreadListItemsParams +from .chat_session_workflow_param import ChatSessionWorkflowParam as ChatSessionWorkflowParam +from .chatkit_response_output_text import ChatKitResponseOutputText as ChatKitResponseOutputText +from .chat_session_rate_limits_param import ChatSessionRateLimitsParam as ChatSessionRateLimitsParam +from .chat_session_expires_after_param import ChatSessionExpiresAfterParam as ChatSessionExpiresAfterParam +from .chatkit_thread_user_message_item import ChatKitThreadUserMessageItem as ChatKitThreadUserMessageItem +from .chat_session_chatkit_configuration import ChatSessionChatKitConfiguration as ChatSessionChatKitConfiguration +from .chat_session_automatic_thread_titling import ( + ChatSessionAutomaticThreadTitling as ChatSessionAutomaticThreadTitling, +) +from .chatkit_thread_assistant_message_item import ( + ChatKitThreadAssistantMessageItem as ChatKitThreadAssistantMessageItem, +) +from .chat_session_chatkit_configuration_param import ( + ChatSessionChatKitConfigurationParam as ChatSessionChatKitConfigurationParam, +) diff --git a/src/openai/types/beta/chatkit/chat_session.py b/src/openai/types/beta/chatkit/chat_session.py new file mode 100644 index 0000000000..9db9fc93a0 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel +from ..chatkit_workflow import ChatKitWorkflow +from .chat_session_status import ChatSessionStatus +from .chat_session_rate_limits import ChatSessionRateLimits +from .chat_session_chatkit_configuration import ChatSessionChatKitConfiguration + +__all__ = ["ChatSession"] + + +class ChatSession(BaseModel): + """Represents a ChatKit session and its resolved configuration.""" + + id: str + """Identifier for the ChatKit session.""" + + chatkit_configuration: ChatSessionChatKitConfiguration + """Resolved ChatKit feature configuration for the session.""" + + client_secret: str + """Ephemeral client secret that authenticates session requests.""" + + expires_at: int + """Unix timestamp (in seconds) for when the session expires.""" + + max_requests_per_1_minute: int + """Convenience copy of the per-minute request limit.""" + + object: Literal["chatkit.session"] + """Type discriminator that is always `chatkit.session`.""" + + rate_limits: ChatSessionRateLimits + """Resolved rate limit values.""" + + status: ChatSessionStatus + """Current lifecycle state of the session.""" + + user: str + """User identifier associated with the session.""" + + workflow: ChatKitWorkflow + """Workflow metadata for the session.""" diff --git a/src/openai/types/beta/chatkit/chat_session_automatic_thread_titling.py b/src/openai/types/beta/chatkit/chat_session_automatic_thread_titling.py new file mode 100644 index 0000000000..1d95255e06 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_automatic_thread_titling.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ...._models import BaseModel + +__all__ = ["ChatSessionAutomaticThreadTitling"] + + +class ChatSessionAutomaticThreadTitling(BaseModel): + """Automatic thread title preferences for the session.""" + + enabled: bool + """Whether automatic thread titling is enabled.""" diff --git a/src/openai/types/beta/chatkit/chat_session_chatkit_configuration.py b/src/openai/types/beta/chatkit/chat_session_chatkit_configuration.py new file mode 100644 index 0000000000..f9fa0ceff5 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_chatkit_configuration.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ...._models import BaseModel +from .chat_session_history import ChatSessionHistory +from .chat_session_file_upload import ChatSessionFileUpload +from .chat_session_automatic_thread_titling import ChatSessionAutomaticThreadTitling + +__all__ = ["ChatSessionChatKitConfiguration"] + + +class ChatSessionChatKitConfiguration(BaseModel): + """ChatKit configuration for the session.""" + + automatic_thread_titling: ChatSessionAutomaticThreadTitling + """Automatic thread titling preferences.""" + + file_upload: ChatSessionFileUpload + """Upload settings for the session.""" + + history: ChatSessionHistory + """History retention configuration.""" diff --git a/src/openai/types/beta/chatkit/chat_session_chatkit_configuration_param.py b/src/openai/types/beta/chatkit/chat_session_chatkit_configuration_param.py new file mode 100644 index 0000000000..834de71e71 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_chatkit_configuration_param.py @@ -0,0 +1,76 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ChatSessionChatKitConfigurationParam", "AutomaticThreadTitling", "FileUpload", "History"] + + +class AutomaticThreadTitling(TypedDict, total=False): + """Configuration for automatic thread titling. + + When omitted, automatic thread titling is enabled by default. + """ + + enabled: bool + """Enable automatic thread title generation. Defaults to true.""" + + +class FileUpload(TypedDict, total=False): + """Configuration for upload enablement and limits. + + When omitted, uploads are disabled by default (max_files 10, max_file_size 512 MB). + """ + + enabled: bool + """Enable uploads for this session. Defaults to false.""" + + max_file_size: int + """Maximum size in megabytes for each uploaded file. + + Defaults to 512 MB, which is the maximum allowable size. + """ + + max_files: int + """Maximum number of files that can be uploaded to the session. Defaults to 10.""" + + +class History(TypedDict, total=False): + """Configuration for chat history retention. + + When omitted, history is enabled by default with no limit on recent_threads (null). + """ + + enabled: bool + """Enables chat users to access previous ChatKit threads. Defaults to true.""" + + recent_threads: int + """Number of recent ChatKit threads users have access to. + + Defaults to unlimited when unset. + """ + + +class ChatSessionChatKitConfigurationParam(TypedDict, total=False): + """Optional per-session configuration settings for ChatKit behavior.""" + + automatic_thread_titling: AutomaticThreadTitling + """Configuration for automatic thread titling. + + When omitted, automatic thread titling is enabled by default. + """ + + file_upload: FileUpload + """Configuration for upload enablement and limits. + + When omitted, uploads are disabled by default (max_files 10, max_file_size 512 + MB). + """ + + history: History + """Configuration for chat history retention. + + When omitted, history is enabled by default with no limit on recent_threads + (null). + """ diff --git a/src/openai/types/beta/chatkit/chat_session_expires_after_param.py b/src/openai/types/beta/chatkit/chat_session_expires_after_param.py new file mode 100644 index 0000000000..c1de8a767a --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_expires_after_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatSessionExpiresAfterParam"] + + +class ChatSessionExpiresAfterParam(TypedDict, total=False): + """Controls when the session expires relative to an anchor timestamp.""" + + anchor: Required[Literal["created_at"]] + """Base timestamp used to calculate expiration. Currently fixed to `created_at`.""" + + seconds: Required[int] + """Number of seconds after the anchor when the session expires.""" diff --git a/src/openai/types/beta/chatkit/chat_session_file_upload.py b/src/openai/types/beta/chatkit/chat_session_file_upload.py new file mode 100644 index 0000000000..0275859d27 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_file_upload.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["ChatSessionFileUpload"] + + +class ChatSessionFileUpload(BaseModel): + """Upload permissions and limits applied to the session.""" + + enabled: bool + """Indicates if uploads are enabled for the session.""" + + max_file_size: Optional[int] = None + """Maximum upload size in megabytes.""" + + max_files: Optional[int] = None + """Maximum number of uploads allowed during the session.""" diff --git a/src/openai/types/beta/chatkit/chat_session_history.py b/src/openai/types/beta/chatkit/chat_session_history.py new file mode 100644 index 0000000000..54690009c2 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_history.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["ChatSessionHistory"] + + +class ChatSessionHistory(BaseModel): + """History retention preferences returned for the session.""" + + enabled: bool + """Indicates if chat history is persisted for the session.""" + + recent_threads: Optional[int] = None + """Number of prior threads surfaced in history views. + + Defaults to null when all history is retained. + """ diff --git a/src/openai/types/beta/chatkit/chat_session_rate_limits.py b/src/openai/types/beta/chatkit/chat_session_rate_limits.py new file mode 100644 index 0000000000..7c5bd94e76 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_rate_limits.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ...._models import BaseModel + +__all__ = ["ChatSessionRateLimits"] + + +class ChatSessionRateLimits(BaseModel): + """Active per-minute request limit for the session.""" + + max_requests_per_1_minute: int + """Maximum allowed requests per one-minute window.""" diff --git a/src/openai/types/beta/chatkit/chat_session_rate_limits_param.py b/src/openai/types/beta/chatkit/chat_session_rate_limits_param.py new file mode 100644 index 0000000000..578f20b0c3 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_rate_limits_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ChatSessionRateLimitsParam"] + + +class ChatSessionRateLimitsParam(TypedDict, total=False): + """Controls request rate limits for the session.""" + + max_requests_per_1_minute: int + """Maximum number of requests allowed per minute for the session. Defaults to 10.""" diff --git a/src/openai/types/beta/chatkit/chat_session_status.py b/src/openai/types/beta/chatkit/chat_session_status.py new file mode 100644 index 0000000000..a483099c6c --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_status.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatSessionStatus"] + +ChatSessionStatus: TypeAlias = Literal["active", "expired", "cancelled"] diff --git a/src/openai/types/beta/chatkit/chat_session_workflow_param.py b/src/openai/types/beta/chatkit/chat_session_workflow_param.py new file mode 100644 index 0000000000..abf52de526 --- /dev/null +++ b/src/openai/types/beta/chatkit/chat_session_workflow_param.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union +from typing_extensions import Required, TypedDict + +__all__ = ["ChatSessionWorkflowParam", "Tracing"] + + +class Tracing(TypedDict, total=False): + """Optional tracing overrides for the workflow invocation. + + When omitted, tracing is enabled by default. + """ + + enabled: bool + """Whether tracing is enabled during the session. Defaults to true.""" + + +class ChatSessionWorkflowParam(TypedDict, total=False): + """Workflow reference and overrides applied to the chat session.""" + + id: Required[str] + """Identifier for the workflow invoked by the session.""" + + state_variables: Dict[str, Union[str, bool, float]] + """State variables forwarded to the workflow. + + Keys may be up to 64 characters, values must be primitive types, and the map + defaults to an empty object. + """ + + tracing: Tracing + """Optional tracing overrides for the workflow invocation. + + When omitted, tracing is enabled by default. + """ + + version: str + """Specific workflow version to run. Defaults to the latest deployed version.""" diff --git a/src/openai/types/beta/chatkit/chatkit_attachment.py b/src/openai/types/beta/chatkit/chatkit_attachment.py new file mode 100644 index 0000000000..7750925e03 --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_attachment.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ChatKitAttachment"] + + +class ChatKitAttachment(BaseModel): + """Attachment metadata included on thread items.""" + + id: str + """Identifier for the attachment.""" + + mime_type: str + """MIME type of the attachment.""" + + name: str + """Original display name for the attachment.""" + + preview_url: Optional[str] = None + """Preview URL for rendering the attachment inline.""" + + type: Literal["image", "file"] + """Attachment discriminator.""" diff --git a/src/openai/types/beta/chatkit/chatkit_response_output_text.py b/src/openai/types/beta/chatkit/chatkit_response_output_text.py new file mode 100644 index 0000000000..1348fed2b2 --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_response_output_text.py @@ -0,0 +1,72 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel + +__all__ = [ + "ChatKitResponseOutputText", + "Annotation", + "AnnotationFile", + "AnnotationFileSource", + "AnnotationURL", + "AnnotationURLSource", +] + + +class AnnotationFileSource(BaseModel): + """File attachment referenced by the annotation.""" + + filename: str + """Filename referenced by the annotation.""" + + type: Literal["file"] + """Type discriminator that is always `file`.""" + + +class AnnotationFile(BaseModel): + """Annotation that references an uploaded file.""" + + source: AnnotationFileSource + """File attachment referenced by the annotation.""" + + type: Literal["file"] + """Type discriminator that is always `file` for this annotation.""" + + +class AnnotationURLSource(BaseModel): + """URL referenced by the annotation.""" + + type: Literal["url"] + """Type discriminator that is always `url`.""" + + url: str + """URL referenced by the annotation.""" + + +class AnnotationURL(BaseModel): + """Annotation that references a URL.""" + + source: AnnotationURLSource + """URL referenced by the annotation.""" + + type: Literal["url"] + """Type discriminator that is always `url` for this annotation.""" + + +Annotation: TypeAlias = Annotated[Union[AnnotationFile, AnnotationURL], PropertyInfo(discriminator="type")] + + +class ChatKitResponseOutputText(BaseModel): + """Assistant response text accompanied by optional annotations.""" + + annotations: List[Annotation] + """Ordered list of annotations attached to the response text.""" + + text: str + """Assistant generated text.""" + + type: Literal["output_text"] + """Type discriminator that is always `output_text`.""" diff --git a/src/openai/types/beta/chatkit/chatkit_thread.py b/src/openai/types/beta/chatkit/chatkit_thread.py new file mode 100644 index 0000000000..32075233d8 --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_thread.py @@ -0,0 +1,64 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel + +__all__ = ["ChatKitThread", "Status", "StatusActive", "StatusLocked", "StatusClosed"] + + +class StatusActive(BaseModel): + """Indicates that a thread is active.""" + + type: Literal["active"] + """Status discriminator that is always `active`.""" + + +class StatusLocked(BaseModel): + """Indicates that a thread is locked and cannot accept new input.""" + + reason: Optional[str] = None + """Reason that the thread was locked. Defaults to null when no reason is recorded.""" + + type: Literal["locked"] + """Status discriminator that is always `locked`.""" + + +class StatusClosed(BaseModel): + """Indicates that a thread has been closed.""" + + reason: Optional[str] = None + """Reason that the thread was closed. 
Defaults to null when no reason is recorded.""" + + type: Literal["closed"] + """Status discriminator that is always `closed`.""" + + +Status: TypeAlias = Annotated[Union[StatusActive, StatusLocked, StatusClosed], PropertyInfo(discriminator="type")] + + +class ChatKitThread(BaseModel): + """Represents a ChatKit thread and its current status.""" + + id: str + """Identifier of the thread.""" + + created_at: int + """Unix timestamp (in seconds) for when the thread was created.""" + + object: Literal["chatkit.thread"] + """Type discriminator that is always `chatkit.thread`.""" + + status: Status + """Current status for the thread. Defaults to `active` for newly created threads.""" + + title: Optional[str] = None + """Optional human-readable title for the thread. + + Defaults to null when no title has been generated. + """ + + user: str + """Free-form string that identifies your end user who owns the thread.""" diff --git a/src/openai/types/beta/chatkit/chatkit_thread_assistant_message_item.py b/src/openai/types/beta/chatkit/chatkit_thread_assistant_message_item.py new file mode 100644 index 0000000000..337f53a83d --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_thread_assistant_message_item.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ...._models import BaseModel +from .chatkit_response_output_text import ChatKitResponseOutputText + +__all__ = ["ChatKitThreadAssistantMessageItem"] + + +class ChatKitThreadAssistantMessageItem(BaseModel): + """Assistant-authored message within a thread.""" + + id: str + """Identifier of the thread item.""" + + content: List[ChatKitResponseOutputText] + """Ordered assistant response segments.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.assistant_message"] + """Type discriminator that is always `chatkit.assistant_message`.""" diff --git a/src/openai/types/beta/chatkit/chatkit_thread_item_list.py b/src/openai/types/beta/chatkit/chatkit_thread_item_list.py new file mode 100644 index 0000000000..049ca54429 --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_thread_item_list.py @@ -0,0 +1,154 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel +from .chatkit_widget_item import ChatKitWidgetItem +from .chatkit_thread_user_message_item import ChatKitThreadUserMessageItem +from .chatkit_thread_assistant_message_item import ChatKitThreadAssistantMessageItem + +__all__ = [ + "ChatKitThreadItemList", + "Data", + "DataChatKitClientToolCall", + "DataChatKitTask", + "DataChatKitTaskGroup", + "DataChatKitTaskGroupTask", +] + + +class DataChatKitClientToolCall(BaseModel): + """Record of a client side tool invocation initiated by the assistant.""" + + id: str + """Identifier of the thread item.""" + + arguments: str + """JSON-encoded arguments that were sent to the tool.""" + + call_id: str + """Identifier for the client tool call.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + name: str + """Tool name that was invoked.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + output: Optional[str] = None + """JSON-encoded output captured from the tool. + + Defaults to null while execution is in progress. + """ + + status: Literal["in_progress", "completed"] + """Execution status for the tool call.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.client_tool_call"] + """Type discriminator that is always `chatkit.client_tool_call`.""" + + +class DataChatKitTask(BaseModel): + """Task emitted by the workflow to show progress and status updates.""" + + id: str + """Identifier of the thread item.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + heading: Optional[str] = None + """Optional heading for the task. Defaults to null when not provided.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + summary: Optional[str] = None + """Optional summary that describes the task. Defaults to null when omitted.""" + + task_type: Literal["custom", "thought"] + """Subtype for the task.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.task"] + """Type discriminator that is always `chatkit.task`.""" + + +class DataChatKitTaskGroupTask(BaseModel): + """Task entry that appears within a TaskGroup.""" + + heading: Optional[str] = None + """Optional heading for the grouped task. Defaults to null when not provided.""" + + summary: Optional[str] = None + """Optional summary that describes the grouped task. + + Defaults to null when omitted. 
+ """ + + type: Literal["custom", "thought"] + """Subtype for the grouped task.""" + + +class DataChatKitTaskGroup(BaseModel): + """Collection of workflow tasks grouped together in the thread.""" + + id: str + """Identifier of the thread item.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + tasks: List[DataChatKitTaskGroupTask] + """Tasks included in the group.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.task_group"] + """Type discriminator that is always `chatkit.task_group`.""" + + +Data: TypeAlias = Annotated[ + Union[ + ChatKitThreadUserMessageItem, + ChatKitThreadAssistantMessageItem, + ChatKitWidgetItem, + DataChatKitClientToolCall, + DataChatKitTask, + DataChatKitTaskGroup, + ], + PropertyInfo(discriminator="type"), +] + + +class ChatKitThreadItemList(BaseModel): + """A paginated list of thread items rendered for the ChatKit API.""" + + data: List[Data] + """A list of items""" + + first_id: Optional[str] = None + """The ID of the first item in the list.""" + + has_more: bool + """Whether there are more items available.""" + + last_id: Optional[str] = None + """The ID of the last item in the list.""" + + object: Literal["list"] + """The type of object returned, must be `list`.""" diff --git a/src/openai/types/beta/chatkit/chatkit_thread_user_message_item.py b/src/openai/types/beta/chatkit/chatkit_thread_user_message_item.py new file mode 100644 index 0000000000..d7552c4f2e --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_thread_user_message_item.py @@ -0,0 +1,87 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...._models import BaseModel +from .chatkit_attachment import ChatKitAttachment + +__all__ = [ + "ChatKitThreadUserMessageItem", + "Content", + "ContentInputText", + "ContentQuotedText", + "InferenceOptions", + "InferenceOptionsToolChoice", +] + + +class ContentInputText(BaseModel): + """Text block that a user contributed to the thread.""" + + text: str + """Plain-text content supplied by the user.""" + + type: Literal["input_text"] + """Type discriminator that is always `input_text`.""" + + +class ContentQuotedText(BaseModel): + """Quoted snippet that the user referenced in their message.""" + + text: str + """Quoted text content.""" + + type: Literal["quoted_text"] + """Type discriminator that is always `quoted_text`.""" + + +Content: TypeAlias = Annotated[Union[ContentInputText, ContentQuotedText], PropertyInfo(discriminator="type")] + + +class InferenceOptionsToolChoice(BaseModel): + """Preferred tool to invoke. Defaults to null when ChatKit should auto-select.""" + + id: str + """Identifier of the requested tool.""" + + +class InferenceOptions(BaseModel): + """Inference overrides applied to the message. Defaults to null when unset.""" + + model: Optional[str] = None + """Model name that generated the response. + + Defaults to null when using the session default. + """ + + tool_choice: Optional[InferenceOptionsToolChoice] = None + """Preferred tool to invoke. 
Defaults to null when ChatKit should auto-select.""" + + +class ChatKitThreadUserMessageItem(BaseModel): + """User-authored messages within a thread.""" + + id: str + """Identifier of the thread item.""" + + attachments: List[ChatKitAttachment] + """Attachments associated with the user message. Defaults to an empty list.""" + + content: List[Content] + """Ordered content elements supplied by the user.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + inference_options: Optional[InferenceOptions] = None + """Inference overrides applied to the message. Defaults to null when unset.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.user_message"] diff --git a/src/openai/types/beta/chatkit/chatkit_widget_item.py b/src/openai/types/beta/chatkit/chatkit_widget_item.py new file mode 100644 index 0000000000..a269c736fb --- /dev/null +++ b/src/openai/types/beta/chatkit/chatkit_widget_item.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ChatKitWidgetItem"] + + +class ChatKitWidgetItem(BaseModel): + """Thread item that renders a widget payload.""" + + id: str + """Identifier of the thread item.""" + + created_at: int + """Unix timestamp (in seconds) for when the item was created.""" + + object: Literal["chatkit.thread_item"] + """Type discriminator that is always `chatkit.thread_item`.""" + + thread_id: str + """Identifier of the parent thread.""" + + type: Literal["chatkit.widget"] + """Type discriminator that is always `chatkit.widget`.""" + + widget: str + """Serialized widget payload rendered in the UI.""" diff --git a/src/openai/types/beta/chatkit/session_create_params.py b/src/openai/types/beta/chatkit/session_create_params.py new file mode 100644 index 0000000000..1803d18cf6 --- /dev/null +++ b/src/openai/types/beta/chatkit/session_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .chat_session_workflow_param import ChatSessionWorkflowParam +from .chat_session_rate_limits_param import ChatSessionRateLimitsParam +from .chat_session_expires_after_param import ChatSessionExpiresAfterParam +from .chat_session_chatkit_configuration_param import ChatSessionChatKitConfigurationParam + +__all__ = ["SessionCreateParams"] + + +class SessionCreateParams(TypedDict, total=False): + user: Required[str] + """ + A free-form string that identifies your end user; ensures this Session can + access other objects that have the same `user` scope. + """ + + workflow: Required[ChatSessionWorkflowParam] + """Workflow that powers the session.""" + + chatkit_configuration: ChatSessionChatKitConfigurationParam + """Optional overrides for ChatKit runtime configuration features""" + + expires_after: ChatSessionExpiresAfterParam + """Optional override for session expiration timing in seconds from creation. + + Defaults to 10 minutes. + """ + + rate_limits: ChatSessionRateLimitsParam + """Optional override for per-minute request limits. 
When omitted, defaults to 10.""" diff --git a/src/openai/types/beta/chatkit/thread_delete_response.py b/src/openai/types/beta/chatkit/thread_delete_response.py new file mode 100644 index 0000000000..45b686bf8b --- /dev/null +++ b/src/openai/types/beta/chatkit/thread_delete_response.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ThreadDeleteResponse"] + + +class ThreadDeleteResponse(BaseModel): + """Confirmation payload returned after deleting a thread.""" + + id: str + """Identifier of the deleted thread.""" + + deleted: bool + """Indicates that the thread has been deleted.""" + + object: Literal["chatkit.thread.deleted"] + """Type discriminator that is always `chatkit.thread.deleted`.""" diff --git a/src/openai/types/beta/chatkit/thread_list_items_params.py b/src/openai/types/beta/chatkit/thread_list_items_params.py new file mode 100644 index 0000000000..95c959d719 --- /dev/null +++ b/src/openai/types/beta/chatkit/thread_list_items_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ThreadListItemsParams"] + + +class ThreadListItemsParams(TypedDict, total=False): + after: str + """List items created after this thread item ID. + + Defaults to null for the first page. + """ + + before: str + """List items created before this thread item ID. + + Defaults to null for the newest results. + """ + + limit: int + """Maximum number of thread items to return. Defaults to 20.""" + + order: Literal["asc", "desc"] + """Sort order for results by creation time. Defaults to `desc`.""" diff --git a/src/openai/types/beta/chatkit/thread_list_params.py b/src/openai/types/beta/chatkit/thread_list_params.py new file mode 100644 index 0000000000..bb759c7ea3 --- /dev/null +++ b/src/openai/types/beta/chatkit/thread_list_params.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ThreadListParams"] + + +class ThreadListParams(TypedDict, total=False): + after: str + """List items created after this thread item ID. + + Defaults to null for the first page. + """ + + before: str + """List items created before this thread item ID. + + Defaults to null for the newest results. + """ + + limit: int + """Maximum number of thread items to return. Defaults to 20.""" + + order: Literal["asc", "desc"] + """Sort order for results by creation time. Defaults to `desc`.""" + + user: str + """Filter threads that belong to this user identifier. + + Defaults to null to return all users. + """ diff --git a/src/openai/types/beta/chatkit_workflow.py b/src/openai/types/beta/chatkit_workflow.py new file mode 100644 index 0000000000..b6f5b55b4a --- /dev/null +++ b/src/openai/types/beta/chatkit_workflow.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
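As a rough sketch of `SessionCreateParams` and `ThreadListParams` defined above, assuming the ChatKit resources are exposed as `client.beta.chatkit.sessions` and `client.beta.chatkit.threads`; the workflow ID and user identifier are placeholders.

# Sketch of creating a ChatKit session with the params defined above; the
# resource paths, workflow ID, and user identifier are assumptions/placeholders.
from openai import OpenAI

client = OpenAI()

session = client.beta.chatkit.sessions.create(
    user="user_1234",  # free-form end-user identifier
    workflow={
        "id": "wf_example",  # hypothetical workflow ID
        "state_variables": {"plan": "pro"},
        "tracing": {"enabled": True},
    },
    chatkit_configuration={
        "automatic_thread_titling": {"enabled": True},
        "file_upload": {"enabled": True, "max_files": 5, "max_file_size": 100},
        "history": {"enabled": True, "recent_threads": 20},
    },
    expires_after={"anchor": "created_at", "seconds": 600},  # 10 minutes after creation
    rate_limits={"max_requests_per_1_minute": 10},
)
print(session.client_secret, session.status)

# Threads owned by the same user can then be listed using ThreadListParams.
for thread in client.beta.chatkit.threads.list(user="user_1234", limit=20, order="desc"):
    print(thread.id, thread.title)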
+ +from typing import Dict, Union, Optional + +from ..._models import BaseModel + +__all__ = ["ChatKitWorkflow", "Tracing"] + + +class Tracing(BaseModel): + """Tracing settings applied to the workflow.""" + + enabled: bool + """Indicates whether tracing is enabled.""" + + +class ChatKitWorkflow(BaseModel): + """Workflow metadata and state returned for the session.""" + + id: str + """Identifier of the workflow backing the session.""" + + state_variables: Optional[Dict[str, Union[str, bool, float]]] = None + """State variable key-value pairs applied when invoking the workflow. + + Defaults to null when no overrides were provided. + """ + + tracing: Tracing + """Tracing settings applied to the workflow.""" + + version: Optional[str] = None + """Specific workflow version used for the session. + + Defaults to null when using the latest deployment. + """ diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py index eea55ea6ac..9e33249e0b 100644 --- a/src/openai/types/beta/file_search_tool.py +++ b/src/openai/types/beta/file_search_tool.py @@ -1,12 +1,64 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["FileSearchTool"] +__all__ = ["FileSearchTool", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(BaseModel): + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a score_threshold of 0. + + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) for more information. + """ + + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + +class FileSearch(BaseModel): + """Overrides for the file search tool.""" + + max_num_results: Optional[int] = None + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
+ """ class FileSearchTool(BaseModel): type: Literal["file_search"] """The type of tool being defined: `file_search`""" + + file_search: Optional[FileSearch] = None + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py index d33fd06da4..9906b4b2a4 100644 --- a/src/openai/types/beta/file_search_tool_param.py +++ b/src/openai/types/beta/file_search_tool_param.py @@ -4,9 +4,60 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["FileSearchToolParam"] +__all__ = ["FileSearchToolParam", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(TypedDict, total=False): + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a score_threshold of 0. + + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) for more information. + """ + + score_threshold: Required[float] + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + +class FileSearch(TypedDict, total=False): + """Overrides for the file search tool.""" + + max_num_results: int + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: FileSearchRankingOptions + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
+ """ class FileSearchToolParam(TypedDict, total=False): type: Required[Literal["file_search"]] """The type of tool being defined: `file_search`""" + + file_search: FileSearch + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/function_tool_param.py b/src/openai/types/beta/function_tool_param.py index b44c0d47ef..d906e02b88 100644 --- a/src/openai/types/beta/function_tool_param.py +++ b/src/openai/types/beta/function_tool_param.py @@ -4,13 +4,13 @@ from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from ..shared_params.function_definition import FunctionDefinition __all__ = ["FunctionToolParam"] class FunctionToolParam(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] + function: Required[FunctionDefinition] type: Required[Literal["function"]] """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/thread.py b/src/openai/types/beta/thread.py index 6f7a6c7d0c..83d9055194 100644 --- a/src/openai/types/beta/thread.py +++ b/src/openai/types/beta/thread.py @@ -4,6 +4,7 @@ from typing_extensions import Literal from ..._models import BaseModel +from ..shared.metadata import Metadata __all__ = ["Thread", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -28,24 +29,34 @@ class ToolResourcesFileSearch(BaseModel): class ToolResources(BaseModel): + """ + A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + """ + code_interpreter: Optional[ToolResourcesCodeInterpreter] = None file_search: Optional[ToolResourcesFileSearch] = None class Thread(BaseModel): + """ + Represents a thread that contains [messages](https://platform.openai.com/docs/api-reference/messages). + """ + id: str """The identifier, which can be referenced in API endpoints.""" created_at: int """The Unix timestamp (in seconds) for when the thread was created.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
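A short sketch of how the `file_search` overrides defined above might be supplied when creating an assistant. The vector store ID and threshold are placeholders, and the assistant-level `tool_resources` field comes from AssistantCreateParams, which is not shown in this excerpt.

# Sketch: creating an assistant with the file_search overrides from the types above.
# The vector store ID and score threshold are placeholders.
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Docs helper",
    tools=[
        {
            "type": "file_search",
            "file_search": {
                "max_num_results": 10,  # must be between 1 and 50 inclusive
                "ranking_options": {
                    "ranker": "auto",
                    "score_threshold": 0.5,  # floating point number between 0 and 1
                },
            },
        }
    ],
    tool_resources={"file_search": {"vector_store_ids": ["vs_abc123"]}},  # hypothetical vector store ID
)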
""" object: Literal["thread"] diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 6efe6e7aee..c0aee3e9f8 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -2,11 +2,13 @@ from __future__ import annotations -from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .function_tool_param import FunctionToolParam -from .file_search_tool_param import FileSearchToolParam +from ..._types import SequenceNotStr +from ..shared.chat_model import ChatModel +from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata from .code_interpreter_tool_param import CodeInterpreterToolParam from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from .threads.message_content_part_param import MessageContentPartParam @@ -18,14 +20,18 @@ "ThreadMessage", "ThreadMessageAttachment", "ThreadMessageAttachmentTool", + "ThreadMessageAttachmentToolFileSearch", "ThreadToolResources", "ThreadToolResourcesCodeInterpreter", "ThreadToolResourcesFileSearch", "ThreadToolResourcesFileSearchVectorStore", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategy", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", - "Tool", "TruncationStrategy", "ThreadCreateAndRunParamsNonStreaming", "ThreadCreateAndRunParamsStreaming", @@ -64,40 +70,17 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): `incomplete_details` for more info. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. - """ - - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -105,14 +88,26 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): assistant will be used. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. 
+ """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -132,7 +127,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): """ thread: Thread - """If no thread is provided, an empty thread will be created.""" + """Options to create a new thread. + + If no thread is provided when running a request, an empty thread will be + created. + """ tool_choice: Optional[AssistantToolChoiceOptionParam] """ @@ -153,7 +152,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): tool requires a list of vector store IDs. """ - tools: Optional[Iterable[Tool]] + tools: Optional[Iterable[AssistantToolParam]] """Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis. @@ -171,11 +170,16 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): truncation_strategy: Optional[TruncationStrategy] """Controls for how a thread will be truncated prior to the run. - Use this to control the intial context window of the run. + Use this to control the initial context window of the run. """ -ThreadMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class ThreadMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +ThreadMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, ThreadMessageAttachmentToolFileSearch] class ThreadMessageAttachment(TypedDict, total=False): @@ -202,17 +206,19 @@ class ThreadMessage(TypedDict, total=False): attachments: Optional[Iterable[ThreadMessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files @@ -220,25 +226,71 @@ class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + """The default strategy. + + This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`. + """ + + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ThreadToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto, + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic, +] + + class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): - file_ids: List[str] + chunking_strategy: ThreadToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ class ThreadToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -256,24 +308,36 @@ class ThreadToolResourcesFileSearch(TypedDict, total=False): class ThreadToolResources(TypedDict, total=False): + """ + A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. 
+ """ + code_interpreter: ThreadToolResourcesCodeInterpreter file_search: ThreadToolResourcesFileSearch class Thread(TypedDict, total=False): + """Options to create a new thread. + + If no thread is provided when running a + request, an empty thread will be created. + """ + messages: Iterable[ThreadMessage] """ A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ tool_resources: Optional[ThreadToolResources] @@ -286,7 +350,7 @@ class Thread(TypedDict, total=False): class ToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files @@ -295,7 +359,7 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): class ToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -305,15 +369,22 @@ class ToolResourcesFileSearch(TypedDict, total=False): class ToolResources(TypedDict, total=False): + """A set of resources that are used by the assistant's tools. + + The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + """ + code_interpreter: ToolResourcesCodeInterpreter file_search: ToolResourcesFileSearch -Tool = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] +class TruncationStrategy(TypedDict, total=False): + """Controls for how a thread will be truncated prior to the run. + Use this to control the initial context window of the run. + """ -class TruncationStrategy(TypedDict, total=False): type: Required[Literal["auto", "last_messages"]] """The truncation strategy to use for the thread. 
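A compact sketch tying together the create-and-run parameters above (an inline thread, typed metadata, `tool_choice`, `truncation_strategy`, and `parallel_tool_calls`). The assistant ID and message text are placeholders; the message `role`/`content` keys and the `last_messages` count follow the SDK's thread message and truncation types, which are not shown in this excerpt.

# Sketch: creating a thread and running it in one call with the params above.
# The assistant ID and message text are placeholders.
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    thread={
        "messages": [{"role": "user", "content": "Summarize the attached report."}],
        "metadata": {"source": "example"},
    },
    tool_choice="auto",  # or e.g. {"type": "file_search"} to force a specific tool
    truncation_strategy={"type": "last_messages", "last_messages": 5},
    parallel_tool_calls=True,
)
print(run.id, run.status)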
@@ -330,7 +401,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/thread_create_params.py b/src/openai/types/beta/thread_create_params.py index ccf50d58dc..ef83e3d465 100644 --- a/src/openai/types/beta/thread_create_params.py +++ b/src/openai/types/beta/thread_create_params.py @@ -2,10 +2,11 @@ from __future__ import annotations -from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .file_search_tool_param import FileSearchToolParam +from ..._types import SequenceNotStr +from ..shared_params.metadata import Metadata from .code_interpreter_tool_param import CodeInterpreterToolParam from .threads.message_content_part_param import MessageContentPartParam @@ -14,10 +15,15 @@ "Message", "MessageAttachment", "MessageAttachmentTool", + "MessageAttachmentToolFileSearch", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", ] @@ -28,12 +34,14 @@ class ThreadCreateParams(TypedDict, total=False): start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ tool_resources: Optional[ToolResources] @@ -45,7 +53,12 @@ class ThreadCreateParams(TypedDict, total=False): """ -MessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class MessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +MessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, MessageAttachmentToolFileSearch] class MessageAttachment(TypedDict, total=False): @@ -72,17 +85,19 @@ class Message(TypedDict, total=False): attachments: Optional[Iterable[MessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" class ToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files @@ -90,25 +105,70 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + """The default strategy. + + This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`. + """ + + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + class ToolResourcesFileSearchVectorStore(TypedDict, total=False): - file_ids: List[str] + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ class ToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -126,6 +186,10 @@ class ToolResourcesFileSearch(TypedDict, total=False): class ToolResources(TypedDict, total=False): + """ + A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. 
+ """ + code_interpreter: ToolResourcesCodeInterpreter file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/thread_update_params.py b/src/openai/types/beta/thread_update_params.py index 7210ab77c9..e000edc05f 100644 --- a/src/openai/types/beta/thread_update_params.py +++ b/src/openai/types/beta/thread_update_params.py @@ -2,19 +2,24 @@ from __future__ import annotations -from typing import List, Optional +from typing import Optional from typing_extensions import TypedDict +from ..._types import SequenceNotStr +from ..shared_params.metadata import Metadata + __all__ = ["ThreadUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class ThreadUpdateParams(TypedDict, total=False): - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ tool_resources: Optional[ToolResources] @@ -27,7 +32,7 @@ class ThreadUpdateParams(TypedDict, total=False): class ToolResourcesCodeInterpreter(TypedDict, total=False): - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files @@ -36,7 +41,7 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): class ToolResourcesFileSearch(TypedDict, total=False): - vector_store_ids: List[str] + vector_store_ids: SequenceNotStr[str] """ The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) @@ -46,6 +51,10 @@ class ToolResourcesFileSearch(TypedDict, total=False): class ToolResources(TypedDict, total=False): + """ + A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs. 
+ """ + code_interpreter: ToolResourcesCodeInterpreter file_search: ToolResourcesFileSearch diff --git a/src/openai/types/beta/threads/__init__.py b/src/openai/types/beta/threads/__init__.py index 023d76fc13..70853177bd 100644 --- a/src/openai/types/beta/threads/__init__.py +++ b/src/openai/types/beta/threads/__init__.py @@ -25,11 +25,13 @@ from .text_content_block import TextContentBlock as TextContentBlock from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent from .message_list_params import MessageListParams as MessageListParams +from .refusal_delta_block import RefusalDeltaBlock as RefusalDeltaBlock from .file_path_annotation import FilePathAnnotation as FilePathAnnotation from .image_url_delta_block import ImageURLDeltaBlock as ImageURLDeltaBlock from .message_content_delta import MessageContentDelta as MessageContentDelta from .message_create_params import MessageCreateParams as MessageCreateParams from .message_update_params import MessageUpdateParams as MessageUpdateParams +from .refusal_content_block import RefusalContentBlock as RefusalContentBlock from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock from .image_url_content_block import ImageURLContentBlock as ImageURLContentBlock from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation diff --git a/src/openai/types/beta/threads/annotation.py b/src/openai/types/beta/threads/annotation.py index 31e228c831..13c10abf4d 100644 --- a/src/openai/types/beta/threads/annotation.py +++ b/src/openai/types/beta/threads/annotation.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .file_path_annotation import FilePathAnnotation @@ -9,4 +9,4 @@ __all__ = ["Annotation"] -Annotation = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] +Annotation: TypeAlias = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/beta/threads/annotation_delta.py b/src/openai/types/beta/threads/annotation_delta.py index 912429672f..c7c6c89837 100644 --- a/src/openai/types/beta/threads/annotation_delta.py +++ b/src/openai/types/beta/threads/annotation_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .file_path_delta_annotation import FilePathDeltaAnnotation @@ -9,6 +9,6 @@ __all__ = ["AnnotationDelta"] -AnnotationDelta = Annotated[ +AnnotationDelta: TypeAlias = Annotated[ Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/file_citation_annotation.py b/src/openai/types/beta/threads/file_citation_annotation.py index 68571cd477..929da0ac56 100644 --- a/src/openai/types/beta/threads/file_citation_annotation.py +++ b/src/openai/types/beta/threads/file_citation_annotation.py @@ -11,11 +11,12 @@ class FileCitation(BaseModel): file_id: str """The ID of the specific File the citation is from.""" - quote: str - """The specific quote in the file.""" - class FileCitationAnnotation(BaseModel): + """ + A citation within the message that points to a specific quote from a specific File associated with the assistant or the message. Generated when the assistant uses the "file_search" tool to search files. + """ + end_index: int file_citation: FileCitation diff --git a/src/openai/types/beta/threads/file_citation_delta_annotation.py b/src/openai/types/beta/threads/file_citation_delta_annotation.py index b40c0d123e..591e322332 100644 --- a/src/openai/types/beta/threads/file_citation_delta_annotation.py +++ b/src/openai/types/beta/threads/file_citation_delta_annotation.py @@ -17,6 +17,10 @@ class FileCitation(BaseModel): class FileCitationDeltaAnnotation(BaseModel): + """ + A citation within the message that points to a specific quote from a specific File associated with the assistant or the message. Generated when the assistant uses the "file_search" tool to search files. + """ + index: int """The index of the annotation in the text content part.""" diff --git a/src/openai/types/beta/threads/file_path_annotation.py b/src/openai/types/beta/threads/file_path_annotation.py index 9812737ece..d3c144c2fc 100644 --- a/src/openai/types/beta/threads/file_path_annotation.py +++ b/src/openai/types/beta/threads/file_path_annotation.py @@ -13,6 +13,10 @@ class FilePath(BaseModel): class FilePathAnnotation(BaseModel): + """ + A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a file. + """ + end_index: int file_path: FilePath diff --git a/src/openai/types/beta/threads/file_path_delta_annotation.py b/src/openai/types/beta/threads/file_path_delta_annotation.py index 0cbb445e48..5416874749 100644 --- a/src/openai/types/beta/threads/file_path_delta_annotation.py +++ b/src/openai/types/beta/threads/file_path_delta_annotation.py @@ -14,6 +14,10 @@ class FilePath(BaseModel): class FilePathDeltaAnnotation(BaseModel): + """ + A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a file. + """ + index: int """The index of the annotation in the text content part.""" diff --git a/src/openai/types/beta/threads/image_file_content_block.py b/src/openai/types/beta/threads/image_file_content_block.py index a909999065..5a082cd488 100644 --- a/src/openai/types/beta/threads/image_file_content_block.py +++ b/src/openai/types/beta/threads/image_file_content_block.py @@ -9,6 +9,10 @@ class ImageFileContentBlock(BaseModel): + """ + References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a message. 
+ """ + image_file: ImageFile type: Literal["image_file"] diff --git a/src/openai/types/beta/threads/image_file_content_block_param.py b/src/openai/types/beta/threads/image_file_content_block_param.py index 48d94bee36..da095a5ff6 100644 --- a/src/openai/types/beta/threads/image_file_content_block_param.py +++ b/src/openai/types/beta/threads/image_file_content_block_param.py @@ -10,6 +10,10 @@ class ImageFileContentBlockParam(TypedDict, total=False): + """ + References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a message. + """ + image_file: Required[ImageFileParam] type: Required[Literal["image_file"]] diff --git a/src/openai/types/beta/threads/image_file_delta_block.py b/src/openai/types/beta/threads/image_file_delta_block.py index 0a5a2e8a5f..ed17f7ff3b 100644 --- a/src/openai/types/beta/threads/image_file_delta_block.py +++ b/src/openai/types/beta/threads/image_file_delta_block.py @@ -10,6 +10,10 @@ class ImageFileDeltaBlock(BaseModel): + """ + References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a message. + """ + index: int """The index of the content part in the message.""" diff --git a/src/openai/types/beta/threads/image_url_content_block.py b/src/openai/types/beta/threads/image_url_content_block.py index 40a16c1df8..8dc1f16a7a 100644 --- a/src/openai/types/beta/threads/image_url_content_block.py +++ b/src/openai/types/beta/threads/image_url_content_block.py @@ -9,6 +9,8 @@ class ImageURLContentBlock(BaseModel): + """References an image URL in the content of a message.""" + image_url: ImageURL type: Literal["image_url"] diff --git a/src/openai/types/beta/threads/image_url_content_block_param.py b/src/openai/types/beta/threads/image_url_content_block_param.py index 585b926c58..a5c59e02c2 100644 --- a/src/openai/types/beta/threads/image_url_content_block_param.py +++ b/src/openai/types/beta/threads/image_url_content_block_param.py @@ -10,6 +10,8 @@ class ImageURLContentBlockParam(TypedDict, total=False): + """References an image URL in the content of a message.""" + image_url: Required[ImageURLParam] type: Required[Literal["image_url"]] diff --git a/src/openai/types/beta/threads/image_url_delta_block.py b/src/openai/types/beta/threads/image_url_delta_block.py index 5252da12dd..3128d8e709 100644 --- a/src/openai/types/beta/threads/image_url_delta_block.py +++ b/src/openai/types/beta/threads/image_url_delta_block.py @@ -10,6 +10,8 @@ class ImageURLDeltaBlock(BaseModel): + """References an image URL in the content of a message.""" + index: int """The index of the content part in the message.""" diff --git a/src/openai/types/beta/threads/message.py b/src/openai/types/beta/threads/message.py index ebaabdb0f5..fc7f73f091 100644 --- a/src/openai/types/beta/threads/message.py +++ b/src/openai/types/beta/threads/message.py @@ -1,16 +1,28 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Union, Optional -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ...._models import BaseModel from .message_content import MessageContent -from ..file_search_tool import FileSearchTool +from ...shared.metadata import Metadata from ..code_interpreter_tool import CodeInterpreterTool -__all__ = ["Message", "Attachment", "AttachmentTool", "IncompleteDetails"] +__all__ = [ + "Message", + "Attachment", + "AttachmentTool", + "AttachmentToolAssistantToolsFileSearchTypeOnly", + "IncompleteDetails", +] -AttachmentTool = Union[CodeInterpreterTool, FileSearchTool] + +class AttachmentToolAssistantToolsFileSearchTypeOnly(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterTool, AttachmentToolAssistantToolsFileSearchTypeOnly] class Attachment(BaseModel): @@ -22,11 +34,17 @@ class Attachment(BaseModel): class IncompleteDetails(BaseModel): + """On an incomplete message, details about why the message is incomplete.""" + reason: Literal["content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed"] """The reason the message is incomplete.""" class Message(BaseModel): + """ + Represents a message within a [thread](https://platform.openai.com/docs/api-reference/threads). + """ + id: str """The identifier, which can be referenced in API endpoints.""" @@ -55,12 +73,14 @@ class Message(BaseModel): incomplete_details: Optional[IncompleteDetails] = None """On an incomplete message, details about why the message is incomplete.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ object: Literal["thread.message"] diff --git a/src/openai/types/beta/threads/message_content.py b/src/openai/types/beta/threads/message_content.py index 4f17d14786..b313d35af6 100644 --- a/src/openai/types/beta/threads/message_content.py +++ b/src/openai/types/beta/threads/message_content.py @@ -1,15 +1,17 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .text_content_block import TextContentBlock +from .refusal_content_block import RefusalContentBlock from .image_url_content_block import ImageURLContentBlock from .image_file_content_block import ImageFileContentBlock __all__ = ["MessageContent"] -MessageContent = Annotated[ - Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock], PropertyInfo(discriminator="type") +MessageContent: TypeAlias = Annotated[ + Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock, RefusalContentBlock], + PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/threads/message_content_delta.py b/src/openai/types/beta/threads/message_content_delta.py index 6c5f732b12..b6e7dfa45a 100644 --- a/src/openai/types/beta/threads/message_content_delta.py +++ b/src/openai/types/beta/threads/message_content_delta.py @@ -1,15 +1,17 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .text_delta_block import TextDeltaBlock +from .refusal_delta_block import RefusalDeltaBlock from .image_url_delta_block import ImageURLDeltaBlock from .image_file_delta_block import ImageFileDeltaBlock __all__ = ["MessageContentDelta"] -MessageContentDelta = Annotated[ - Union[ImageFileDeltaBlock, TextDeltaBlock, ImageURLDeltaBlock], PropertyInfo(discriminator="type") +MessageContentDelta: TypeAlias = Annotated[ + Union[ImageFileDeltaBlock, TextDeltaBlock, RefusalDeltaBlock, ImageURLDeltaBlock], + PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/threads/message_content_part_param.py b/src/openai/types/beta/threads/message_content_part_param.py index d11442a3a9..dc09a01c27 100644 --- a/src/openai/types/beta/threads/message_content_part_param.py +++ b/src/openai/types/beta/threads/message_content_part_param.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .text_content_block_param import TextContentBlockParam from .image_url_content_block_param import ImageURLContentBlockParam @@ -10,4 +11,4 @@ __all__ = ["MessageContentPartParam"] -MessageContentPartParam = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] +MessageContentPartParam: TypeAlias = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] diff --git a/src/openai/types/beta/threads/message_create_params.py b/src/openai/types/beta/threads/message_create_params.py index 3668df950d..b52386824a 100644 --- a/src/openai/types/beta/threads/message_create_params.py +++ b/src/openai/types/beta/threads/message_create_params.py @@ -3,13 +3,13 @@ from __future__ import annotations from typing import Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ..file_search_tool_param import FileSearchToolParam +from ...shared_params.metadata import Metadata from .message_content_part_param import MessageContentPartParam from ..code_interpreter_tool_param import CodeInterpreterToolParam -__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool"] +__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool", "AttachmentToolFileSearch"] 
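
The message-related hunks above add refusal content blocks and narrow the attachment tool union from the generic `FileSearchToolParam` to the type-only `AttachmentToolFileSearch` shape. A small request-side sketch of those params, assuming an existing thread and uploaded files (all IDs are placeholders):

# Sketch: create a user message with mixed content parts and a file_search attachment.
from openai import OpenAI

client = OpenAI()

message = client.beta.threads.messages.create(
    "thread_abc123",  # placeholder thread ID
    role="user",
    content=[
        {"type": "text", "text": "What does the attached spreadsheet show?"},
        {"type": "image_file", "image_file": {"file_id": "file_img123"}},  # placeholder
    ],
    attachments=[
        {"file_id": "file_abc123", "tools": [{"type": "file_search"}]},  # placeholder
    ],
)
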
class MessageCreateParams(TypedDict, total=False): @@ -28,16 +28,23 @@ class MessageCreateParams(TypedDict, total=False): attachments: Optional[Iterable[Attachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ -AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class AttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AttachmentToolFileSearch] class Attachment(TypedDict, total=False): diff --git a/src/openai/types/beta/threads/message_delta.py b/src/openai/types/beta/threads/message_delta.py index ecd0dfe319..fdeebb3a12 100644 --- a/src/openai/types/beta/threads/message_delta.py +++ b/src/openai/types/beta/threads/message_delta.py @@ -10,6 +10,8 @@ class MessageDelta(BaseModel): + """The delta containing the fields that have changed on the Message.""" + content: Optional[List[MessageContentDelta]] = None """The content of the message in array of text and/or images.""" diff --git a/src/openai/types/beta/threads/message_delta_event.py b/src/openai/types/beta/threads/message_delta_event.py index 3811cef679..d5ba1e172d 100644 --- a/src/openai/types/beta/threads/message_delta_event.py +++ b/src/openai/types/beta/threads/message_delta_event.py @@ -9,6 +9,11 @@ class MessageDeltaEvent(BaseModel): + """Represents a message delta i.e. + + any changed fields on a message during streaming. + """ + id: str """The identifier of the message, which can be referenced in API endpoints.""" diff --git a/src/openai/types/beta/threads/message_list_params.py b/src/openai/types/beta/threads/message_list_params.py index 18c2442fb5..a7c22a66fb 100644 --- a/src/openai/types/beta/threads/message_list_params.py +++ b/src/openai/types/beta/threads/message_list_params.py @@ -21,7 +21,7 @@ class MessageListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/threads/message_update_params.py b/src/openai/types/beta/threads/message_update_params.py index 7000f33122..bb078281e6 100644 --- a/src/openai/types/beta/threads/message_update_params.py +++ b/src/openai/types/beta/threads/message_update_params.py @@ -5,16 +5,20 @@ from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["MessageUpdateParams"] class MessageUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. 
This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/threads/refusal_content_block.py b/src/openai/types/beta/threads/refusal_content_block.py new file mode 100644 index 0000000000..b4512b3ccb --- /dev/null +++ b/src/openai/types/beta/threads/refusal_content_block.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalContentBlock"] + + +class RefusalContentBlock(BaseModel): + """The refusal content generated by the assistant.""" + + refusal: str + + type: Literal["refusal"] + """Always `refusal`.""" diff --git a/src/openai/types/beta/threads/refusal_delta_block.py b/src/openai/types/beta/threads/refusal_delta_block.py new file mode 100644 index 0000000000..85a1f08db1 --- /dev/null +++ b/src/openai/types/beta/threads/refusal_delta_block.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalDeltaBlock"] + + +class RefusalDeltaBlock(BaseModel): + """The refusal content that is part of a message.""" + + index: int + """The index of the refusal part in the message.""" + + type: Literal["refusal"] + """Always `refusal`.""" + + refusal: Optional[str] = None diff --git a/src/openai/types/beta/threads/required_action_function_tool_call.py b/src/openai/types/beta/threads/required_action_function_tool_call.py index a24dfd068b..3cec8514ca 100644 --- a/src/openai/types/beta/threads/required_action_function_tool_call.py +++ b/src/openai/types/beta/threads/required_action_function_tool_call.py @@ -8,6 +8,8 @@ class Function(BaseModel): + """The function definition.""" + arguments: str """The arguments that the model expects you to pass to the function.""" @@ -16,6 +18,8 @@ class Function(BaseModel): class RequiredActionFunctionToolCall(BaseModel): + """Tool call objects""" + id: str """The ID of the tool call. diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index 8244ffd598..8a88fa1673 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -6,6 +6,7 @@ from ...._models import BaseModel from .run_status import RunStatus from ..assistant_tool import AssistantTool +from ...shared.metadata import Metadata from ..assistant_tool_choice_option import AssistantToolChoiceOption from ..assistant_response_format_option import AssistantResponseFormatOption from .required_action_function_tool_call import RequiredActionFunctionToolCall @@ -22,6 +23,11 @@ class IncompleteDetails(BaseModel): + """Details on why the run is incomplete. + + Will be `null` if the run is not incomplete. + """ + reason: Optional[Literal["max_completion_tokens", "max_prompt_tokens"]] = None """The reason why the run is incomplete. @@ -31,6 +37,8 @@ class IncompleteDetails(BaseModel): class LastError(BaseModel): + """The last error associated with this run. 
Will be `null` if there are no errors.""" + code: Literal["server_error", "rate_limit_exceeded", "invalid_prompt"] """One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.""" @@ -39,11 +47,18 @@ class LastError(BaseModel): class RequiredActionSubmitToolOutputs(BaseModel): + """Details on the tool outputs needed for this run to continue.""" + tool_calls: List[RequiredActionFunctionToolCall] """A list of the relevant tool calls.""" class RequiredAction(BaseModel): + """Details on the action required to continue the run. + + Will be `null` if no action is required. + """ + submit_tool_outputs: RequiredActionSubmitToolOutputs """Details on the tool outputs needed for this run to continue.""" @@ -52,6 +67,11 @@ class RequiredAction(BaseModel): class TruncationStrategy(BaseModel): + """Controls for how a thread will be truncated prior to the run. + + Use this to control the initial context window of the run. + """ + type: Literal["auto", "last_messages"] """The truncation strategy to use for the thread. @@ -69,6 +89,11 @@ class TruncationStrategy(BaseModel): class Usage(BaseModel): + """Usage statistics related to the run. + + This value will be `null` if the run is not in a terminal state (i.e. `in_progress`, `queued`, etc.). + """ + completion_tokens: int """Number of completion tokens used over the course of the run.""" @@ -80,6 +105,10 @@ class Usage(BaseModel): class Run(BaseModel): + """ + Represents an execution run on a [thread](https://platform.openai.com/docs/api-reference/threads). + """ + id: str """The identifier, which can be referenced in API endpoints.""" @@ -133,12 +162,14 @@ class Run(BaseModel): of the run. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -151,6 +182,13 @@ class Run(BaseModel): object: Literal["thread.run"] """The object type, which is always `thread.run`.""" + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + required_action: Optional[RequiredAction] = None """Details on the action required to continue the run. @@ -160,11 +198,16 @@ class Run(BaseModel): response_format: Optional[AssistantResponseFormatOption] = None """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. 
Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -213,7 +256,7 @@ class Run(BaseModel): truncation_strategy: Optional[TruncationStrategy] = None """Controls for how a thread will be truncated prior to the run. - Use this to control the intial context window of the run. + Use this to control the initial context window of the run. """ usage: Optional[Usage] = None diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 90c9708596..376afc9aad 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -2,11 +2,14 @@ from __future__ import annotations -from typing import Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from ...shared.chat_model import ChatModel from ..assistant_tool_param import AssistantToolParam -from ..file_search_tool_param import FileSearchToolParam +from .runs.run_step_include import RunStepInclude +from ...shared_params.metadata import Metadata +from ...shared.reasoning_effort import ReasoningEffort from .message_content_part_param import MessageContentPartParam from ..code_interpreter_tool_param import CodeInterpreterToolParam from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam @@ -17,6 +20,7 @@ "AdditionalMessage", "AdditionalMessageAttachment", "AdditionalMessageAttachmentTool", + "AdditionalMessageAttachmentToolFileSearch", "TruncationStrategy", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming", @@ -31,6 +35,18 @@ class RunCreateParamsBase(TypedDict, total=False): execute this run. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + additional_instructions: Optional[str] """Appends additional instructions at the end of the instructions for the run. @@ -66,40 +82,17 @@ class RunCreateParamsBase(TypedDict, total=False): `incomplete_details` for more info. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. 
- """ - - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -107,14 +100,43 @@ class RunCreateParamsBase(TypedDict, total=False): assistant will be used. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -162,11 +184,16 @@ class RunCreateParamsBase(TypedDict, total=False): truncation_strategy: Optional[TruncationStrategy] """Controls for how a thread will be truncated prior to the run. - Use this to control the intial context window of the run. + Use this to control the initial context window of the run. 
""" -AdditionalMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class AdditionalMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AdditionalMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AdditionalMessageAttachmentToolFileSearch] class AdditionalMessageAttachment(TypedDict, total=False): @@ -193,16 +220,23 @@ class AdditionalMessage(TypedDict, total=False): attachments: Optional[Iterable[AdditionalMessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ class TruncationStrategy(TypedDict, total=False): + """Controls for how a thread will be truncated prior to the run. + + Use this to control the initial context window of the run. + """ + type: Required[Literal["auto", "last_messages"]] """The truncation strategy to use for the thread. @@ -219,7 +253,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class RunCreateParamsNonStreaming(RunCreateParamsBase): +class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_list_params.py b/src/openai/types/beta/threads/run_list_params.py index 1e32bca4b4..fbea54f6f2 100644 --- a/src/openai/types/beta/threads/run_list_params.py +++ b/src/openai/types/beta/threads/run_list_params.py @@ -21,7 +21,7 @@ class RunListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/threads/run_status.py b/src/openai/types/beta/threads/run_status.py index 6666d00e5a..47c7cbd007 100644 --- a/src/openai/types/beta/threads/run_status.py +++ b/src/openai/types/beta/threads/run_status.py @@ -1,10 +1,10 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias __all__ = ["RunStatus"] -RunStatus = Literal[ +RunStatus: TypeAlias = Literal[ "queued", "in_progress", "requires_action", diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index ccb5e5e97e..147728603a 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -31,7 +31,7 @@ class ToolOutput(TypedDict, total=False): """ -class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_update_params.py b/src/openai/types/beta/threads/run_update_params.py index e595eac882..fbcbd3fb14 100644 --- a/src/openai/types/beta/threads/run_update_params.py +++ b/src/openai/types/beta/threads/run_update_params.py @@ -5,16 +5,20 @@ from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["RunUpdateParams"] class RunUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" diff --git a/src/openai/types/beta/threads/runs/__init__.py b/src/openai/types/beta/threads/runs/__init__.py index a312ce3df2..467d5d793d 100644 --- a/src/openai/types/beta/threads/runs/__init__.py +++ b/src/openai/types/beta/threads/runs/__init__.py @@ -6,9 +6,11 @@ from .tool_call import ToolCall as ToolCall from .run_step_delta import RunStepDelta as RunStepDelta from .tool_call_delta import ToolCallDelta as ToolCallDelta +from .run_step_include import RunStepInclude as RunStepInclude from .step_list_params import StepListParams as StepListParams from .function_tool_call import FunctionToolCall as FunctionToolCall from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent +from .step_retrieve_params import StepRetrieveParams as StepRetrieveParams from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject diff --git a/src/openai/types/beta/threads/runs/code_interpreter_logs.py b/src/openai/types/beta/threads/runs/code_interpreter_logs.py index 0bf8c1dac2..722fd2b4c4 100644 --- a/src/openai/types/beta/threads/runs/code_interpreter_logs.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_logs.py @@ -9,6 +9,8 @@ class CodeInterpreterLogs(BaseModel): + """Text output from the Code Interpreter tool call as part of a run step.""" + index: int """The index of the output in the outputs array.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py index 2f07243684..bc78b5fa3d 100644 --- a/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -17,6 +17,8 @@ class CodeInterpreterOutputLogs(BaseModel): + """Text output from the Code Interpreter tool call as part of a run step.""" + logs: str """The text output from the Code Interpreter tool call.""" @@ -39,12 +41,14 @@ class CodeInterpreterOutputImage(BaseModel): """Always `image`.""" -CodeInterpreterOutput = Annotated[ +CodeInterpreterOutput: TypeAlias = Annotated[ Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") ] class CodeInterpreter(BaseModel): + """The Code Interpreter tool call definition.""" + input: str """The input to the Code Interpreter tool call.""" @@ -57,6 +61,8 @@ class CodeInterpreter(BaseModel): class CodeInterpreterToolCall(BaseModel): + """Details of the Code Interpreter tool call the run step was involved in.""" + id: str """The ID of the tool call.""" diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py index eff76355b3..efedac795c 100644 --- a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Union, Optional -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -10,12 +10,14 @@ __all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"] -CodeInterpreterOutput = Annotated[ +CodeInterpreterOutput: TypeAlias = Annotated[ Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") ] class CodeInterpreter(BaseModel): + """The Code Interpreter tool call definition.""" + input: Optional[str] = None """The input to the Code Interpreter tool call.""" @@ -28,6 +30,8 @@ class CodeInterpreter(BaseModel): class CodeInterpreterToolCallDelta(BaseModel): + """Details of the Code Interpreter tool call the run step was involved in.""" + index: int """The index of the tool call in the tool calls array.""" diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call.py b/src/openai/types/beta/threads/runs/file_search_tool_call.py index 57c0ca9a90..291a93ec65 100644 --- a/src/openai/types/beta/threads/runs/file_search_tool_call.py +++ b/src/openai/types/beta/threads/runs/file_search_tool_call.py @@ -1,17 +1,80 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import List, Optional from typing_extensions import Literal from ....._models import BaseModel -__all__ = ["FileSearchToolCall"] +__all__ = [ + "FileSearchToolCall", + "FileSearch", + "FileSearchRankingOptions", + "FileSearchResult", + "FileSearchResultContent", +] + + +class FileSearchRankingOptions(BaseModel): + """The ranking options for the file search.""" + + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + +class FileSearchResultContent(BaseModel): + text: Optional[str] = None + """The text content of the file.""" + + type: Optional[Literal["text"]] = None + """The type of the content.""" + + +class FileSearchResult(BaseModel): + """A result instance of the file search.""" + + file_id: str + """The ID of the file that result was found in.""" + + file_name: str + """The name of the file that result was found in.""" + + score: float + """The score of the result. + + All values must be a floating point number between 0 and 1. + """ + + content: Optional[List[FileSearchResultContent]] = None + """The content of the result that was found. + + The content is only included if requested via the include query parameter. 
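
`FileSearchToolCall.file_search` is no longer a bare `object`: when the caller opts in via the `include` query parameter, the run step carries ranking options and per-file results with optional text content. A minimal sketch of reading that data back, assuming the run has already completed and using placeholder IDs:

# Sketch: list run steps and inspect file_search results (result content is only
# present when requested via `include`).
from openai import OpenAI

client = OpenAI()

steps = client.beta.threads.runs.steps.list(
    run_id="run_abc123",       # placeholder run ID
    thread_id="thread_abc123",  # placeholder thread ID
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)

for step in steps:
    if step.step_details.type != "tool_calls":
        continue
    for call in step.step_details.tool_calls:
        if call.type == "file_search" and call.file_search.results:
            for result in call.file_search.results:
                print(result.file_name, result.score)
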
+ """ + + +class FileSearch(BaseModel): + """For now, this is always going to be an empty object.""" + + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search.""" + + results: Optional[List[FileSearchResult]] = None + """The results of the file search.""" class FileSearchToolCall(BaseModel): id: str """The ID of the tool call object.""" - file_search: object + file_search: FileSearch """For now, this is always going to be an empty object.""" type: Literal["file_search"] diff --git a/src/openai/types/beta/threads/runs/function_tool_call.py b/src/openai/types/beta/threads/runs/function_tool_call.py index b1d354f894..dd0e22cfb1 100644 --- a/src/openai/types/beta/threads/runs/function_tool_call.py +++ b/src/openai/types/beta/threads/runs/function_tool_call.py @@ -9,6 +9,8 @@ class Function(BaseModel): + """The definition of the function that was called.""" + arguments: str """The arguments passed to the function.""" diff --git a/src/openai/types/beta/threads/runs/function_tool_call_delta.py b/src/openai/types/beta/threads/runs/function_tool_call_delta.py index faaf026f7f..4107e1b873 100644 --- a/src/openai/types/beta/threads/runs/function_tool_call_delta.py +++ b/src/openai/types/beta/threads/runs/function_tool_call_delta.py @@ -9,6 +9,8 @@ class Function(BaseModel): + """The definition of the function that was called.""" + arguments: Optional[str] = None """The arguments passed to the function.""" diff --git a/src/openai/types/beta/threads/runs/message_creation_step_details.py b/src/openai/types/beta/threads/runs/message_creation_step_details.py index 73439079d3..cd925b57ce 100644 --- a/src/openai/types/beta/threads/runs/message_creation_step_details.py +++ b/src/openai/types/beta/threads/runs/message_creation_step_details.py @@ -13,6 +13,8 @@ class MessageCreation(BaseModel): class MessageCreationStepDetails(BaseModel): + """Details of the message creation by the run step.""" + message_creation: MessageCreation type: Literal["message_creation"] diff --git a/src/openai/types/beta/threads/runs/run_step.py b/src/openai/types/beta/threads/runs/run_step.py index 7c81dcac2b..97451229fc 100644 --- a/src/openai/types/beta/threads/runs/run_step.py +++ b/src/openai/types/beta/threads/runs/run_step.py @@ -1,10 +1,11 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union, Optional -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel +from ....shared.metadata import Metadata from .tool_calls_step_details import ToolCallsStepDetails from .message_creation_step_details import MessageCreationStepDetails @@ -12,6 +13,11 @@ class LastError(BaseModel): + """The last error associated with this run step. + + Will be `null` if there are no errors. + """ + code: Literal["server_error", "rate_limit_exceeded"] """One of `server_error` or `rate_limit_exceeded`.""" @@ -19,10 +25,17 @@ class LastError(BaseModel): """A human-readable description of the error.""" -StepDetails = Annotated[Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type")] +StepDetails: TypeAlias = Annotated[ + Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type") +] class Usage(BaseModel): + """Usage statistics related to the run step. + + This value will be `null` while the run step's status is `in_progress`. 
+ """ + completion_tokens: int """Number of completion tokens used over the course of the run step.""" @@ -34,6 +47,8 @@ class Usage(BaseModel): class RunStep(BaseModel): + """Represents a step in execution of a run.""" + id: str """The identifier of the run step, which can be referenced in API endpoints.""" @@ -68,12 +83,14 @@ class RunStep(BaseModel): Will be `null` if there are no errors. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ object: Literal["thread.run.step"] diff --git a/src/openai/types/beta/threads/runs/run_step_delta.py b/src/openai/types/beta/threads/runs/run_step_delta.py index d6b4aefeb9..2ccb770d57 100644 --- a/src/openai/types/beta/threads/runs/run_step_delta.py +++ b/src/openai/types/beta/threads/runs/run_step_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union, Optional -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -10,9 +10,13 @@ __all__ = ["RunStepDelta", "StepDetails"] -StepDetails = Annotated[Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type")] +StepDetails: TypeAlias = Annotated[ + Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type") +] class RunStepDelta(BaseModel): + """The delta containing the fields that have changed on the run step.""" + step_details: Optional[StepDetails] = None """The details of the run step.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_event.py b/src/openai/types/beta/threads/runs/run_step_delta_event.py index 7f3f92aabf..8f1c095ae4 100644 --- a/src/openai/types/beta/threads/runs/run_step_delta_event.py +++ b/src/openai/types/beta/threads/runs/run_step_delta_event.py @@ -9,6 +9,11 @@ class RunStepDeltaEvent(BaseModel): + """Represents a run step delta i.e. + + any changed fields on a run step during streaming. + """ + id: str """The identifier of the run step, which can be referenced in API endpoints.""" diff --git a/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py index f58ed3d96d..4b18277c18 100644 --- a/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py +++ b/src/openai/types/beta/threads/runs/run_step_delta_message_delta.py @@ -14,6 +14,8 @@ class MessageCreation(BaseModel): class RunStepDeltaMessageDelta(BaseModel): + """Details of the message creation by the run step.""" + type: Literal["message_creation"] """Always `message_creation`.""" diff --git a/src/openai/types/beta/threads/runs/run_step_include.py b/src/openai/types/beta/threads/runs/run_step_include.py new file mode 100644 index 0000000000..8e76c1b716 --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_include.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["RunStepInclude"] + +RunStepInclude: TypeAlias = Literal["step_details.tool_calls[*].file_search.results[*].content"] diff --git a/src/openai/types/beta/threads/runs/step_list_params.py b/src/openai/types/beta/threads/runs/step_list_params.py index 606d444539..a6be771d9f 100644 --- a/src/openai/types/beta/threads/runs/step_list_params.py +++ b/src/openai/types/beta/threads/runs/step_list_params.py @@ -2,8 +2,11 @@ from __future__ import annotations +from typing import List from typing_extensions import Literal, Required, TypedDict +from .run_step_include import RunStepInclude + __all__ = ["StepListParams"] @@ -23,11 +26,23 @@ class StepListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + limit: int """A limit on the number of objects to be returned. diff --git a/src/openai/types/beta/threads/runs/step_retrieve_params.py b/src/openai/types/beta/threads/runs/step_retrieve_params.py new file mode 100644 index 0000000000..ecbb72edbd --- /dev/null +++ b/src/openai/types/beta/threads/runs/step_retrieve_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +from .run_step_include import RunStepInclude + +__all__ = ["StepRetrieveParams"] + + +class StepRetrieveParams(TypedDict, total=False): + thread_id: Required[str] + + run_id: Required[str] + + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ diff --git a/src/openai/types/beta/threads/runs/tool_call.py b/src/openai/types/beta/threads/runs/tool_call.py index 77d86b46d9..565e3109be 100644 --- a/src/openai/types/beta/threads/runs/tool_call.py +++ b/src/openai/types/beta/threads/runs/tool_call.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from .function_tool_call import FunctionToolCall @@ -10,6 +10,6 @@ __all__ = ["ToolCall"] -ToolCall = Annotated[ +ToolCall: TypeAlias = Annotated[ Union[CodeInterpreterToolCall, FileSearchToolCall, FunctionToolCall], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta.py b/src/openai/types/beta/threads/runs/tool_call_delta.py index 90cfe0657e..f0b8070c97 100644 --- a/src/openai/types/beta/threads/runs/tool_call_delta.py +++ b/src/openai/types/beta/threads/runs/tool_call_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from .function_tool_call_delta import FunctionToolCallDelta @@ -10,7 +10,7 @@ __all__ = ["ToolCallDelta"] -ToolCallDelta = Annotated[ +ToolCallDelta: TypeAlias = Annotated[ Union[CodeInterpreterToolCallDelta, FileSearchToolCallDelta, FunctionToolCallDelta], PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta_object.py b/src/openai/types/beta/threads/runs/tool_call_delta_object.py index 189dce772c..dbd1096ad6 100644 --- a/src/openai/types/beta/threads/runs/tool_call_delta_object.py +++ b/src/openai/types/beta/threads/runs/tool_call_delta_object.py @@ -10,6 +10,8 @@ class ToolCallDeltaObject(BaseModel): + """Details of the tool call.""" + type: Literal["tool_calls"] """Always `tool_calls`.""" diff --git a/src/openai/types/beta/threads/runs/tool_calls_step_details.py b/src/openai/types/beta/threads/runs/tool_calls_step_details.py index a084d387c7..1f54a6aa71 100644 --- a/src/openai/types/beta/threads/runs/tool_calls_step_details.py +++ b/src/openai/types/beta/threads/runs/tool_calls_step_details.py @@ -10,6 +10,8 @@ class ToolCallsStepDetails(BaseModel): + """Details of the tool call.""" + tool_calls: List[ToolCall] """An array of tool calls the run step was involved in. 
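# A minimal usage sketch for the new `include` parameter introduced by the
# `RunStepInclude`, `StepListParams`, and `StepRetrieveParams` changes above.
# It assumes the generated client exposes these params on
# `client.beta.threads.runs.steps` (mirroring the param file paths) and uses
# placeholder IDs; treat the exact method surface as an assumption rather than
# something this diff shows.
from openai import OpenAI

client = OpenAI()

# Retrieve one run step and ask the API to inline the file search result
# content referenced by its tool calls.
step = client.beta.threads.runs.steps.retrieve(
    "step_abc123",
    thread_id="thread_abc123",
    run_id="run_abc123",
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)

# List all steps for the run with the same additional field included.
steps = client.beta.threads.runs.steps.list(
    run_id="run_abc123",
    thread_id="thread_abc123",
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)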
diff --git a/src/openai/types/beta/threads/text_content_block.py b/src/openai/types/beta/threads/text_content_block.py index 3706d6b9d8..b9b1368a17 100644 --- a/src/openai/types/beta/threads/text_content_block.py +++ b/src/openai/types/beta/threads/text_content_block.py @@ -9,6 +9,8 @@ class TextContentBlock(BaseModel): + """The text content that is part of a message.""" + text: Text type: Literal["text"] diff --git a/src/openai/types/beta/threads/text_content_block_param.py b/src/openai/types/beta/threads/text_content_block_param.py index 6313de32cc..22c864438d 100644 --- a/src/openai/types/beta/threads/text_content_block_param.py +++ b/src/openai/types/beta/threads/text_content_block_param.py @@ -8,6 +8,8 @@ class TextContentBlockParam(TypedDict, total=False): + """The text content that is part of a message.""" + text: Required[str] """Text content to be sent to the model""" diff --git a/src/openai/types/beta/threads/text_delta_block.py b/src/openai/types/beta/threads/text_delta_block.py index 586116e0d6..a3d339ccad 100644 --- a/src/openai/types/beta/threads/text_delta_block.py +++ b/src/openai/types/beta/threads/text_delta_block.py @@ -10,6 +10,8 @@ class TextDeltaBlock(BaseModel): + """The text content that is part of a message.""" + index: int """The index of the content part in the message.""" diff --git a/src/openai/types/beta/vector_stores/file_batch_create_params.py b/src/openai/types/beta/vector_stores/file_batch_create_params.py deleted file mode 100644 index 0882829732..0000000000 --- a/src/openai/types/beta/vector_stores/file_batch_create_params.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Required, TypedDict - -__all__ = ["FileBatchCreateParams"] - - -class FileBatchCreateParams(TypedDict, total=False): - file_ids: Required[List[str]] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. - """ diff --git a/src/openai/types/beta/vector_stores/file_create_params.py b/src/openai/types/beta/vector_stores/file_create_params.py deleted file mode 100644 index 2fee588abf..0000000000 --- a/src/openai/types/beta/vector_stores/file_create_params.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["FileCreateParams"] - - -class FileCreateParams(TypedDict, total=False): - file_id: Required[str] - """ - A [File](https://platform.openai.com/docs/api-reference/files) ID that the - vector store should use. Useful for tools like `file_search` that can access - files. 
- """ diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index 0ba812ff9b..09a4c5e8b0 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -4,16 +4,30 @@ from .chat_completion import ChatCompletion as ChatCompletion from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk +from .completion_list_params import CompletionListParams as CompletionListParams +from .chat_completion_deleted import ChatCompletionDeleted as ChatCompletionDeleted from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage +from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality from .completion_create_params import CompletionCreateParams as CompletionCreateParams -from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam +from .completion_update_params import CompletionUpdateParams as CompletionUpdateParams +from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam +from .chat_completion_function_tool import ChatCompletionFunctionTool as ChatCompletionFunctionTool from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam +from .chat_completion_store_message import ChatCompletionStoreMessage as ChatCompletionStoreMessage from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob -from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort +from .chat_completion_tool_union_param import ChatCompletionToolUnionParam as ChatCompletionToolUnionParam +from .chat_completion_content_part_text import ChatCompletionContentPartText as ChatCompletionContentPartText +from .chat_completion_custom_tool_param import ChatCompletionCustomToolParam as ChatCompletionCustomToolParam +from .chat_completion_message_tool_call import ChatCompletionMessageToolCallUnion as ChatCompletionMessageToolCallUnion +from .chat_completion_content_part_image import ChatCompletionContentPartImage as ChatCompletionContentPartImage from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam +from .chat_completion_allowed_tools_param import ChatCompletionAllowedToolsParam as ChatCompletionAllowedToolsParam +from .chat_completion_function_tool_param import ChatCompletionFunctionToolParam as ChatCompletionFunctionToolParam from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam as ChatCompletionStreamOptionsParam from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ( @@ -25,8 +39,8 @@ from .chat_completion_content_part_text_param import ( ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, ) -from .chat_completion_message_tool_call_param import ( - ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, 
+from .chat_completion_developer_message_param import ( + ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam, ) from .chat_completion_named_tool_choice_param import ( ChatCompletionNamedToolChoiceParam as ChatCompletionNamedToolChoiceParam, @@ -34,9 +48,39 @@ from .chat_completion_content_part_image_param import ( ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam, ) +from .chat_completion_message_custom_tool_call import ( + ChatCompletionMessageCustomToolCall as ChatCompletionMessageCustomToolCall, +) +from .chat_completion_prediction_content_param import ( + ChatCompletionPredictionContentParam as ChatCompletionPredictionContentParam, +) from .chat_completion_tool_choice_option_param import ( ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam, ) +from .chat_completion_allowed_tool_choice_param import ( + ChatCompletionAllowedToolChoiceParam as ChatCompletionAllowedToolChoiceParam, +) +from .chat_completion_content_part_refusal_param import ( + ChatCompletionContentPartRefusalParam as ChatCompletionContentPartRefusalParam, +) from .chat_completion_function_call_option_param import ( ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, ) +from .chat_completion_message_function_tool_call import ( + ChatCompletionMessageFunctionToolCall as ChatCompletionMessageFunctionToolCall, +) +from .chat_completion_message_tool_call_union_param import ( + ChatCompletionMessageToolCallUnionParam as ChatCompletionMessageToolCallUnionParam, +) +from .chat_completion_content_part_input_audio_param import ( + ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam, +) +from .chat_completion_message_custom_tool_call_param import ( + ChatCompletionMessageCustomToolCallParam as ChatCompletionMessageCustomToolCallParam, +) +from .chat_completion_named_tool_choice_custom_param import ( + ChatCompletionNamedToolChoiceCustomParam as ChatCompletionNamedToolChoiceCustomParam, +) +from .chat_completion_message_function_tool_call_param import ( + ChatCompletionMessageFunctionToolCallParam as ChatCompletionMessageFunctionToolCallParam, +) diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index 61a94a258e..31219aa812 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -12,9 +12,14 @@ class ChoiceLogprobs(BaseModel): + """Log probability information for the choice.""" + content: Optional[List[ChatCompletionTokenLogprob]] = None """A list of message content tokens with log probability information.""" + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + class Choice(BaseModel): finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] @@ -38,6 +43,10 @@ class Choice(BaseModel): class ChatCompletion(BaseModel): + """ + Represents a chat completion response returned by model, based on the provided input. + """ + id: str """A unique identifier for the chat completion.""" @@ -56,6 +65,25 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None + """Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. 
Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + """ + system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. diff --git a/src/openai/types/chat/chat_completion_allowed_tool_choice_param.py b/src/openai/types/chat/chat_completion_allowed_tool_choice_param.py new file mode 100644 index 0000000000..c5ba21626d --- /dev/null +++ b/src/openai/types/chat/chat_completion_allowed_tool_choice_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_allowed_tools_param import ChatCompletionAllowedToolsParam + +__all__ = ["ChatCompletionAllowedToolChoiceParam"] + + +class ChatCompletionAllowedToolChoiceParam(TypedDict, total=False): + """Constrains the tools available to the model to a pre-defined set.""" + + allowed_tools: Required[ChatCompletionAllowedToolsParam] + """Constrains the tools available to the model to a pre-defined set.""" + + type: Required[Literal["allowed_tools"]] + """Allowed tool configuration type. Always `allowed_tools`.""" diff --git a/src/openai/types/chat/chat_completion_allowed_tools_param.py b/src/openai/types/chat/chat_completion_allowed_tools_param.py new file mode 100644 index 0000000000..ac31fcb543 --- /dev/null +++ b/src/openai/types/chat/chat_completion_allowed_tools_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionAllowedToolsParam"] + + +class ChatCompletionAllowedToolsParam(TypedDict, total=False): + """Constrains the tools available to the model to a pre-defined set.""" + + mode: Required[Literal["auto", "required"]] + """Constrains the tools available to the model to a pre-defined set. + + `auto` allows the model to pick from among the allowed tools and generate a + message. + + `required` requires the model to call one or more of the allowed tools. + """ + + tools: Required[Iterable[Dict[str, object]]] + """A list of tool definitions that the model should be allowed to call. 
+ + For the Chat Completions API, the list of tool definitions might look like: + + ```json + [ + { "type": "function", "function": { "name": "get_weather" } }, + { "type": "function", "function": { "name": "get_time" } } + ] + ``` + """ diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py index e1e399486e..16a218438a 100644 --- a/src/openai/types/chat/chat_completion_assistant_message_param.py +++ b/src/openai/types/chat/chat_completion_assistant_message_param.py @@ -2,15 +2,35 @@ from __future__ import annotations -from typing import Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam +from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam +from .chat_completion_message_tool_call_union_param import ChatCompletionMessageToolCallUnionParam -__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"] +__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"] + + +class Audio(TypedDict, total=False): + """ + Data about a previous audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + id: Required[str] + """Unique identifier for a previous audio response from the model.""" + + +ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam] class FunctionCall(TypedDict, total=False): + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the model. + """ + arguments: Required[str] """ The arguments to call the function with, as generated by the model in JSON @@ -24,16 +44,24 @@ class FunctionCall(TypedDict, total=False): class ChatCompletionAssistantMessageParam(TypedDict, total=False): + """Messages sent by the model in response to user messages.""" + role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" - content: Optional[str] + audio: Optional[Audio] + """ + Data about a previous audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + content: Union[str, Iterable[ContentArrayOfContentPart], None] """The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. """ - function_call: FunctionCall + function_call: Optional[FunctionCall] """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the @@ -47,5 +75,8 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): role. 
""" - tool_calls: Iterable[ChatCompletionMessageToolCallParam] + refusal: Optional[str] + """The refusal message by the assistant.""" + + tool_calls: Iterable[ChatCompletionMessageToolCallUnionParam] """The tool calls generated by the model, such as function calls.""" diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py new file mode 100644 index 0000000000..df346d8c9d --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["ChatCompletionAudio"] + + +class ChatCompletionAudio(BaseModel): + """ + If the audio output modality is requested, this object contains data + about the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + id: str + """Unique identifier for this audio response.""" + + data: str + """ + Base64 encoded audio bytes generated by the model, in the format specified in + the request. + """ + + expires_at: int + """ + The Unix timestamp (in seconds) for when this audio response will no longer be + accessible on the server for use in multi-turn conversations. + """ + + transcript: str + """Transcript of the audio generated by the model.""" diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py new file mode 100644 index 0000000000..cac3c8b9d4 --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionAudioParam"] + + +class ChatCompletionAudioParam(TypedDict, total=False): + """Parameters for audio output. + + Required when audio output is requested with + `modalities: ["audio"]`. [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]] + """Specifies the output audio format. + + Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. + """ + + voice: Required[ + Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]] + ] + """The voice the model uses to respond. + + Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`, + `onyx`, `sage`, and `shimmer`. + """ diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py index 084a5fcc07..ecbfd0a5aa 100644 --- a/src/openai/types/chat/chat_completion_chunk.py +++ b/src/openai/types/chat/chat_completion_chunk.py @@ -19,6 +19,11 @@ class ChoiceDeltaFunctionCall(BaseModel): + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the model. + """ + arguments: Optional[str] = None """ The arguments to call the function with, as generated by the model in JSON @@ -57,6 +62,8 @@ class ChoiceDeltaToolCall(BaseModel): class ChoiceDelta(BaseModel): + """A chat completion delta generated by streamed model responses.""" + content: Optional[str] = None """The contents of the chunk message.""" @@ -67,16 +74,24 @@ class ChoiceDelta(BaseModel): model. 
""" - role: Optional[Literal["system", "user", "assistant", "tool"]] = None + refusal: Optional[str] = None + """The refusal message generated by the model.""" + + role: Optional[Literal["developer", "system", "user", "assistant", "tool"]] = None """The role of the author of this message.""" tool_calls: Optional[List[ChoiceDeltaToolCall]] = None class ChoiceLogprobs(BaseModel): + """Log probability information for the choice.""" + content: Optional[List[ChatCompletionTokenLogprob]] = None """A list of message content tokens with log probability information.""" + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + class Choice(BaseModel): delta: ChoiceDelta @@ -100,6 +115,12 @@ class Choice(BaseModel): class ChatCompletionChunk(BaseModel): + """ + Represents a streamed chunk of a chat completion response returned + by the model, based on the provided input. + [Learn more](https://platform.openai.com/docs/guides/streaming-responses). + """ + id: str """A unique identifier for the chat completion. Each chunk has the same ID.""" @@ -122,6 +143,25 @@ class ChatCompletionChunk(BaseModel): object: Literal["chat.completion.chunk"] """The object type, which is always `chat.completion.chunk`.""" + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None + """Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + """ + system_fingerprint: Optional[str] = None """ This fingerprint represents the backend configuration that the model runs with. @@ -133,6 +173,9 @@ class ChatCompletionChunk(BaseModel): """ An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request. When present, it - contains a null value except for the last chunk which contains the token usage - statistics for the entire request. + contains a null value **except for the last chunk** which contains the token + usage statistics for the entire request. + + **NOTE:** If the stream is interrupted or cancelled, you may not receive the + final usage chunk which contains the total token usage for the request. """ diff --git a/src/openai/types/chat/chat_completion_content_part_image.py b/src/openai/types/chat/chat_completion_content_part_image.py new file mode 100644 index 0000000000..a636c51fb4 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_image.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionContentPartImage", "ImageURL"] + + +class ImageURL(BaseModel): + url: str + """Either a URL of the image or the base64 encoded image data.""" + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + Learn more in the + [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). + """ + + +class ChatCompletionContentPartImage(BaseModel): + """Learn about [image inputs](https://platform.openai.com/docs/guides/vision).""" + + image_url: ImageURL + + type: Literal["image_url"] + """The type of the content part.""" diff --git a/src/openai/types/chat/chat_completion_content_part_image_param.py b/src/openai/types/chat/chat_completion_content_part_image_param.py index b1a186aa6d..a230a340a7 100644 --- a/src/openai/types/chat/chat_completion_content_part_image_param.py +++ b/src/openai/types/chat/chat_completion_content_part_image_param.py @@ -15,11 +15,13 @@ class ImageURL(TypedDict, total=False): """Specifies the detail level of the image. Learn more in the - [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding). + [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). """ class ChatCompletionContentPartImageParam(TypedDict, total=False): + """Learn about [image inputs](https://platform.openai.com/docs/guides/vision).""" + image_url: Required[ImageURL] type: Required[Literal["image_url"]] diff --git a/src/openai/types/chat/chat_completion_content_part_input_audio_param.py b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py new file mode 100644 index 0000000000..98d9e3c5eb --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"] + + +class InputAudio(TypedDict, total=False): + data: Required[str] + """Base64 encoded audio data.""" + + format: Required[Literal["wav", "mp3"]] + """The format of the encoded audio data. Currently supports "wav" and "mp3".""" + + +class ChatCompletionContentPartInputAudioParam(TypedDict, total=False): + """Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).""" + + input_audio: Required[InputAudio] + + type: Required[Literal["input_audio"]] + """The type of the content part. 
Always `input_audio`.""" diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py index f9b5f71e43..b8c710a980 100644 --- a/src/openai/types/chat/chat_completion_content_part_param.py +++ b/src/openai/types/chat/chat_completion_content_part_param.py @@ -3,10 +3,43 @@ from __future__ import annotations from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam +from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam -__all__ = ["ChatCompletionContentPartParam"] +__all__ = ["ChatCompletionContentPartParam", "File", "FileFile"] -ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam] + +class FileFile(TypedDict, total=False): + file_data: str + """ + The base64 encoded file data, used when passing the file to the model as a + string. + """ + + file_id: str + """The ID of an uploaded file to use as input.""" + + filename: str + """The name of the file, used when passing the file to the model as a string.""" + + +class File(TypedDict, total=False): + """ + Learn about [file inputs](https://platform.openai.com/docs/guides/text) for text generation. + """ + + file: Required[FileFile] + + type: Required[Literal["file"]] + """The type of the content part. Always `file`.""" + + +ChatCompletionContentPartParam: TypeAlias = Union[ + ChatCompletionContentPartTextParam, + ChatCompletionContentPartImageParam, + ChatCompletionContentPartInputAudioParam, + File, +] diff --git a/src/openai/types/chat/chat_completion_content_part_refusal_param.py b/src/openai/types/chat/chat_completion_content_part_refusal_param.py new file mode 100644 index 0000000000..c18c7db770 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_refusal_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartRefusalParam"] + + +class ChatCompletionContentPartRefusalParam(TypedDict, total=False): + refusal: Required[str] + """The refusal message generated by the model.""" + + type: Required[Literal["refusal"]] + """The type of the content part.""" diff --git a/src/openai/types/chat/chat_completion_content_part_text.py b/src/openai/types/chat/chat_completion_content_part_text.py new file mode 100644 index 0000000000..e6d1bf1ec0 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_text.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionContentPartText"] + + +class ChatCompletionContentPartText(BaseModel): + """ + Learn about [text inputs](https://platform.openai.com/docs/guides/text-generation). 
+ """ + + text: str + """The text content.""" + + type: Literal["text"] + """The type of the content part.""" diff --git a/src/openai/types/chat/chat_completion_content_part_text_param.py b/src/openai/types/chat/chat_completion_content_part_text_param.py index a270744417..be69bf66fa 100644 --- a/src/openai/types/chat/chat_completion_content_part_text_param.py +++ b/src/openai/types/chat/chat_completion_content_part_text_param.py @@ -8,6 +8,10 @@ class ChatCompletionContentPartTextParam(TypedDict, total=False): + """ + Learn about [text inputs](https://platform.openai.com/docs/guides/text-generation). + """ + text: Required[str] """The text content.""" diff --git a/src/openai/types/chat/chat_completion_custom_tool_param.py b/src/openai/types/chat/chat_completion_custom_tool_param.py new file mode 100644 index 0000000000..d4f21ba0ca --- /dev/null +++ b/src/openai/types/chat/chat_completion_custom_tool_param.py @@ -0,0 +1,68 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ChatCompletionCustomToolParam", + "Custom", + "CustomFormat", + "CustomFormatText", + "CustomFormatGrammar", + "CustomFormatGrammarGrammar", +] + + +class CustomFormatText(TypedDict, total=False): + """Unconstrained free-form text.""" + + type: Required[Literal["text"]] + """Unconstrained text format. Always `text`.""" + + +class CustomFormatGrammarGrammar(TypedDict, total=False): + """Your chosen grammar.""" + + definition: Required[str] + """The grammar definition.""" + + syntax: Required[Literal["lark", "regex"]] + """The syntax of the grammar definition. One of `lark` or `regex`.""" + + +class CustomFormatGrammar(TypedDict, total=False): + """A grammar defined by the user.""" + + grammar: Required[CustomFormatGrammarGrammar] + """Your chosen grammar.""" + + type: Required[Literal["grammar"]] + """Grammar format. Always `grammar`.""" + + +CustomFormat: TypeAlias = Union[CustomFormatText, CustomFormatGrammar] + + +class Custom(TypedDict, total=False): + """Properties of the custom tool.""" + + name: Required[str] + """The name of the custom tool, used to identify it in tool calls.""" + + description: str + """Optional description of the custom tool, used to provide more context.""" + + format: CustomFormat + """The input format for the custom tool. Default is unconstrained text.""" + + +class ChatCompletionCustomToolParam(TypedDict, total=False): + """A custom tool that processes input using a specified format.""" + + custom: Required[Custom] + """Properties of the custom tool.""" + + type: Required[Literal["custom"]] + """The type of the custom tool. Always `custom`.""" diff --git a/src/openai/types/chat/chat_completion_deleted.py b/src/openai/types/chat/chat_completion_deleted.py new file mode 100644 index 0000000000..0a541cb23d --- /dev/null +++ b/src/openai/types/chat/chat_completion_deleted.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionDeleted"] + + +class ChatCompletionDeleted(BaseModel): + id: str + """The ID of the chat completion that was deleted.""" + + deleted: bool + """Whether the chat completion was deleted.""" + + object: Literal["chat.completion.deleted"] + """The type of object being deleted.""" diff --git a/src/openai/types/chat/chat_completion_developer_message_param.py b/src/openai/types/chat/chat_completion_developer_message_param.py new file mode 100644 index 0000000000..94fb3359f6 --- /dev/null +++ b/src/openai/types/chat/chat_completion_developer_message_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionDeveloperMessageParam"] + + +class ChatCompletionDeveloperMessageParam(TypedDict, total=False): + """ + Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, `developer` messages + replace the previous `system` messages. + """ + + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/openai/types/chat/chat_completion_function_call_option_param.py b/src/openai/types/chat/chat_completion_function_call_option_param.py index 2bc014af7a..b1ca37bf58 100644 --- a/src/openai/types/chat/chat_completion_function_call_option_param.py +++ b/src/openai/types/chat/chat_completion_function_call_option_param.py @@ -8,5 +8,9 @@ class ChatCompletionFunctionCallOptionParam(TypedDict, total=False): + """ + Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. + """ + name: Required[str] """The name of the function to call.""" diff --git a/src/openai/types/chat/chat_completion_function_tool.py b/src/openai/types/chat/chat_completion_function_tool.py new file mode 100644 index 0000000000..5d43a1e836 --- /dev/null +++ b/src/openai/types/chat/chat_completion_function_tool.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.function_definition import FunctionDefinition + +__all__ = ["ChatCompletionFunctionTool"] + + +class ChatCompletionFunctionTool(BaseModel): + """A function tool that can be used to generate a response.""" + + function: FunctionDefinition + + type: Literal["function"] + """The type of the tool. 
Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/chat_completion_tool_param.py b/src/openai/types/chat/chat_completion_function_tool_param.py similarity index 51% rename from src/openai/types/chat/chat_completion_tool_param.py rename to src/openai/types/chat/chat_completion_function_tool_param.py index 0cf6ea7268..d336e8c08c 100644 --- a/src/openai/types/chat/chat_completion_tool_param.py +++ b/src/openai/types/chat/chat_completion_function_tool_param.py @@ -4,13 +4,15 @@ from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from ..shared_params.function_definition import FunctionDefinition -__all__ = ["ChatCompletionToolParam"] +__all__ = ["ChatCompletionFunctionToolParam"] -class ChatCompletionToolParam(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] +class ChatCompletionFunctionToolParam(TypedDict, total=False): + """A function tool that can be used to generate a response.""" + + function: Required[FunctionDefinition] type: Required[Literal["function"]] """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py index 8db7d17d24..3f88f776b9 100644 --- a/src/openai/types/chat/chat_completion_message.py +++ b/src/openai/types/chat/chat_completion_message.py @@ -4,12 +4,44 @@ from typing_extensions import Literal from ..._models import BaseModel -from .chat_completion_message_tool_call import ChatCompletionMessageToolCall +from .chat_completion_audio import ChatCompletionAudio +from .chat_completion_message_tool_call import ChatCompletionMessageToolCallUnion -__all__ = ["ChatCompletionMessage", "FunctionCall"] +__all__ = ["ChatCompletionMessage", "Annotation", "AnnotationURLCitation", "FunctionCall"] + + +class AnnotationURLCitation(BaseModel): + """A URL citation when using web search.""" + + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + url: str + """The URL of the web resource.""" + + +class Annotation(BaseModel): + """A URL citation when using web search.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url_citation: AnnotationURLCitation + """A URL citation when using web search.""" class FunctionCall(BaseModel): + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the model. + """ + arguments: str """ The arguments to call the function with, as generated by the model in JSON @@ -23,12 +55,30 @@ class FunctionCall(BaseModel): class ChatCompletionMessage(BaseModel): + """A chat completion message generated by the model.""" + content: Optional[str] = None """The contents of the message.""" + refusal: Optional[str] = None + """The refusal message generated by the model.""" + role: Literal["assistant"] """The role of the author of this message.""" + annotations: Optional[List[Annotation]] = None + """ + Annotations for the message, when applicable, as when using the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). 
+ """ + + audio: Optional[ChatCompletionAudio] = None + """ + If the audio output modality is requested, this object contains data about the + audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + function_call: Optional[FunctionCall] = None """Deprecated and replaced by `tool_calls`. @@ -36,5 +86,5 @@ class ChatCompletionMessage(BaseModel): model. """ - tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None + tool_calls: Optional[List[ChatCompletionMessageToolCallUnion]] = None """The tool calls generated by the model, such as function calls.""" diff --git a/src/openai/types/chat/chat_completion_message_custom_tool_call.py b/src/openai/types/chat/chat_completion_message_custom_tool_call.py new file mode 100644 index 0000000000..9542d8b924 --- /dev/null +++ b/src/openai/types/chat/chat_completion_message_custom_tool_call.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionMessageCustomToolCall", "Custom"] + + +class Custom(BaseModel): + """The custom tool that the model called.""" + + input: str + """The input for the custom tool call generated by the model.""" + + name: str + """The name of the custom tool to call.""" + + +class ChatCompletionMessageCustomToolCall(BaseModel): + """A call to a custom tool created by the model.""" + + id: str + """The ID of the tool call.""" + + custom: Custom + """The custom tool that the model called.""" + + type: Literal["custom"] + """The type of the tool. Always `custom`.""" diff --git a/src/openai/types/chat/chat_completion_message_custom_tool_call_param.py b/src/openai/types/chat/chat_completion_message_custom_tool_call_param.py new file mode 100644 index 0000000000..3d03f0a93c --- /dev/null +++ b/src/openai/types/chat/chat_completion_message_custom_tool_call_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionMessageCustomToolCallParam", "Custom"] + + +class Custom(TypedDict, total=False): + """The custom tool that the model called.""" + + input: Required[str] + """The input for the custom tool call generated by the model.""" + + name: Required[str] + """The name of the custom tool to call.""" + + +class ChatCompletionMessageCustomToolCallParam(TypedDict, total=False): + """A call to a custom tool created by the model.""" + + id: Required[str] + """The ID of the tool call.""" + + custom: Required[Custom] + """The custom tool that the model called.""" + + type: Required[Literal["custom"]] + """The type of the tool. Always `custom`.""" diff --git a/src/openai/types/chat/chat_completion_message_function_tool_call.py b/src/openai/types/chat/chat_completion_message_function_tool_call.py new file mode 100644 index 0000000000..e7278b923c --- /dev/null +++ b/src/openai/types/chat/chat_completion_message_function_tool_call.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionMessageFunctionToolCall", "Function"] + + +class Function(BaseModel): + """The function that the model called.""" + + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChatCompletionMessageFunctionToolCall(BaseModel): + """A call to a function tool created by the model.""" + + id: str + """The ID of the tool call.""" + + function: Function + """The function that the model called.""" + + type: Literal["function"] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/chat_completion_message_tool_call_param.py b/src/openai/types/chat/chat_completion_message_function_tool_call_param.py similarity index 77% rename from src/openai/types/chat/chat_completion_message_tool_call_param.py rename to src/openai/types/chat/chat_completion_message_function_tool_call_param.py index f616c363d0..a8094ea63a 100644 --- a/src/openai/types/chat/chat_completion_message_tool_call_param.py +++ b/src/openai/types/chat/chat_completion_message_function_tool_call_param.py @@ -4,10 +4,12 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["ChatCompletionMessageToolCallParam", "Function"] +__all__ = ["ChatCompletionMessageFunctionToolCallParam", "Function"] class Function(TypedDict, total=False): + """The function that the model called.""" + arguments: Required[str] """ The arguments to call the function with, as generated by the model in JSON @@ -20,7 +22,9 @@ class Function(TypedDict, total=False): """The name of the function to call.""" -class ChatCompletionMessageToolCallParam(TypedDict, total=False): +class ChatCompletionMessageFunctionToolCallParam(TypedDict, total=False): + """A call to a function tool created by the model.""" + id: Required[str] """The ID of the tool call.""" diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index a3644a5310..942da24304 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -3,16 +3,19 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .chat_completion_tool_message_param import ChatCompletionToolMessageParam from .chat_completion_user_message_param import ChatCompletionUserMessageParam from .chat_completion_system_message_param import ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam +from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam __all__ = ["ChatCompletionMessageParam"] -ChatCompletionMessageParam = Union[ +ChatCompletionMessageParam: TypeAlias = Union[ + ChatCompletionDeveloperMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam, diff --git a/src/openai/types/chat/chat_completion_message_tool_call.py b/src/openai/types/chat/chat_completion_message_tool_call.py index 4fec667096..550cb34287 100644 --- 
a/src/openai/types/chat/chat_completion_message_tool_call.py +++ b/src/openai/types/chat/chat_completion_message_tool_call.py @@ -1,31 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal +from typing import Union +from typing_extensions import Annotated, TypeAlias -from ..._models import BaseModel +from ..._utils import PropertyInfo +from .chat_completion_message_custom_tool_call import ChatCompletionMessageCustomToolCall +from .chat_completion_message_function_tool_call import ChatCompletionMessageFunctionToolCall -__all__ = ["ChatCompletionMessageToolCall", "Function"] +__all__ = ["ChatCompletionMessageToolCallUnion"] - -class Function(BaseModel): - arguments: str - """ - The arguments to call the function with, as generated by the model in JSON - format. Note that the model does not always generate valid JSON, and may - hallucinate parameters not defined by your function schema. Validate the - arguments in your code before calling your function. - """ - - name: str - """The name of the function to call.""" - - -class ChatCompletionMessageToolCall(BaseModel): - id: str - """The ID of the tool call.""" - - function: Function - """The function that the model called.""" - - type: Literal["function"] - """The type of the tool. Currently, only `function` is supported.""" +ChatCompletionMessageToolCallUnion: TypeAlias = Annotated[ + Union[ChatCompletionMessageFunctionToolCall, ChatCompletionMessageCustomToolCall], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/chat/chat_completion_message_tool_call_union_param.py b/src/openai/types/chat/chat_completion_message_tool_call_union_param.py new file mode 100644 index 0000000000..fcca9bb116 --- /dev/null +++ b/src/openai/types/chat/chat_completion_message_tool_call_union_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .chat_completion_message_custom_tool_call_param import ChatCompletionMessageCustomToolCallParam +from .chat_completion_message_function_tool_call_param import ChatCompletionMessageFunctionToolCallParam + +__all__ = ["ChatCompletionMessageToolCallUnionParam"] + +ChatCompletionMessageToolCallUnionParam: TypeAlias = Union[ + ChatCompletionMessageFunctionToolCallParam, ChatCompletionMessageCustomToolCallParam +] diff --git a/src/openai/types/chat/chat_completion_modality.py b/src/openai/types/chat/chat_completion_modality.py new file mode 100644 index 0000000000..8e3c145979 --- /dev/null +++ b/src/openai/types/chat/chat_completion_modality.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionModality"] + +ChatCompletionModality: TypeAlias = Literal["text", "audio"] diff --git a/src/openai/types/chat/chat_completion_named_tool_choice_custom_param.py b/src/openai/types/chat/chat_completion_named_tool_choice_custom_param.py new file mode 100644 index 0000000000..147fb87965 --- /dev/null +++ b/src/openai/types/chat/chat_completion_named_tool_choice_custom_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionNamedToolChoiceCustomParam", "Custom"] + + +class Custom(TypedDict, total=False): + name: Required[str] + """The name of the custom tool to call.""" + + +class ChatCompletionNamedToolChoiceCustomParam(TypedDict, total=False): + """Specifies a tool the model should use. + + Use to force the model to call a specific custom tool. + """ + + custom: Required[Custom] + + type: Required[Literal["custom"]] + """For custom tool calling, the type is always `custom`.""" diff --git a/src/openai/types/chat/chat_completion_named_tool_choice_param.py b/src/openai/types/chat/chat_completion_named_tool_choice_param.py index 369f8b42dd..f684fcea5e 100644 --- a/src/openai/types/chat/chat_completion_named_tool_choice_param.py +++ b/src/openai/types/chat/chat_completion_named_tool_choice_param.py @@ -13,7 +13,12 @@ class Function(TypedDict, total=False): class ChatCompletionNamedToolChoiceParam(TypedDict, total=False): + """Specifies a tool the model should use. + + Use to force the model to call a specific function. + """ + function: Required[Function] type: Required[Literal["function"]] - """The type of the tool. Currently, only `function` is supported.""" + """For function calling, the type is always `function`.""" diff --git a/src/openai/types/chat/chat_completion_prediction_content_param.py b/src/openai/types/chat/chat_completion_prediction_content_param.py new file mode 100644 index 0000000000..6184a314b5 --- /dev/null +++ b/src/openai/types/chat/chat_completion_prediction_content_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionPredictionContentParam"] + + +class ChatCompletionPredictionContentParam(TypedDict, total=False): + """ + Static predicted output content, such as the content of a text file that is + being regenerated. + """ + + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """ + The content that should be matched when generating a model response. If + generated tokens would match this content, the entire model response can be + returned much more quickly. + """ + + type: Required[Literal["content"]] + """The type of the predicted content you want to provide. + + This type is currently always `content`. + """ diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py new file mode 100644 index 0000000000..42a980c5b8 --- /dev/null +++ b/src/openai/types/chat/chat_completion_reasoning_effort.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["ChatCompletionReasoningEffort"] + +ChatCompletionReasoningEffort = ReasoningEffort diff --git a/src/openai/types/chat/chat_completion_role.py b/src/openai/types/chat/chat_completion_role.py index 1fd83888d3..3ec5e9ad87 100644 --- a/src/openai/types/chat/chat_completion_role.py +++ b/src/openai/types/chat/chat_completion_role.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias __all__ = ["ChatCompletionRole"] -ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"] +ChatCompletionRole: TypeAlias = Literal["developer", "system", "user", "assistant", "tool", "function"] diff --git a/src/openai/types/chat/chat_completion_store_message.py b/src/openai/types/chat/chat_completion_store_message.py new file mode 100644 index 0000000000..6a805cce76 --- /dev/null +++ b/src/openai/types/chat/chat_completion_store_message.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import TypeAlias + +from .chat_completion_message import ChatCompletionMessage +from .chat_completion_content_part_text import ChatCompletionContentPartText +from .chat_completion_content_part_image import ChatCompletionContentPartImage + +__all__ = ["ChatCompletionStoreMessage", "ChatCompletionStoreMessageContentPart"] + +ChatCompletionStoreMessageContentPart: TypeAlias = Union[ChatCompletionContentPartText, ChatCompletionContentPartImage] + + +class ChatCompletionStoreMessage(ChatCompletionMessage): + """A chat completion message generated by the model.""" + + id: str + """The identifier of the chat message.""" + + content_parts: Optional[List[ChatCompletionStoreMessageContentPart]] = None + """ + If a content parts array was provided, this is an array of `text` and + `image_url` parts. Otherwise, null. + """ diff --git a/src/openai/types/chat/chat_completion_stream_options_param.py b/src/openai/types/chat/chat_completion_stream_options_param.py index fbf7291821..9b881fff02 100644 --- a/src/openai/types/chat/chat_completion_stream_options_param.py +++ b/src/openai/types/chat/chat_completion_stream_options_param.py @@ -8,10 +8,26 @@ class ChatCompletionStreamOptionsParam(TypedDict, total=False): + """Options for streaming response. Only set this when you set `stream: true`.""" + + include_obfuscation: bool + """When true, stream obfuscation will be enabled. + + Stream obfuscation adds random characters to an `obfuscation` field on streaming + delta events to normalize payload sizes as a mitigation to certain side-channel + attacks. These obfuscation fields are included by default, but add a small + amount of overhead to the data stream. You can set `include_obfuscation` to + false to optimize for bandwidth if you trust the network links between your + application and the OpenAI API. + """ + include_usage: bool """If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire - request, and the `choices` field will always be an empty array. All other chunks - will also include a `usage` field, but with a null value. + request, and the `choices` field will always be an empty array. + + All other chunks will also include a `usage` field, but with a null value. + **NOTE:** If the stream is interrupted, you may not receive the final usage + chunk which contains the total token usage for the request. 
""" diff --git a/src/openai/types/chat/chat_completion_system_message_param.py b/src/openai/types/chat/chat_completion_system_message_param.py index 94bb3f636c..9dcc5e07f9 100644 --- a/src/openai/types/chat/chat_completion_system_message_param.py +++ b/src/openai/types/chat/chat_completion_system_message_param.py @@ -2,13 +2,22 @@ from __future__ import annotations +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionSystemMessageParam"] class ChatCompletionSystemMessageParam(TypedDict, total=False): - content: Required[str] + """ + Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, use `developer` messages + for this purpose instead. + """ + + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the system message.""" role: Required[Literal["system"]] diff --git a/src/openai/types/chat/chat_completion_tool_choice_option_param.py b/src/openai/types/chat/chat_completion_tool_choice_option_param.py index 1d3c2506ab..f3bb0a46df 100644 --- a/src/openai/types/chat/chat_completion_tool_choice_option_param.py +++ b/src/openai/types/chat/chat_completion_tool_choice_option_param.py @@ -3,10 +3,17 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam +from .chat_completion_allowed_tool_choice_param import ChatCompletionAllowedToolChoiceParam +from .chat_completion_named_tool_choice_custom_param import ChatCompletionNamedToolChoiceCustomParam __all__ = ["ChatCompletionToolChoiceOptionParam"] -ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam] +ChatCompletionToolChoiceOptionParam: TypeAlias = Union[ + Literal["none", "auto", "required"], + ChatCompletionAllowedToolChoiceParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedToolChoiceCustomParam, +] diff --git a/src/openai/types/chat/chat_completion_tool_message_param.py b/src/openai/types/chat/chat_completion_tool_message_param.py index 5c590e033f..eb5e270e47 100644 --- a/src/openai/types/chat/chat_completion_tool_message_param.py +++ b/src/openai/types/chat/chat_completion_tool_message_param.py @@ -2,13 +2,16 @@ from __future__ import annotations +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionToolMessageParam"] class ChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[str] + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/openai/types/chat/chat_completion_tool_union_param.py b/src/openai/types/chat/chat_completion_tool_union_param.py new file mode 100644 index 0000000000..0f8bf7b0e7 --- /dev/null +++ b/src/openai/types/chat/chat_completion_tool_union_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .chat_completion_custom_tool_param import ChatCompletionCustomToolParam +from .chat_completion_function_tool_param import ChatCompletionFunctionToolParam + +__all__ = ["ChatCompletionToolUnionParam"] + +ChatCompletionToolUnionParam: TypeAlias = Union[ChatCompletionFunctionToolParam, ChatCompletionCustomToolParam] diff --git a/src/openai/types/chat/chat_completion_user_message_param.py b/src/openai/types/chat/chat_completion_user_message_param.py index 5c15322a22..c97ba535eb 100644 --- a/src/openai/types/chat/chat_completion_user_message_param.py +++ b/src/openai/types/chat/chat_completion_user_message_param.py @@ -11,6 +11,11 @@ class ChatCompletionUserMessageParam(TypedDict, total=False): + """ + Messages sent by an end user, containing prompts or additional context + information. + """ + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] """The contents of the user message.""" diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 226cf15882..49cefb95fc 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -3,14 +3,22 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ...types import shared_params -from ..chat_model import ChatModel -from .chat_completion_tool_param import ChatCompletionToolParam +from ..._types import SequenceNotStr +from ..shared.chat_model import ChatModel +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_tool_union_param import ChatCompletionToolUnionParam +from ..shared_params.function_parameters import FunctionParameters +from ..shared_params.response_format_text import ResponseFormatText from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from .chat_completion_prediction_content_param import ChatCompletionPredictionContentParam from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam __all__ = [ @@ -18,6 +26,9 @@ "FunctionCall", "Function", "ResponseFormat", + "WebSearchOptions", + "WebSearchOptionsUserLocation", + "WebSearchOptionsUserLocationApproximate", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming", ] @@ -27,15 +38,27 @@ class CompletionCreateParamsBase(TypedDict, total=False): messages: Required[Iterable[ChatCompletionMessageParam]] """A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). 
+ Depending on the [model](https://platform.openai.com/docs/models) you use, + different message types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). """ model: Required[Union[str, ChatModel]] - """ID of the model to use. + """Model ID used to generate the response, like `gpt-4o` or `o3`. - See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + audio: Optional[ChatCompletionAudioParam] + """Parameters for audio output. + + Required when audio output is requested with `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). """ frequency_penalty: Optional[float] @@ -43,19 +66,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) """ function_call: FunctionCall """Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. """ @@ -84,15 +109,46 @@ class CompletionCreateParamsBase(TypedDict, total=False): `content` of `message`. """ + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] """ The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o-series models](https://platform.openai.com/docs/guides/reasoning). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] + """ + Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + `["text", "audio"]` """ n: Optional[int] @@ -102,32 +158,78 @@ class CompletionCreateParamsBase(TypedDict, total=False): of the choices. Keep `n` as `1` to minimize costs. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + prediction: Optional[ChatCompletionPredictionContentParam] + """ + Static predicted output content, such as the content of a text file that is + being regenerated. + """ + presence_penalty: Optional[float] """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + """ + + prompt_cache_key: str + """ + Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + """ - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] + """The retention policy for the prompt cache. + + Set to `24h` to enable extended prompt caching, which keeps cached prefixes + active for longer, up to a maximum of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + """ + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. """ response_format: ResponseFormat """An object specifying the format that the model must output. 
- Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. + safety_identifier: str + """ + A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). """ seed: Optional[int] @@ -139,8 +241,40 @@ class CompletionCreateParamsBase(TypedDict, total=False): in the backend. """ - stop: Union[Optional[str], List[str]] - """Up to 4 sequences where the API will stop generating further tokens.""" + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] + """Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + """ + + stop: Union[Optional[str], SequenceNotStr[str], None] + """Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. 
+ """ + + store: Optional[bool] + """ + Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + Supports text and image inputs. Note: image inputs over 8MB will be dropped. + """ stream_options: Optional[ChatCompletionStreamOptionsParam] """Options for streaming response. Only set this when you set `stream: true`.""" @@ -149,9 +283,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. """ tool_choice: ChatCompletionToolChoiceOptionParam @@ -167,12 +300,12 @@ class CompletionCreateParamsBase(TypedDict, total=False): are present. """ - tools: Iterable[ChatCompletionToolParam] + tools: Iterable[ChatCompletionToolUnionParam] """A list of tools the model may call. - Currently, only functions are supported as a tool. Use this to provide a list of - functions the model may generate JSON inputs for. A max of 128 functions are - supported. + You can provide either + [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + or [function tools](https://platform.openai.com/docs/guides/function-calling). """ top_logprobs: Optional[int] @@ -192,14 +325,31 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ user: str + """This field is being replaced by `safety_identifier` and `prompt_cache_key`. + + Use `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + """ + + verbosity: Optional[Literal["low", "medium", "high"]] + """Constrains the verbosity of the model's response. + + Lower values will result in more concise responses, while higher values will + result in more verbose responses. Currently supported values are `low`, + `medium`, and `high`. + """ + + web_search_options: WebSearchOptions """ - A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). """ -FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] +FunctionCall: TypeAlias = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] class Function(TypedDict, total=False): @@ -216,12 +366,11 @@ class Function(TypedDict, total=False): how to call the function. """ - parameters: shared_params.FunctionParameters + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. 
- See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. @@ -229,32 +378,82 @@ class Function(TypedDict, total=False): """ -class ResponseFormat(TypedDict, total=False): - type: Literal["text", "json_object"] - """Must be one of `text` or `json_object`.""" +ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject] -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): - stream: Optional[Literal[False]] - """If set, partial message deltas will be sent, like in ChatGPT. +class WebSearchOptionsUserLocationApproximate(TypedDict, total=False): + """Approximate location parameters for the search.""" + + city: str + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: str + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: str + """Free text input for the region of the user, e.g. `California`.""" + + timezone: str + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchOptionsUserLocation(TypedDict, total=False): + """Approximate location parameters for the search.""" + + approximate: Required[WebSearchOptionsUserLocationApproximate] + """Approximate location parameters for the search.""" + + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" + + +class WebSearchOptions(TypedDict, total=False): + """ + This tool searches the web for relevant results to use in a response. + Learn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + """ + + search_context_size: Literal["low", "medium", "high"] + """ + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[WebSearchOptionsUserLocation] + """Approximate location parameters for the search.""" - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, partial message deltas will be sent, like in ChatGPT. - - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. """ diff --git a/src/openai/types/chat/completion_list_params.py b/src/openai/types/chat/completion_list_params.py new file mode 100644 index 0000000000..32bd3f5c0a --- /dev/null +++ b/src/openai/types/chat/completion_list_params.py @@ -0,0 +1,37 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionListParams"] + + +class CompletionListParams(TypedDict, total=False): + after: str + """Identifier for the last chat completion from the previous pagination request.""" + + limit: int + """Number of Chat Completions to retrieve.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model used to generate the Chat Completions.""" + + order: Literal["asc", "desc"] + """Sort order for Chat Completions by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ diff --git a/src/openai/types/chat/completion_update_params.py b/src/openai/types/chat/completion_update_params.py new file mode 100644 index 0000000000..fc71733f07 --- /dev/null +++ b/src/openai/types/chat/completion_update_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
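# A rough sketch combining a few of the request parameters documented above with
# the list shape described by CompletionListParams. The model name, metadata key,
# and effort/verbosity choices are placeholder assumptions.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-5.1",  # assumed reasoning-capable model
    messages=[{"role": "user", "content": "Summarize this change in one sentence."}],
    reasoning_effort="low",
    verbosity="low",
    store=True,  # keep the completion so it can be listed later
    metadata={"project": "docs-demo"},  # hypothetical metadata key
)
print(completion.id)

# List stored chat completions that carry the same metadata key.
for item in client.chat.completions.list(limit=10, metadata={"project": "docs-demo"}, order="desc"):
    print(item.id, item.model)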
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionUpdateParams"] + + +class CompletionUpdateParams(TypedDict, total=False): + metadata: Required[Optional[Metadata]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/chat/completions/__init__.py b/src/openai/types/chat/completions/__init__.py new file mode 100644 index 0000000000..b8e62d6a64 --- /dev/null +++ b/src/openai/types/chat/completions/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .message_list_params import MessageListParams as MessageListParams diff --git a/src/openai/types/chat/completions/message_list_params.py b/src/openai/types/chat/completions/message_list_params.py new file mode 100644 index 0000000000..4e694e83ea --- /dev/null +++ b/src/openai/types/chat/completions/message_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["MessageListParams"] + + +class MessageListParams(TypedDict, total=False): + after: str + """Identifier for the last message from the previous pagination request.""" + + limit: int + """Number of messages to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for messages by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 0d2937ea32..f3b0e310cc 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -1,29 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal +from .shared import chat_model __all__ = ["ChatModel"] -ChatModel = Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", -] +ChatModel = chat_model.ChatModel diff --git a/src/openai/types/completion.py b/src/openai/types/completion.py index d3b3102a4a..ee59b2e209 100644 --- a/src/openai/types/completion.py +++ b/src/openai/types/completion.py @@ -11,6 +11,11 @@ class Completion(BaseModel): + """Represents a completion response from the API. + + Note: both the streamed and non-streamed response objects share the same shape (unlike the chat endpoint). 
+ """ + id: str """A unique identifier for the completion.""" diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index 9fe22fe3c9..f9beb9afc7 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -2,9 +2,10 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict +from .._types import SequenceNotStr from .chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam __all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"] @@ -17,11 +18,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ - prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]] + prompt: Required[Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None]] """ The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -53,7 +54,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ logit_bias: Optional[Dict[str, int]] @@ -106,7 +107,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ seed: Optional[int] @@ -119,10 +120,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): response parameter to monitor changes in the backend. """ - stop: Union[Optional[str], List[str], None] - """Up to 4 sequences where the API will stop generating further tokens. + stop: Union[Optional[str], SequenceNotStr[str], None] + """Not supported with latest reasoning models `o3` and `o4-mini`. - The returned text will not contain the stop sequence. + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. """ stream_options: Optional[ChatCompletionStreamOptionsParam] @@ -156,11 +158,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
- [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """Whether to stream back partial progress. diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index 0d57b96595..9b5202da14 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,13 +1,49 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails", "PromptTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + """Breakdown of tokens used in a completion.""" + + accepted_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that + appeared in the completion. + """ + + audio_tokens: Optional[int] = None + """Audio input tokens generated by the model.""" + + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" + + rejected_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that did + not appear in the completion. However, like reasoning tokens, these tokens are + still counted in the total completion tokens for purposes of billing, output, + and context window limits. + """ + + +class PromptTokensDetails(BaseModel): + """Breakdown of tokens used in the prompt.""" + + audio_tokens: Optional[int] = None + """Audio input tokens present in the prompt.""" + + cached_tokens: Optional[int] = None + """Cached tokens present in the prompt.""" class CompletionUsage(BaseModel): + """Usage statistics for the completion request.""" + completion_tokens: int """Number of tokens in the generated completion.""" @@ -16,3 +52,9 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" + + prompt_tokens_details: Optional[PromptTokensDetails] = None + """Breakdown of tokens used in the prompt.""" diff --git a/src/openai/types/container_create_params.py b/src/openai/types/container_create_params.py new file mode 100644 index 0000000000..47101ecdb6 --- /dev/null +++ b/src/openai/types/container_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .._types import SequenceNotStr + +__all__ = ["ContainerCreateParams", "ExpiresAfter"] + + +class ContainerCreateParams(TypedDict, total=False): + name: Required[str] + """Name of the container to create.""" + + expires_after: ExpiresAfter + """Container expiration time in seconds relative to the 'anchor' time.""" + + file_ids: SequenceNotStr[str] + """IDs of files to copy to the container.""" + + memory_limit: Literal["1g", "4g", "16g", "64g"] + """Optional memory limit for the container. 
Defaults to "1g".""" + + +class ExpiresAfter(TypedDict, total=False): + """Container expiration time in seconds relative to the 'anchor' time.""" + + anchor: Required[Literal["last_active_at"]] + """Time anchor for the expiration time. + + Currently only 'last_active_at' is supported. + """ + + minutes: Required[int] diff --git a/src/openai/types/container_create_response.py b/src/openai/types/container_create_response.py new file mode 100644 index 0000000000..0ebcc04062 --- /dev/null +++ b/src/openai/types/container_create_response.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ContainerCreateResponse", "ExpiresAfter"] + + +class ExpiresAfter(BaseModel): + """ + The container will expire after this time period. + The anchor is the reference point for the expiration. + The minutes is the number of minutes after the anchor before the container expires. + """ + + anchor: Optional[Literal["last_active_at"]] = None + """The reference point for the expiration.""" + + minutes: Optional[int] = None + """The number of minutes after the anchor before the container expires.""" + + +class ContainerCreateResponse(BaseModel): + id: str + """Unique identifier for the container.""" + + created_at: int + """Unix timestamp (in seconds) when the container was created.""" + + name: str + """Name of the container.""" + + object: str + """The type of this object.""" + + status: str + """Status of the container (e.g., active, deleted).""" + + expires_after: Optional[ExpiresAfter] = None + """ + The container will expire after this time period. The anchor is the reference + point for the expiration. The minutes is the number of minutes after the anchor + before the container expires. + """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/container_list_params.py b/src/openai/types/container_list_params.py new file mode 100644 index 0000000000..4821a87d18 --- /dev/null +++ b/src/openai/types/container_list_params.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ContainerListParams"] + + +class ContainerListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/src/openai/types/container_list_response.py b/src/openai/types/container_list_response.py new file mode 100644 index 0000000000..8f39548201 --- /dev/null +++ b/src/openai/types/container_list_response.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ContainerListResponse", "ExpiresAfter"] + + +class ExpiresAfter(BaseModel): + """ + The container will expire after this time period. + The anchor is the reference point for the expiration. + The minutes is the number of minutes after the anchor before the container expires. + """ + + anchor: Optional[Literal["last_active_at"]] = None + """The reference point for the expiration.""" + + minutes: Optional[int] = None + """The number of minutes after the anchor before the container expires.""" + + +class ContainerListResponse(BaseModel): + id: str + """Unique identifier for the container.""" + + created_at: int + """Unix timestamp (in seconds) when the container was created.""" + + name: str + """Name of the container.""" + + object: str + """The type of this object.""" + + status: str + """Status of the container (e.g., active, deleted).""" + + expires_after: Optional[ExpiresAfter] = None + """ + The container will expire after this time period. The anchor is the reference + point for the expiration. The minutes is the number of minutes after the anchor + before the container expires. + """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/container_retrieve_response.py b/src/openai/types/container_retrieve_response.py new file mode 100644 index 0000000000..9ba3e18c3a --- /dev/null +++ b/src/openai/types/container_retrieve_response.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ContainerRetrieveResponse", "ExpiresAfter"] + + +class ExpiresAfter(BaseModel): + """ + The container will expire after this time period. + The anchor is the reference point for the expiration. + The minutes is the number of minutes after the anchor before the container expires. + """ + + anchor: Optional[Literal["last_active_at"]] = None + """The reference point for the expiration.""" + + minutes: Optional[int] = None + """The number of minutes after the anchor before the container expires.""" + + +class ContainerRetrieveResponse(BaseModel): + id: str + """Unique identifier for the container.""" + + created_at: int + """Unix timestamp (in seconds) when the container was created.""" + + name: str + """Name of the container.""" + + object: str + """The type of this object.""" + + status: str + """Status of the container (e.g., active, deleted).""" + + expires_after: Optional[ExpiresAfter] = None + """ + The container will expire after this time period. The anchor is the reference + point for the expiration. The minutes is the number of minutes after the anchor + before the container expires. + """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/containers/__init__.py b/src/openai/types/containers/__init__.py new file mode 100644 index 0000000000..7d555ad3a4 --- /dev/null +++ b/src/openai/types/containers/__init__.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .file_list_params import FileListParams as FileListParams +from .file_create_params import FileCreateParams as FileCreateParams +from .file_list_response import FileListResponse as FileListResponse +from .file_create_response import FileCreateResponse as FileCreateResponse +from .file_retrieve_response import FileRetrieveResponse as FileRetrieveResponse diff --git a/src/openai/types/containers/file_create_params.py b/src/openai/types/containers/file_create_params.py new file mode 100644 index 0000000000..1e41330017 --- /dev/null +++ b/src/openai/types/containers/file_create_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from ..._types import FileTypes + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file: FileTypes + """The File object (not file name) to be uploaded.""" + + file_id: str + """Name of the file to create.""" diff --git a/src/openai/types/containers/file_create_response.py b/src/openai/types/containers/file_create_response.py new file mode 100644 index 0000000000..4a652483fc --- /dev/null +++ b/src/openai/types/containers/file_create_response.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileCreateResponse"] + + +class FileCreateResponse(BaseModel): + id: str + """Unique identifier for the file.""" + + bytes: int + """Size of the file in bytes.""" + + container_id: str + """The container this file belongs to.""" + + created_at: int + """Unix timestamp (in seconds) when the file was created.""" + + object: Literal["container.file"] + """The type of this object (`container.file`).""" + + path: str + """Path of the file in the container.""" + + source: str + """Source of the file (e.g., `user`, `assistant`).""" diff --git a/src/openai/types/containers/file_list_params.py b/src/openai/types/containers/file_list_params.py new file mode 100644 index 0000000000..3565acaf36 --- /dev/null +++ b/src/openai/types/containers/file_list_params.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["FileListParams"] + + +class FileListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ diff --git a/src/openai/types/containers/file_list_response.py b/src/openai/types/containers/file_list_response.py new file mode 100644 index 0000000000..e5eee38d99 --- /dev/null +++ b/src/openai/types/containers/file_list_response.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
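# A sketch of FileCreateParams for container files, assuming a matching
# client.containers.files resource; the container id and local file path are
# placeholders.
from openai import OpenAI

client = OpenAI()

created = client.containers.files.create(
    "cntr_123",  # hypothetical container id
    file=open("data.csv", "rb"),
)
print(created.id, created.path, created.bytes)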
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileListResponse"] + + +class FileListResponse(BaseModel): + id: str + """Unique identifier for the file.""" + + bytes: int + """Size of the file in bytes.""" + + container_id: str + """The container this file belongs to.""" + + created_at: int + """Unix timestamp (in seconds) when the file was created.""" + + object: Literal["container.file"] + """The type of this object (`container.file`).""" + + path: str + """Path of the file in the container.""" + + source: str + """Source of the file (e.g., `user`, `assistant`).""" diff --git a/src/openai/types/containers/file_retrieve_response.py b/src/openai/types/containers/file_retrieve_response.py new file mode 100644 index 0000000000..37fb0e43dd --- /dev/null +++ b/src/openai/types/containers/file_retrieve_response.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FileRetrieveResponse"] + + +class FileRetrieveResponse(BaseModel): + id: str + """Unique identifier for the file.""" + + bytes: int + """Size of the file in bytes.""" + + container_id: str + """The container this file belongs to.""" + + created_at: int + """Unix timestamp (in seconds) when the file was created.""" + + object: Literal["container.file"] + """The type of this object (`container.file`).""" + + path: str + """Path of the file in the container.""" + + source: str + """Source of the file (e.g., `user`, `assistant`).""" diff --git a/src/openai/types/beta/chat/__init__.py b/src/openai/types/containers/files/__init__.py similarity index 100% rename from src/openai/types/beta/chat/__init__.py rename to src/openai/types/containers/files/__init__.py diff --git a/src/openai/types/conversations/__init__.py b/src/openai/types/conversations/__init__.py new file mode 100644 index 0000000000..9dec848737 --- /dev/null +++ b/src/openai/types/conversations/__init__.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .message import Message as Message +from .conversation import Conversation as Conversation +from .text_content import TextContent as TextContent +from .refusal_content import RefusalContent as RefusalContent +from .item_list_params import ItemListParams as ItemListParams +from .conversation_item import ConversationItem as ConversationItem +from .input_file_content import InputFileContent as InputFileContent +from .input_text_content import InputTextContent as InputTextContent +from .item_create_params import ItemCreateParams as ItemCreateParams +from .input_image_content import InputImageContent as InputImageContent +from .output_text_content import OutputTextContent as OutputTextContent +from .item_retrieve_params import ItemRetrieveParams as ItemRetrieveParams +from .summary_text_content import SummaryTextContent as SummaryTextContent +from .refusal_content_param import RefusalContentParam as RefusalContentParam +from .conversation_item_list import ConversationItemList as ConversationItemList +from .input_file_content_param import InputFileContentParam as InputFileContentParam +from .input_text_content_param import InputTextContentParam as InputTextContentParam +from .input_image_content_param import InputImageContentParam as InputImageContentParam +from .output_text_content_param import OutputTextContentParam as OutputTextContentParam +from .conversation_create_params import ConversationCreateParams as ConversationCreateParams +from .conversation_update_params import ConversationUpdateParams as ConversationUpdateParams +from .computer_screenshot_content import ComputerScreenshotContent as ComputerScreenshotContent +from .conversation_deleted_resource import ConversationDeletedResource as ConversationDeletedResource diff --git a/src/openai/types/conversations/computer_screenshot_content.py b/src/openai/types/conversations/computer_screenshot_content.py new file mode 100644 index 0000000000..e42096eba2 --- /dev/null +++ b/src/openai/types/conversations/computer_screenshot_content.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComputerScreenshotContent"] + + +class ComputerScreenshotContent(BaseModel): + """A screenshot of a computer.""" + + file_id: Optional[str] = None + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: Optional[str] = None + """The URL of the screenshot image.""" + + type: Literal["computer_screenshot"] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. + """ diff --git a/src/openai/types/conversations/conversation.py b/src/openai/types/conversations/conversation.py new file mode 100644 index 0000000000..ed63d40355 --- /dev/null +++ b/src/openai/types/conversations/conversation.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["Conversation"] + + +class Conversation(BaseModel): + id: str + """The unique ID of the conversation.""" + + created_at: int + """ + The time at which the conversation was created, measured in seconds since the + Unix epoch. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters. + """ + + object: Literal["conversation"] + """The object type, which is always `conversation`.""" diff --git a/src/openai/types/conversations/conversation_create_params.py b/src/openai/types/conversations/conversation_create_params.py new file mode 100644 index 0000000000..5f38d2aca7 --- /dev/null +++ b/src/openai/types/conversations/conversation_create_params.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import TypedDict + +from ..shared_params.metadata import Metadata +from ..responses.response_input_item_param import ResponseInputItemParam + +__all__ = ["ConversationCreateParams"] + + +class ConversationCreateParams(TypedDict, total=False): + items: Optional[Iterable[ResponseInputItemParam]] + """Initial items to include in the conversation context. + + You may add up to 20 items at a time. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/conversations/conversation_deleted_resource.py b/src/openai/types/conversations/conversation_deleted_resource.py new file mode 100644 index 0000000000..7abcb2448e --- /dev/null +++ b/src/openai/types/conversations/conversation_deleted_resource.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationDeletedResource"] + + +class ConversationDeletedResource(BaseModel): + id: str + + deleted: bool + + object: Literal["conversation.deleted"] diff --git a/src/openai/types/conversations/conversation_item.py b/src/openai/types/conversations/conversation_item.py new file mode 100644 index 0000000000..46268d381c --- /dev/null +++ b/src/openai/types/conversations/conversation_item.py @@ -0,0 +1,248 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
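# A sketch of ConversationCreateParams in use, assuming a matching
# client.conversations resource; the initial item and metadata values are
# placeholders.
from openai import OpenAI

client = OpenAI()

conversation = client.conversations.create(
    items=[{"type": "message", "role": "user", "content": "Hello!"}],
    metadata={"topic": "greeting"},  # hypothetical metadata key
)
print(conversation.id, conversation.object)  # object is always "conversation"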
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .message import Message +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..responses.response_reasoning_item import ResponseReasoningItem +from ..responses.response_custom_tool_call import ResponseCustomToolCall +from ..responses.response_computer_tool_call import ResponseComputerToolCall +from ..responses.response_function_web_search import ResponseFunctionWebSearch +from ..responses.response_apply_patch_tool_call import ResponseApplyPatchToolCall +from ..responses.response_file_search_tool_call import ResponseFileSearchToolCall +from ..responses.response_custom_tool_call_output import ResponseCustomToolCallOutput +from ..responses.response_function_tool_call_item import ResponseFunctionToolCallItem +from ..responses.response_function_shell_tool_call import ResponseFunctionShellToolCall +from ..responses.response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall +from ..responses.response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput +from ..responses.response_computer_tool_call_output_item import ResponseComputerToolCallOutputItem +from ..responses.response_function_tool_call_output_item import ResponseFunctionToolCallOutputItem +from ..responses.response_function_shell_tool_call_output import ResponseFunctionShellToolCallOutput + +__all__ = [ + "ConversationItem", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "LocalShellCallOutput", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "McpApprovalResponse", + "McpCall", +] + + +class ImageGenerationCall(BaseModel): + """An image generation request made by the model.""" + + id: str + """The unique ID of the image generation call.""" + + result: Optional[str] = None + """The generated image encoded in base64.""" + + status: Literal["in_progress", "completed", "generating", "failed"] + """The status of the image generation call.""" + + type: Literal["image_generation_call"] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(BaseModel): + """Execute a shell command on the server.""" + + command: List[str] + """The command to run.""" + + env: Dict[str, str] + """Environment variables to set for the command.""" + + type: Literal["exec"] + """The type of the local shell action. Always `exec`.""" + + timeout_ms: Optional[int] = None + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] = None + """Optional user to run the command as.""" + + working_directory: Optional[str] = None + """Optional working directory to run the command in.""" + + +class LocalShellCall(BaseModel): + """A tool call to run a command on the local shell.""" + + id: str + """The unique ID of the local shell call.""" + + action: LocalShellCallAction + """Execute a shell command on the server.""" + + call_id: str + """The unique ID of the local shell tool call generated by the model.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the local shell call.""" + + type: Literal["local_shell_call"] + """The type of the local shell call. 
Always `local_shell_call`.""" + + +class LocalShellCallOutput(BaseModel): + """The output of a local shell tool call.""" + + id: str + """The unique ID of the local shell tool call generated by the model.""" + + output: str + """A JSON string of the output of the local shell tool call.""" + + type: Literal["local_shell_call_output"] + """The type of the local shell tool call output. Always `local_shell_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. One of `in_progress`, `completed`, or `incomplete`.""" + + +class McpListToolsTool(BaseModel): + """A tool available on an MCP server.""" + + input_schema: object + """The JSON schema describing the tool's input.""" + + name: str + """The name of the tool.""" + + annotations: Optional[object] = None + """Additional annotations about the tool.""" + + description: Optional[str] = None + """The description of the tool.""" + + +class McpListTools(BaseModel): + """A list of tools available on an MCP server.""" + + id: str + """The unique ID of the list.""" + + server_label: str + """The label of the MCP server.""" + + tools: List[McpListToolsTool] + """The tools available on the server.""" + + type: Literal["mcp_list_tools"] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] = None + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(BaseModel): + """A request for human approval of a tool invocation.""" + + id: str + """The unique ID of the approval request.""" + + arguments: str + """A JSON string of arguments for the tool.""" + + name: str + """The name of the tool to run.""" + + server_label: str + """The label of the MCP server making the request.""" + + type: Literal["mcp_approval_request"] + """The type of the item. Always `mcp_approval_request`.""" + + +class McpApprovalResponse(BaseModel): + """A response to an MCP approval request.""" + + id: str + """The unique ID of the approval response""" + + approval_request_id: str + """The ID of the approval request being answered.""" + + approve: bool + """Whether the request was approved.""" + + type: Literal["mcp_approval_response"] + """The type of the item. Always `mcp_approval_response`.""" + + reason: Optional[str] = None + """Optional reason for the decision.""" + + +class McpCall(BaseModel): + """An invocation of a tool on an MCP server.""" + + id: str + """The unique ID of the tool call.""" + + arguments: str + """A JSON string of the arguments passed to the tool.""" + + name: str + """The name of the tool that was run.""" + + server_label: str + """The label of the MCP server running the tool.""" + + type: Literal["mcp_call"] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] = None + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + """ + + error: Optional[str] = None + """The error from the tool call, if any.""" + + output: Optional[str] = None + """The output from the tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete", "calling", "failed"]] = None + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. 
+ """ + + +ConversationItem: TypeAlias = Annotated[ + Union[ + Message, + ResponseFunctionToolCallItem, + ResponseFunctionToolCallOutputItem, + ResponseFileSearchToolCall, + ResponseFunctionWebSearch, + ImageGenerationCall, + ResponseComputerToolCall, + ResponseComputerToolCallOutputItem, + ResponseReasoningItem, + ResponseCodeInterpreterToolCall, + LocalShellCall, + LocalShellCallOutput, + ResponseFunctionShellToolCall, + ResponseFunctionShellToolCallOutput, + ResponseApplyPatchToolCall, + ResponseApplyPatchToolCallOutput, + McpListTools, + McpApprovalRequest, + McpApprovalResponse, + McpCall, + ResponseCustomToolCall, + ResponseCustomToolCallOutput, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/conversations/conversation_item_list.py b/src/openai/types/conversations/conversation_item_list.py new file mode 100644 index 0000000000..74d945d864 --- /dev/null +++ b/src/openai/types/conversations/conversation_item_list.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemList"] + + +class ConversationItemList(BaseModel): + """A list of Conversation items.""" + + data: List[ConversationItem] + """A list of conversation items.""" + + first_id: str + """The ID of the first item in the list.""" + + has_more: bool + """Whether there are more items available.""" + + last_id: str + """The ID of the last item in the list.""" + + object: Literal["list"] + """The type of object returned, must be `list`.""" diff --git a/src/openai/types/conversations/conversation_update_params.py b/src/openai/types/conversations/conversation_update_params.py new file mode 100644 index 0000000000..1f0dd09e50 --- /dev/null +++ b/src/openai/types/conversations/conversation_update_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["ConversationUpdateParams"] + + +class ConversationUpdateParams(TypedDict, total=False): + metadata: Required[Optional[Metadata]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/conversations/input_file_content.py b/src/openai/types/conversations/input_file_content.py new file mode 100644 index 0000000000..ca555d85fc --- /dev/null +++ b/src/openai/types/conversations/input_file_content.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..responses.response_input_file import ResponseInputFile + +__all__ = ["InputFileContent"] + +InputFileContent = ResponseInputFile diff --git a/src/openai/types/conversations/input_file_content_param.py b/src/openai/types/conversations/input_file_content_param.py new file mode 100644 index 0000000000..1ed8b8b9d1 --- /dev/null +++ b/src/openai/types/conversations/input_file_content_param.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..responses.response_input_file_param import ResponseInputFileParam + +InputFileContentParam = ResponseInputFileParam diff --git a/src/openai/types/conversations/input_image_content.py b/src/openai/types/conversations/input_image_content.py new file mode 100644 index 0000000000..4304323c3a --- /dev/null +++ b/src/openai/types/conversations/input_image_content.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..responses.response_input_image import ResponseInputImage + +__all__ = ["InputImageContent"] + +InputImageContent = ResponseInputImage diff --git a/src/openai/types/conversations/input_image_content_param.py b/src/openai/types/conversations/input_image_content_param.py new file mode 100644 index 0000000000..a0ef9f545c --- /dev/null +++ b/src/openai/types/conversations/input_image_content_param.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..responses.response_input_image_param import ResponseInputImageParam + +InputImageContentParam = ResponseInputImageParam diff --git a/src/openai/types/conversations/input_text_content.py b/src/openai/types/conversations/input_text_content.py new file mode 100644 index 0000000000..cab8b26cb1 --- /dev/null +++ b/src/openai/types/conversations/input_text_content.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..responses.response_input_text import ResponseInputText + +__all__ = ["InputTextContent"] + +InputTextContent = ResponseInputText diff --git a/src/openai/types/conversations/input_text_content_param.py b/src/openai/types/conversations/input_text_content_param.py new file mode 100644 index 0000000000..b1fd9a5f1c --- /dev/null +++ b/src/openai/types/conversations/input_text_content_param.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..responses.response_input_text_param import ResponseInputTextParam + +InputTextContentParam = ResponseInputTextParam diff --git a/src/openai/types/conversations/item_create_params.py b/src/openai/types/conversations/item_create_params.py new file mode 100644 index 0000000000..9158b7167f --- /dev/null +++ b/src/openai/types/conversations/item_create_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable +from typing_extensions import Required, TypedDict + +from ..responses.response_includable import ResponseIncludable +from ..responses.response_input_item_param import ResponseInputItemParam + +__all__ = ["ItemCreateParams"] + + +class ItemCreateParams(TypedDict, total=False): + items: Required[Iterable[ResponseInputItemParam]] + """The items to add to the conversation. You may add up to 20 items at a time.""" + + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. 
+ """ diff --git a/src/openai/types/conversations/item_list_params.py b/src/openai/types/conversations/item_list_params.py new file mode 100644 index 0000000000..a4dd61f399 --- /dev/null +++ b/src/openai/types/conversations/item_list_params.py @@ -0,0 +1,50 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, TypedDict + +from ..responses.response_includable import ResponseIncludable + +__all__ = ["ItemListParams"] + + +class ItemListParams(TypedDict, total=False): + after: str + """An item ID to list items after, used in pagination.""" + + include: List[ResponseIncludable] + """Specify additional output data to include in the model response. + + Currently supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + """ diff --git a/src/openai/types/conversations/item_retrieve_params.py b/src/openai/types/conversations/item_retrieve_params.py new file mode 100644 index 0000000000..8c5db1e533 --- /dev/null +++ b/src/openai/types/conversations/item_retrieve_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +from ..responses.response_includable import ResponseIncludable + +__all__ = ["ItemRetrieveParams"] + + +class ItemRetrieveParams(TypedDict, total=False): + conversation_id: Required[str] + + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for + [listing Conversation items above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. + """ diff --git a/src/openai/types/conversations/message.py b/src/openai/types/conversations/message.py new file mode 100644 index 0000000000..86c8860da8 --- /dev/null +++ b/src/openai/types/conversations/message.py @@ -0,0 +1,70 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .text_content import TextContent +from .summary_text_content import SummaryTextContent +from .computer_screenshot_content import ComputerScreenshotContent +from ..responses.response_input_file import ResponseInputFile +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_image import ResponseInputImage +from ..responses.response_output_text import ResponseOutputText +from ..responses.response_output_refusal import ResponseOutputRefusal + +__all__ = ["Message", "Content", "ContentReasoningText"] + + +class ContentReasoningText(BaseModel): + """Reasoning text from the model.""" + + text: str + """The reasoning text from the model.""" + + type: Literal["reasoning_text"] + """The type of the reasoning text. Always `reasoning_text`.""" + + +Content: TypeAlias = Annotated[ + Union[ + ResponseInputText, + ResponseOutputText, + TextContent, + SummaryTextContent, + ContentReasoningText, + ResponseOutputRefusal, + ResponseInputImage, + ComputerScreenshotContent, + ResponseInputFile, + ], + PropertyInfo(discriminator="type"), +] + + +class Message(BaseModel): + """A message to or from the model.""" + + id: str + """The unique ID of the message.""" + + content: List[Content] + """The content of the message""" + + role: Literal["unknown", "user", "assistant", "system", "critic", "discriminator", "developer", "tool"] + """The role of the message. + + One of `unknown`, `user`, `assistant`, `system`, `critic`, `discriminator`, + `developer`, or `tool`. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message. Always set to `message`.""" diff --git a/src/openai/types/conversations/output_text_content.py b/src/openai/types/conversations/output_text_content.py new file mode 100644 index 0000000000..cfe9307d74 --- /dev/null +++ b/src/openai/types/conversations/output_text_content.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..responses.response_output_text import ResponseOutputText + +__all__ = ["OutputTextContent"] + +OutputTextContent = ResponseOutputText diff --git a/src/openai/types/conversations/output_text_content_param.py b/src/openai/types/conversations/output_text_content_param.py new file mode 100644 index 0000000000..dc3e2026f6 --- /dev/null +++ b/src/openai/types/conversations/output_text_content_param.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..responses.response_output_text_param import ResponseOutputTextParam + +OutputTextContentParam = ResponseOutputTextParam diff --git a/src/openai/types/conversations/refusal_content.py b/src/openai/types/conversations/refusal_content.py new file mode 100644 index 0000000000..6206c267dc --- /dev/null +++ b/src/openai/types/conversations/refusal_content.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from ..responses.response_output_refusal import ResponseOutputRefusal + +__all__ = ["RefusalContent"] + +RefusalContent = ResponseOutputRefusal diff --git a/src/openai/types/conversations/refusal_content_param.py b/src/openai/types/conversations/refusal_content_param.py new file mode 100644 index 0000000000..9b83da5f2d --- /dev/null +++ b/src/openai/types/conversations/refusal_content_param.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..responses.response_output_refusal_param import ResponseOutputRefusalParam + +RefusalContentParam = ResponseOutputRefusalParam diff --git a/src/openai/types/conversations/summary_text_content.py b/src/openai/types/conversations/summary_text_content.py new file mode 100644 index 0000000000..6464a36599 --- /dev/null +++ b/src/openai/types/conversations/summary_text_content.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["SummaryTextContent"] + + +class SummaryTextContent(BaseModel): + """A summary text from the model.""" + + text: str + """A summary of the reasoning output from the model so far.""" + + type: Literal["summary_text"] + """The type of the object. Always `summary_text`.""" diff --git a/src/openai/types/conversations/text_content.py b/src/openai/types/conversations/text_content.py new file mode 100644 index 0000000000..e602466c47 --- /dev/null +++ b/src/openai/types/conversations/text_content.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TextContent"] + + +class TextContent(BaseModel): + """A text content.""" + + text: str + + type: Literal["text"] diff --git a/src/openai/types/create_embedding_response.py b/src/openai/types/create_embedding_response.py index eff247a112..314a7f9afc 100644 --- a/src/openai/types/create_embedding_response.py +++ b/src/openai/types/create_embedding_response.py @@ -10,6 +10,8 @@ class Usage(BaseModel): + """The usage information for the request.""" + prompt_tokens: int """The number of tokens used by the prompt.""" diff --git a/src/openai/types/embedding.py b/src/openai/types/embedding.py index 769b1d165f..fbffec01e0 100644 --- a/src/openai/types/embedding.py +++ b/src/openai/types/embedding.py @@ -9,6 +9,8 @@ class Embedding(BaseModel): + """Represents an embedding vector returned by embedding endpoint.""" + embedding: List[float] """The embedding vector, which is a list of floats. diff --git a/src/openai/types/embedding_create_params.py b/src/openai/types/embedding_create_params.py index 930b3b7914..ab3e877964 100644 --- a/src/openai/types/embedding_create_params.py +++ b/src/openai/types/embedding_create_params.py @@ -2,32 +2,37 @@ from __future__ import annotations -from typing import List, Union, Iterable +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .._types import SequenceNotStr +from .embedding_model import EmbeddingModel + __all__ = ["EmbeddingCreateParams"] class EmbeddingCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] + input: Required[Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]]]] """Input text to embed, encoded as a string or array of tokens. 
To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model - (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any - array must be 2048 dimensions or less. + (8192 tokens for all embedding models), cannot be an empty string, and any array + must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. In addition to the per-input token limit, all embedding + models enforce a maximum of 300,000 tokens summed across all inputs in a single + request. """ - model: Required[Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]]] + model: Required[Union[str, EmbeddingModel]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ dimensions: int @@ -46,5 +51,5 @@ class EmbeddingCreateParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/embedding_model.py b/src/openai/types/embedding_model.py new file mode 100644 index 0000000000..075ff97644 --- /dev/null +++ b/src/openai/types/embedding_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["EmbeddingModel"] + +EmbeddingModel: TypeAlias = Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] diff --git a/src/openai/types/eval_create_params.py b/src/openai/types/eval_create_params.py new file mode 100644 index 0000000000..a1d5ea5371 --- /dev/null +++ b/src/openai/types/eval_create_params.py @@ -0,0 +1,244 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
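A short sketch of a request exercising the `input`, `model`, and `dimensions` fields of the embedding params above:

from openai import OpenAI

client = OpenAI()

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=["first passage to embed", "second passage to embed"],
    dimensions=256,  # only supported by text-embedding-3-* models
)
print(len(response.data), len(response.data[0].embedding))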
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .._types import SequenceNotStr +from .shared_params.metadata import Metadata +from .graders.grader_inputs_param import GraderInputsParam +from .graders.python_grader_param import PythonGraderParam +from .graders.score_model_grader_param import ScoreModelGraderParam +from .graders.string_check_grader_param import StringCheckGraderParam +from .responses.response_input_text_param import ResponseInputTextParam +from .graders.text_similarity_grader_param import TextSimilarityGraderParam +from .responses.response_input_audio_param import ResponseInputAudioParam + +__all__ = [ + "EvalCreateParams", + "DataSourceConfig", + "DataSourceConfigCustom", + "DataSourceConfigLogs", + "DataSourceConfigStoredCompletions", + "TestingCriterion", + "TestingCriterionLabelModel", + "TestingCriterionLabelModelInput", + "TestingCriterionLabelModelInputSimpleInputMessage", + "TestingCriterionLabelModelInputEvalItem", + "TestingCriterionLabelModelInputEvalItemContent", + "TestingCriterionLabelModelInputEvalItemContentOutputText", + "TestingCriterionLabelModelInputEvalItemContentInputImage", + "TestingCriterionTextSimilarity", + "TestingCriterionPython", + "TestingCriterionScoreModel", +] + + +class EvalCreateParams(TypedDict, total=False): + data_source_config: Required[DataSourceConfig] + """The configuration for the data source used for the evaluation runs. + + Dictates the schema of the data used in the evaluation. + """ + + testing_criteria: Required[Iterable[TestingCriterion]] + """A list of graders for all eval runs in this group. + + Graders can reference variables in the data source using double curly braces + notation, like `{{item.variable_name}}`. To reference the model's output, use + the `sample` namespace (ie, `{{sample.output_text}}`). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + +class DataSourceConfigCustom(TypedDict, total=False): + """ + A CustomDataSourceConfig object that defines the schema for the data source used for the evaluation runs. + This schema is used to define the shape of the data that will be: + - Used to define your testing criteria and + - What data is required when creating a run + """ + + item_schema: Required[Dict[str, object]] + """The json schema for each row in the data source.""" + + type: Required[Literal["custom"]] + """The type of data source. Always `custom`.""" + + include_sample_schema: bool + """ + Whether the eval should expect you to populate the sample namespace (ie, by + generating responses off of your data source) + """ + + +class DataSourceConfigLogs(TypedDict, total=False): + """ + A data source config which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + """ + + type: Required[Literal["logs"]] + """The type of data source. 
Always `logs`.""" + + metadata: Dict[str, object] + """Metadata filters for the logs data source.""" + + +class DataSourceConfigStoredCompletions(TypedDict, total=False): + """Deprecated in favor of LogsDataSourceConfig.""" + + type: Required[Literal["stored_completions"]] + """The type of data source. Always `stored_completions`.""" + + metadata: Dict[str, object] + """Metadata filters for the stored completions data source.""" + + +DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions] + + +class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[str] + """The role of the message (e.g. "system", "assistant", "user").""" + + +class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + TestingCriterionLabelModelInputEvalItemContentOutputText, + TestingCriterionLabelModelInputEvalItemContentInputImage, + ResponseInputAudioParam, + GraderInputsParam, +] + + +class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[TestingCriterionLabelModelInputEvalItemContent] + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +TestingCriterionLabelModelInput: TypeAlias = Union[ + TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem +] + + +class TestingCriterionLabelModel(TypedDict, total=False): + """ + A LabelModelGrader object which uses a model to assign labels to each item + in the evaluation. + """ + + input: Required[Iterable[TestingCriterionLabelModelInput]] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + labels: Required[SequenceNotStr[str]] + """The labels to classify to each item in the evaluation.""" + + model: Required[str] + """The model to use for the evaluation. 
Must support structured outputs.""" + + name: Required[str] + """The name of the grader.""" + + passing_labels: Required[SequenceNotStr[str]] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Required[Literal["label_model"]] + """The object type, which is always `label_model`.""" + + +class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False): + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + + pass_threshold: Required[float] + """The threshold for the score.""" + + +class TestingCriterionPython(PythonGraderParam, total=False): + """A PythonGrader object that runs a python script on the input.""" + + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionScoreModel(ScoreModelGraderParam, total=False): + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + + pass_threshold: float + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + TestingCriterionLabelModel, + StringCheckGraderParam, + TestingCriterionTextSimilarity, + TestingCriterionPython, + TestingCriterionScoreModel, +] diff --git a/src/openai/types/eval_create_response.py b/src/openai/types/eval_create_response.py new file mode 100644 index 0000000000..f3166422ba --- /dev/null +++ b/src/openai/types/eval_create_response.py @@ -0,0 +1,130 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalCreateResponse", + "DataSourceConfig", + "DataSourceConfigLogs", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + + +class DataSourceConfigLogs(BaseModel): + """ + A LogsDataSourceConfig which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + The schema returned by this data source config is used to defined what variables are available in your evals. + `item` and `sample` are both defined when using this data source config. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["logs"] + """The type of data source. Always `logs`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, DataSourceConfigLogs, EvalStoredCompletionsDataSourceConfig], + PropertyInfo(discriminator="type"), +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + """A PythonGrader object that runs a python script on the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalCreateResponse(BaseModel): + """ + An Eval object with a data source config and testing criteria. + An Eval represents a task to be done for your LLM integration. + Like: + - Improve the quality of my chatbot + - See how well my chatbot handles customer support + - Check if o4-mini is better at my usecase than gpt-4o + """ + + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_custom_data_source_config.py b/src/openai/types/eval_custom_data_source_config.py new file mode 100644 index 0000000000..6234c4f47a --- /dev/null +++ b/src/openai/types/eval_custom_data_source_config.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["EvalCustomDataSourceConfig"] + + +class EvalCustomDataSourceConfig(BaseModel): + """ + A CustomDataSourceConfig which specifies the schema of your `item` and optionally `sample` namespaces. + The response schema defines the shape of the data that will be: + - Used to define your testing criteria and + - What data is required when creating a run + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["custom"] + """The type of data source. 
Always `custom`.""" diff --git a/src/openai/types/eval_delete_response.py b/src/openai/types/eval_delete_response.py new file mode 100644 index 0000000000..a27261e242 --- /dev/null +++ b/src/openai/types/eval_delete_response.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["EvalDeleteResponse"] + + +class EvalDeleteResponse(BaseModel): + deleted: bool + + eval_id: str + + object: str diff --git a/src/openai/types/eval_list_params.py b/src/openai/types/eval_list_params.py new file mode 100644 index 0000000000..d9a12d0ddf --- /dev/null +++ b/src/openai/types/eval_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["EvalListParams"] + + +class EvalListParams(TypedDict, total=False): + after: str + """Identifier for the last eval from the previous pagination request.""" + + limit: int + """Number of evals to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for evals by timestamp. + + Use `asc` for ascending order or `desc` for descending order. + """ + + order_by: Literal["created_at", "updated_at"] + """Evals can be ordered by creation time or last updated time. + + Use `created_at` for creation time or `updated_at` for last updated time. + """ diff --git a/src/openai/types/eval_list_response.py b/src/openai/types/eval_list_response.py new file mode 100644 index 0000000000..7cd92c5a09 --- /dev/null +++ b/src/openai/types/eval_list_response.py @@ -0,0 +1,130 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalListResponse", + "DataSourceConfig", + "DataSourceConfigLogs", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + + +class DataSourceConfigLogs(BaseModel): + """ + A LogsDataSourceConfig which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + The schema returned by this data source config is used to defined what variables are available in your evals. + `item` and `sample` are both defined when using this data source config. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["logs"] + """The type of data source. Always `logs`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, DataSourceConfigLogs, EvalStoredCompletionsDataSourceConfig], + PropertyInfo(discriminator="type"), +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + """A PythonGrader object that runs a python script on the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalListResponse(BaseModel): + """ + An Eval object with a data source config and testing criteria. + An Eval represents a task to be done for your LLM integration. + Like: + - Improve the quality of my chatbot + - See how well my chatbot handles customer support + - Check if o4-mini is better at my usecase than gpt-4o + """ + + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_retrieve_response.py b/src/openai/types/eval_retrieve_response.py new file mode 100644 index 0000000000..56db7d6bc1 --- /dev/null +++ b/src/openai/types/eval_retrieve_response.py @@ -0,0 +1,130 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
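A sketch of the corresponding list call for the eval list params above, assuming the generated `client.evals.list()` method:

from openai import OpenAI

client = OpenAI()

for ev in client.evals.list(limit=10, order="desc", order_by="created_at"):
    print(ev.id, ev.name)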
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalRetrieveResponse", + "DataSourceConfig", + "DataSourceConfigLogs", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + + +class DataSourceConfigLogs(BaseModel): + """ + A LogsDataSourceConfig which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + The schema returned by this data source config is used to defined what variables are available in your evals. + `item` and `sample` are both defined when using this data source config. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["logs"] + """The type of data source. Always `logs`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, DataSourceConfigLogs, EvalStoredCompletionsDataSourceConfig], + PropertyInfo(discriminator="type"), +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + """A PythonGrader object that runs a python script on the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalRetrieveResponse(BaseModel): + """ + An Eval object with a data source config and testing criteria. + An Eval represents a task to be done for your LLM integration. 
+ Like: + - Improve the quality of my chatbot + - See how well my chatbot handles customer support + - Check if o4-mini is better at my usecase than gpt-4o + """ + + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/eval_stored_completions_data_source_config.py b/src/openai/types/eval_stored_completions_data_source_config.py new file mode 100644 index 0000000000..d11f6ae14c --- /dev/null +++ b/src/openai/types/eval_stored_completions_data_source_config.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel +from .shared.metadata import Metadata + +__all__ = ["EvalStoredCompletionsDataSourceConfig"] + + +class EvalStoredCompletionsDataSourceConfig(BaseModel): + """Deprecated in favor of LogsDataSourceConfig.""" + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["stored_completions"] + """The type of data source. Always `stored_completions`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/eval_update_params.py b/src/openai/types/eval_update_params.py new file mode 100644 index 0000000000..042db29af5 --- /dev/null +++ b/src/openai/types/eval_update_params.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import TypedDict + +from .shared_params.metadata import Metadata + +__all__ = ["EvalUpdateParams"] + + +class EvalUpdateParams(TypedDict, total=False): + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + name: str + """Rename the evaluation.""" diff --git a/src/openai/types/eval_update_response.py b/src/openai/types/eval_update_response.py new file mode 100644 index 0000000000..30d4dbc3a1 --- /dev/null +++ b/src/openai/types/eval_update_response.py @@ -0,0 +1,130 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from .._utils import PropertyInfo +from .._models import BaseModel +from .shared.metadata import Metadata +from .graders.python_grader import PythonGrader +from .graders.label_model_grader import LabelModelGrader +from .graders.score_model_grader import ScoreModelGrader +from .graders.string_check_grader import StringCheckGrader +from .eval_custom_data_source_config import EvalCustomDataSourceConfig +from .graders.text_similarity_grader import TextSimilarityGrader +from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig + +__all__ = [ + "EvalUpdateResponse", + "DataSourceConfig", + "DataSourceConfigLogs", + "TestingCriterion", + "TestingCriterionEvalGraderTextSimilarity", + "TestingCriterionEvalGraderPython", + "TestingCriterionEvalGraderScoreModel", +] + + +class DataSourceConfigLogs(BaseModel): + """ + A LogsDataSourceConfig which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + The schema returned by this data source config is used to defined what variables are available in your evals. + `item` and `sample` are both defined when using this data source config. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The json schema for the run data source items. Learn how to build JSON schemas + [here](https://json-schema.org/). + """ + + type: Literal["logs"] + """The type of data source. Always `logs`.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + +DataSourceConfig: TypeAlias = Annotated[ + Union[EvalCustomDataSourceConfig, DataSourceConfigLogs, EvalStoredCompletionsDataSourceConfig], + PropertyInfo(discriminator="type"), +] + + +class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader): + __test__ = False + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + pass_threshold: float + """The threshold for the score.""" + + +class TestingCriterionEvalGraderPython(PythonGrader): + __test__ = False + """A PythonGrader object that runs a python script on the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +class TestingCriterionEvalGraderScoreModel(ScoreModelGrader): + __test__ = False + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + pass_threshold: Optional[float] = None + """The threshold for the score.""" + + +TestingCriterion: TypeAlias = Union[ + LabelModelGrader, + StringCheckGrader, + TestingCriterionEvalGraderTextSimilarity, + TestingCriterionEvalGraderPython, + TestingCriterionEvalGraderScoreModel, +] + + +class EvalUpdateResponse(BaseModel): + """ + An Eval object with a data source config and testing criteria. + An Eval represents a task to be done for your LLM integration. + Like: + - Improve the quality of my chatbot + - See how well my chatbot handles customer support + - Check if o4-mini is better at my usecase than gpt-4o + """ + + id: str + """Unique identifier for the evaluation.""" + + created_at: int + """The Unix timestamp (in seconds) for when the eval was created.""" + + data_source_config: DataSourceConfig + """Configuration of data sources used in runs of the evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the evaluation.""" + + object: Literal["eval"] + """The object type.""" + + testing_criteria: List[TestingCriterion] + """A list of testing criteria.""" diff --git a/src/openai/types/evals/__init__.py b/src/openai/types/evals/__init__.py new file mode 100644 index 0000000000..ebf84c6b8d --- /dev/null +++ b/src/openai/types/evals/__init__.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .eval_api_error import EvalAPIError as EvalAPIError +from .run_list_params import RunListParams as RunListParams +from .run_create_params import RunCreateParams as RunCreateParams +from .run_list_response import RunListResponse as RunListResponse +from .run_cancel_response import RunCancelResponse as RunCancelResponse +from .run_create_response import RunCreateResponse as RunCreateResponse +from .run_delete_response import RunDeleteResponse as RunDeleteResponse +from .run_retrieve_response import RunRetrieveResponse as RunRetrieveResponse +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource +from .create_eval_completions_run_data_source import ( + CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource, +) +from .create_eval_jsonl_run_data_source_param import ( + CreateEvalJSONLRunDataSourceParam as CreateEvalJSONLRunDataSourceParam, +) +from .create_eval_completions_run_data_source_param import ( + CreateEvalCompletionsRunDataSourceParam as CreateEvalCompletionsRunDataSourceParam, +) diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py new file mode 100644 index 0000000000..726ae6abf0 --- /dev/null +++ b/src/openai/types/evals/create_eval_completions_run_data_source.py @@ -0,0 +1,258 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..shared.metadata import Metadata +from ..graders.grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..shared.response_format_text import ResponseFormatText +from ..responses.easy_input_message import EasyInputMessage +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio +from ..chat.chat_completion_function_tool import ChatCompletionFunctionTool +from ..shared.response_format_json_object import ResponseFormatJSONObject +from ..shared.response_format_json_schema import ResponseFormatJSONSchema + +__all__ = [ + "CreateEvalCompletionsRunDataSource", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", + "SourceStoredCompletions", + "InputMessages", + "InputMessagesTemplate", + "InputMessagesTemplateTemplate", + "InputMessagesTemplateTemplateEvalItem", + "InputMessagesTemplateTemplateEvalItemContent", + "InputMessagesTemplateTemplateEvalItemContentOutputText", + "InputMessagesTemplateTemplateEvalItemContentInputImage", + "InputMessagesItemReference", + "SamplingParams", + "SamplingParamsResponseFormat", +] + + +class SourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class SourceFileContent(BaseModel): + content: List[SourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class SourceStoredCompletions(BaseModel): + """A StoredCompletionsRunDataSource configuration describing a set of filters""" + + type: Literal["stored_completions"] + """The type of source. 
Always `stored_completions`.""" + + created_after: Optional[int] = None + """An optional Unix timestamp to filter items created after this time.""" + + created_before: Optional[int] = None + """An optional Unix timestamp to filter items created before this time.""" + + limit: Optional[int] = None + """An optional maximum number of items to return.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Optional[str] = None + """An optional model to filter by (e.g., 'gpt-4o').""" + + +Source: TypeAlias = Annotated[ + Union[SourceFileContent, SourceFileID, SourceStoredCompletions], PropertyInfo(discriminator="type") +] + + +class InputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class InputMessagesTemplateTemplateEvalItemContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +InputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputText, + InputMessagesTemplateTemplateEvalItemContentOutputText, + InputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudio, + GraderInputs, +] + + +class InputMessagesTemplateTemplateEvalItem(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: InputMessagesTemplateTemplateEvalItemContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +InputMessagesTemplateTemplate: TypeAlias = Union[EasyInputMessage, InputMessagesTemplateTemplateEvalItem] + + +class InputMessagesTemplate(BaseModel): + template: List[InputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class InputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the `item` namespace. Ie, "item.input_trajectory" """ + + type: Literal["item_reference"] + """The type of input messages. 
Always `item_reference`.""" + + +InputMessages: TypeAlias = Annotated[ + Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type") +] + +SamplingParamsResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject] + + +class SamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + response_format: Optional[SamplingParamsResponseFormat] = None + """An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + tools: Optional[List[ChatCompletionFunctionTool]] = None + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Use this to provide a list of + functions the model may generate JSON inputs for. A max of 128 functions are + supported. + """ + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class CreateEvalCompletionsRunDataSource(BaseModel): + """A CompletionsRunDataSource object describing a model sampling configuration.""" + + source: Source + """Determines what populates the `item` namespace in this run's data source.""" + + type: Literal["completions"] + """The type of run data source. Always `completions`.""" + + input_messages: Optional[InputMessages] = None + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. 
"o3-mini").""" + + sampling_params: Optional[SamplingParams] = None diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py new file mode 100644 index 0000000000..6842f84af9 --- /dev/null +++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py @@ -0,0 +1,254 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..graders.grader_inputs_param import GraderInputsParam +from ..responses.easy_input_message_param import EasyInputMessageParam +from ..shared_params.response_format_text import ResponseFormatText +from ..responses.response_input_text_param import ResponseInputTextParam +from ..responses.response_input_audio_param import ResponseInputAudioParam +from ..chat.chat_completion_function_tool_param import ChatCompletionFunctionToolParam +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema + +__all__ = [ + "CreateEvalCompletionsRunDataSourceParam", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", + "SourceStoredCompletions", + "InputMessages", + "InputMessagesTemplate", + "InputMessagesTemplateTemplate", + "InputMessagesTemplateTemplateEvalItem", + "InputMessagesTemplateTemplateEvalItemContent", + "InputMessagesTemplateTemplateEvalItemContentOutputText", + "InputMessagesTemplateTemplateEvalItemContentInputImage", + "InputMessagesItemReference", + "SamplingParams", + "SamplingParamsResponseFormat", +] + + +class SourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class SourceFileContent(TypedDict, total=False): + content: Required[Iterable[SourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. Always `file_id`.""" + + +class SourceStoredCompletions(TypedDict, total=False): + """A StoredCompletionsRunDataSource configuration describing a set of filters""" + + type: Required[Literal["stored_completions"]] + """The type of source. Always `stored_completions`.""" + + created_after: Optional[int] + """An optional Unix timestamp to filter items created after this time.""" + + created_before: Optional[int] + """An optional Unix timestamp to filter items created before this time.""" + + limit: Optional[int] + """An optional maximum number of items to return.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + model: Optional[str] + """An optional model to filter by (e.g., 'gpt-4o').""" + + +Source: TypeAlias = Union[SourceFileContent, SourceFileID, SourceStoredCompletions] + + +class InputMessagesTemplateTemplateEvalItemContentOutputText(TypedDict, total=False): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +class InputMessagesTemplateTemplateEvalItemContentInputImage(TypedDict, total=False): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +InputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + InputMessagesTemplateTemplateEvalItemContentOutputText, + InputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudioParam, + GraderInputsParam, +] + + +class InputMessagesTemplateTemplateEvalItem(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[InputMessagesTemplateTemplateEvalItemContent] + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +InputMessagesTemplateTemplate: TypeAlias = Union[EasyInputMessageParam, InputMessagesTemplateTemplateEvalItem] + + +class InputMessagesTemplate(TypedDict, total=False): + template: Required[Iterable[InputMessagesTemplateTemplate]] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Required[Literal["template"]] + """The type of input messages. Always `template`.""" + + +class InputMessagesItemReference(TypedDict, total=False): + item_reference: Required[str] + """A reference to a variable in the `item` namespace. Ie, "item.input_trajectory" """ + + type: Required[Literal["item_reference"]] + """The type of input messages. Always `item_reference`.""" + + +InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference] + +SamplingParamsResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject] + + +class SamplingParams(TypedDict, total=False): + max_completion_tokens: int + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. 
+ Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + response_format: SamplingParamsResponseFormat + """An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + seed: int + """A seed value to initialize the randomness, during sampling.""" + + temperature: float + """A higher temperature increases randomness in the outputs.""" + + tools: Iterable[ChatCompletionFunctionToolParam] + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Use this to provide a list of + functions the model may generate JSON inputs for. A max of 128 functions are + supported. + """ + + top_p: float + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class CreateEvalCompletionsRunDataSourceParam(TypedDict, total=False): + """A CompletionsRunDataSource object describing a model sampling configuration.""" + + source: Required[Source] + """Determines what populates the `item` namespace in this run's data source.""" + + type: Required[Literal["completions"]] + """The type of run data source. Always `completions`.""" + + input_messages: InputMessages + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: str + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: SamplingParams diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source.py b/src/openai/types/evals/create_eval_jsonl_run_data_source.py new file mode 100644 index 0000000000..36ede2d9eb --- /dev/null +++ b/src/openai/types/evals/create_eval_jsonl_run_data_source.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["CreateEvalJSONLRunDataSource", "Source", "SourceFileContent", "SourceFileContentContent", "SourceFileID"] + + +class SourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class SourceFileContent(BaseModel): + content: List[SourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. 
Always `file_content`.""" + + +class SourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +Source: TypeAlias = Annotated[Union[SourceFileContent, SourceFileID], PropertyInfo(discriminator="type")] + + +class CreateEvalJSONLRunDataSource(BaseModel): + """ + A JsonlRunDataSource object with that specifies a JSONL file that matches the eval + """ + + source: Source + """Determines what populates the `item` namespace in the data source.""" + + type: Literal["jsonl"] + """The type of data source. Always `jsonl`.""" diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py new file mode 100644 index 0000000000..b87ba9c5df --- /dev/null +++ b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "CreateEvalJSONLRunDataSourceParam", + "Source", + "SourceFileContent", + "SourceFileContentContent", + "SourceFileID", +] + + +class SourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class SourceFileContent(TypedDict, total=False): + content: Required[Iterable[SourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class SourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. Always `file_id`.""" + + +Source: TypeAlias = Union[SourceFileContent, SourceFileID] + + +class CreateEvalJSONLRunDataSourceParam(TypedDict, total=False): + """ + A JsonlRunDataSource object with that specifies a JSONL file that matches the eval + """ + + source: Required[Source] + """Determines what populates the `item` namespace in the data source.""" + + type: Required[Literal["jsonl"]] + """The type of data source. Always `jsonl`.""" diff --git a/src/openai/types/evals/eval_api_error.py b/src/openai/types/evals/eval_api_error.py new file mode 100644 index 0000000000..9b2c1871fb --- /dev/null +++ b/src/openai/types/evals/eval_api_error.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["EvalAPIError"] + + +class EvalAPIError(BaseModel): + """An object representing an error response from the Eval API.""" + + code: str + """The error code.""" + + message: str + """The error message.""" diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py new file mode 100644 index 0000000000..ea4797eecb --- /dev/null +++ b/src/openai/types/evals/run_cancel_response.py @@ -0,0 +1,452 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..responses.tool import Tool +from ..shared.metadata import Metadata +from ..graders.grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from ..responses.response_format_text_config import ResponseFormatTextConfig +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunCancelResponse", + "DataSource", + "DataSourceResponses", + "DataSourceResponsesSource", + "DataSourceResponsesSourceFileContent", + "DataSourceResponsesSourceFileContentContent", + "DataSourceResponsesSourceFileID", + "DataSourceResponsesSourceResponses", + "DataSourceResponsesInputMessages", + "DataSourceResponsesInputMessagesTemplate", + "DataSourceResponsesInputMessagesTemplateTemplate", + "DataSourceResponsesInputMessagesTemplateTemplateChatMessage", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItem", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage", + "DataSourceResponsesInputMessagesItemReference", + "DataSourceResponsesSamplingParams", + "DataSourceResponsesSamplingParamsText", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceResponsesSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceResponsesSourceFileContent(BaseModel): + content: List[DataSourceResponsesSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceResponsesSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceResponsesSourceResponses(BaseModel): + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional string to search the 'instructions' field. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. 
+ """ + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + temperature: Optional[float] = None + """Sampling temperature. This is a query parameter used to select responses.""" + + tools: Optional[List[str]] = None + """List of tool names. This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceResponsesSource: TypeAlias = Annotated[ + Union[DataSourceResponsesSourceFileContent, DataSourceResponsesSourceFileID, DataSourceResponsesSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudio, + GraderInputs, +] + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItem(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. 
+ """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceResponsesInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceResponsesInputMessagesTemplateTemplateChatMessage, + DataSourceResponsesInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceResponsesInputMessagesTemplate(BaseModel): + template: List[DataSourceResponsesInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceResponsesInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the `item` namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceResponsesInputMessages: TypeAlias = Annotated[ + Union[DataSourceResponsesInputMessagesTemplate, DataSourceResponsesInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesSamplingParamsText(BaseModel): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + +class DataSourceResponsesSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. 
+ """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + text: Optional[DataSourceResponsesSamplingParamsText] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tools: Optional[List[Tool]] = None + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceResponses(BaseModel): + """A ResponsesRunDataSource object describing a model sampling configuration.""" + + source: DataSourceResponsesSource + """Determines what populates the `item` namespace in this run's data source.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + input_messages: Optional[DataSourceResponsesInputMessages] = None + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. 
"o3-mini").""" + + sampling_params: Optional[DataSourceResponsesSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + """Counters summarizing the outcomes of the evaluation run.""" + + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunCancelResponse(BaseModel): + """A schema representing an evaluation run.""" + + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py new file mode 100644 index 0000000000..02804c30da --- /dev/null +++ b/src/openai/types/evals/run_create_params.py @@ -0,0 +1,371 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from ..responses.tool_param import ToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort +from ..graders.grader_inputs_param import GraderInputsParam +from ..responses.response_input_text_param import ResponseInputTextParam +from ..responses.response_input_audio_param import ResponseInputAudioParam +from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam +from ..responses.response_format_text_config_param import ResponseFormatTextConfigParam +from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam + +__all__ = [ + "RunCreateParams", + "DataSource", + "DataSourceCreateEvalResponsesRunDataSource", + "DataSourceCreateEvalResponsesRunDataSourceSource", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileContent", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent", + "DataSourceCreateEvalResponsesRunDataSourceSourceFileID", + "DataSourceCreateEvalResponsesRunDataSourceSourceResponses", + "DataSourceCreateEvalResponsesRunDataSourceInputMessages", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentInputImage", + "DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference", + "DataSourceCreateEvalResponsesRunDataSourceSamplingParams", + "DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText", +] + + +class RunCreateParams(TypedDict, total=False): + data_source: Required[DataSource] + """Details about the run's data source.""" + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + name: str + """The name of the run.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent(TypedDict, total=False): + item: Required[Dict[str, object]] + + sample: Dict[str, object] + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileContent(TypedDict, total=False): + content: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent]] + """The content of the jsonl file.""" + + type: Required[Literal["file_content"]] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceFileID(TypedDict, total=False): + id: Required[str] + """The identifier of the file.""" + + type: Required[Literal["file_id"]] + """The type of jsonl source. 
Always `file_id`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total=False): + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Required[Literal["responses"]] + """The type of run data source. Always `responses`.""" + + created_after: Optional[int] + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] + """Optional string to search the 'instructions' field. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + temperature: Optional[float] + """Sampling temperature. This is a query parameter used to select responses.""" + + tools: Optional[SequenceNotStr[str]] + """List of tool names. This is a query parameter used to select responses.""" + + top_p: Optional[float] + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[SequenceNotStr[str]] + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceCreateEvalResponsesRunDataSourceSource: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceSourceFileContent, + DataSourceCreateEvalResponsesRunDataSourceSourceFileID, + DataSourceCreateEvalResponsesRunDataSourceSourceResponses, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage(TypedDict, total=False): + content: Required[str] + """The content of the message.""" + + role: Required[str] + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText( + TypedDict, total=False +): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. 
Always `output_text`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentInputImage( + TypedDict, total=False +): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudioParam, + GraderInputsParam, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent] + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate(TypedDict, total=False): + template: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate]] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Required[Literal["template"]] + """The type of input messages. Always `template`.""" + + +class DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference(TypedDict, total=False): + item_reference: Required[str] + """A reference to a variable in the `item` namespace. Ie, "item.name" """ + + type: Required[Literal["item_reference"]] + """The type of input messages. Always `item_reference`.""" + + +DataSourceCreateEvalResponsesRunDataSourceInputMessages: TypeAlias = Union[ + DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate, + DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference, +] + + +class DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText(TypedDict, total=False): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. 
Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: ResponseFormatTextConfigParam + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + +class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=False): + max_completion_tokens: int + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + seed: int + """A seed value to initialize the randomness, during sampling.""" + + temperature: float + """A higher temperature increases randomness in the outputs.""" + + text: DataSourceCreateEvalResponsesRunDataSourceSamplingParamsText + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tools: Iterable[ToolParam] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). 
+ """ + + top_p: float + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceCreateEvalResponsesRunDataSource(TypedDict, total=False): + """A ResponsesRunDataSource object describing a model sampling configuration.""" + + source: Required[DataSourceCreateEvalResponsesRunDataSourceSource] + """Determines what populates the `item` namespace in this run's data source.""" + + type: Required[Literal["responses"]] + """The type of run data source. Always `responses`.""" + + input_messages: DataSourceCreateEvalResponsesRunDataSourceInputMessages + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: str + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: DataSourceCreateEvalResponsesRunDataSourceSamplingParams + + +DataSource: TypeAlias = Union[ + CreateEvalJSONLRunDataSourceParam, + CreateEvalCompletionsRunDataSourceParam, + DataSourceCreateEvalResponsesRunDataSource, +] diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py new file mode 100644 index 0000000000..2cb856de6f --- /dev/null +++ b/src/openai/types/evals/run_create_response.py @@ -0,0 +1,452 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..responses.tool import Tool +from ..shared.metadata import Metadata +from ..graders.grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from ..responses.response_format_text_config import ResponseFormatTextConfig +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunCreateResponse", + "DataSource", + "DataSourceResponses", + "DataSourceResponsesSource", + "DataSourceResponsesSourceFileContent", + "DataSourceResponsesSourceFileContentContent", + "DataSourceResponsesSourceFileID", + "DataSourceResponsesSourceResponses", + "DataSourceResponsesInputMessages", + "DataSourceResponsesInputMessagesTemplate", + "DataSourceResponsesInputMessagesTemplateTemplate", + "DataSourceResponsesInputMessagesTemplateTemplateChatMessage", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItem", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage", + "DataSourceResponsesInputMessagesItemReference", + "DataSourceResponsesSamplingParams", + "DataSourceResponsesSamplingParamsText", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceResponsesSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceResponsesSourceFileContent(BaseModel): + content: 
List[DataSourceResponsesSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceResponsesSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceResponsesSourceResponses(BaseModel): + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional string to search the 'instructions' field. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + temperature: Optional[float] = None + """Sampling temperature. This is a query parameter used to select responses.""" + + tools: Optional[List[str]] = None + """List of tool names. This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceResponsesSource: TypeAlias = Annotated[ + Union[DataSourceResponsesSourceFileContent, DataSourceResponsesSourceFileID, DataSourceResponsesSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. 
Always `output_text`.""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudio, + GraderInputs, +] + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItem(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceResponsesInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceResponsesInputMessagesTemplateTemplateChatMessage, + DataSourceResponsesInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceResponsesInputMessagesTemplate(BaseModel): + template: List[DataSourceResponsesInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceResponsesInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the `item` namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceResponsesInputMessages: TypeAlias = Annotated[ + Union[DataSourceResponsesInputMessagesTemplate, DataSourceResponsesInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesSamplingParamsText(BaseModel): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + +class DataSourceResponsesSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + text: Optional[DataSourceResponsesSamplingParamsText] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tools: Optional[List[Tool]] = None + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceResponses(BaseModel): + """A ResponsesRunDataSource object describing a model sampling configuration.""" + + source: DataSourceResponsesSource + """Determines what populates the `item` namespace in this run's data source.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + input_messages: Optional[DataSourceResponsesInputMessages] = None + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. 
Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[DataSourceResponsesSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + """Counters summarizing the outcomes of the evaluation run.""" + + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunCreateResponse(BaseModel): + """A schema representing an evaluation run.""" + + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_delete_response.py b/src/openai/types/evals/run_delete_response.py new file mode 100644 index 0000000000..d48d01f86c --- /dev/null +++ b/src/openai/types/evals/run_delete_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
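# ---------------------------------------------------------------------------
# Illustrative aside (not part of the generated diff): the RunCreateResponse
# model added above exposes `result_counts` and `per_testing_criteria_results`,
# so a caller can summarize a run without touching the raw payload. A minimal
# sketch, assuming only the field names shown in the diff and that the module
# path mirrors the file location `src/openai/types/evals/run_create_response.py`.
from openai.types.evals.run_create_response import RunCreateResponse


def summarize_run(run: RunCreateResponse) -> str:
    """Build a one-line summary of an evaluation run's outcome."""
    counts = run.result_counts
    pass_rate = counts.passed / counts.total if counts.total else 0.0
    per_criteria = ", ".join(
        f"{r.testing_criteria}: {r.passed}/{r.passed + r.failed} passed"
        for r in run.per_testing_criteria_results
    )
    return f"{run.name} [{run.status}]: {pass_rate:.0%} passed ({per_criteria})"
# ---------------------------------------------------------------------------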
+ +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["RunDeleteResponse"] + + +class RunDeleteResponse(BaseModel): + deleted: Optional[bool] = None + + object: Optional[str] = None + + run_id: Optional[str] = None diff --git a/src/openai/types/evals/run_list_params.py b/src/openai/types/evals/run_list_params.py new file mode 100644 index 0000000000..383b89d85c --- /dev/null +++ b/src/openai/types/evals/run_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["RunListParams"] + + +class RunListParams(TypedDict, total=False): + after: str + """Identifier for the last run from the previous pagination request.""" + + limit: int + """Number of runs to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for runs by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ + + status: Literal["queued", "in_progress", "completed", "canceled", "failed"] + """Filter runs by status. + + One of `queued` | `in_progress` | `failed` | `completed` | `canceled`. + """ diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py new file mode 100644 index 0000000000..defd4aa6f9 --- /dev/null +++ b/src/openai/types/evals/run_list_response.py @@ -0,0 +1,452 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..responses.tool import Tool +from ..shared.metadata import Metadata +from ..graders.grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from ..responses.response_format_text_config import ResponseFormatTextConfig +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunListResponse", + "DataSource", + "DataSourceResponses", + "DataSourceResponsesSource", + "DataSourceResponsesSourceFileContent", + "DataSourceResponsesSourceFileContentContent", + "DataSourceResponsesSourceFileID", + "DataSourceResponsesSourceResponses", + "DataSourceResponsesInputMessages", + "DataSourceResponsesInputMessagesTemplate", + "DataSourceResponsesInputMessagesTemplateTemplate", + "DataSourceResponsesInputMessagesTemplateTemplateChatMessage", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItem", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage", + "DataSourceResponsesInputMessagesItemReference", + "DataSourceResponsesSamplingParams", + "DataSourceResponsesSamplingParamsText", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceResponsesSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceResponsesSourceFileContent(BaseModel): + content: 
List[DataSourceResponsesSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceResponsesSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceResponsesSourceResponses(BaseModel): + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional string to search the 'instructions' field. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. + """ + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + temperature: Optional[float] = None + """Sampling temperature. This is a query parameter used to select responses.""" + + tools: Optional[List[str]] = None + """List of tool names. This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceResponsesSource: TypeAlias = Annotated[ + Union[DataSourceResponsesSourceFileContent, DataSourceResponsesSourceFileID, DataSourceResponsesSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. 
Always `output_text`.""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudio, + GraderInputs, +] + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItem(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceResponsesInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceResponsesInputMessagesTemplateTemplateChatMessage, + DataSourceResponsesInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceResponsesInputMessagesTemplate(BaseModel): + template: List[DataSourceResponsesInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceResponsesInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the `item` namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceResponsesInputMessages: TypeAlias = Annotated[ + Union[DataSourceResponsesInputMessagesTemplate, DataSourceResponsesInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesSamplingParamsText(BaseModel): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + +class DataSourceResponsesSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + text: Optional[DataSourceResponsesSamplingParamsText] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tools: Optional[List[Tool]] = None + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceResponses(BaseModel): + """A ResponsesRunDataSource object describing a model sampling configuration.""" + + source: DataSourceResponsesSource + """Determines what populates the `item` namespace in this run's data source.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + input_messages: Optional[DataSourceResponsesInputMessages] = None + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. 
Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. "o3-mini").""" + + sampling_params: Optional[DataSourceResponsesSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + """Counters summarizing the outcomes of the evaluation run.""" + + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunListResponse(BaseModel): + """A schema representing an evaluation run.""" + + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py new file mode 100644 index 0000000000..4c218a0510 --- /dev/null +++ b/src/openai/types/evals/run_retrieve_response.py @@ -0,0 +1,452 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
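# ---------------------------------------------------------------------------
# Illustrative aside (not part of the generated diff): RunListParams, added in
# run_list_params.py above, is a TypedDict mirroring the query parameters for
# listing evaluation runs. A minimal sketch of building those params; the
# `client.evals.runs.list(...)` call in the trailing comment is an assumption
# about the resource method, not something shown in this diff, and the ids are
# placeholders.
from openai.types.evals.run_list_params import RunListParams

params: RunListParams = {
    "after": "evalrun_abc123",  # cursor: last run id from the previous page
    "limit": 20,                # number of runs to fetch
    "order": "desc",            # newest first
    "status": "completed",      # only completed runs
}
# e.g. (assumed method): client.evals.runs.list("eval_xyz", **params)
# ---------------------------------------------------------------------------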
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .eval_api_error import EvalAPIError +from ..responses.tool import Tool +from ..shared.metadata import Metadata +from ..graders.grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio +from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource +from ..responses.response_format_text_config import ResponseFormatTextConfig +from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource + +__all__ = [ + "RunRetrieveResponse", + "DataSource", + "DataSourceResponses", + "DataSourceResponsesSource", + "DataSourceResponsesSourceFileContent", + "DataSourceResponsesSourceFileContentContent", + "DataSourceResponsesSourceFileID", + "DataSourceResponsesSourceResponses", + "DataSourceResponsesInputMessages", + "DataSourceResponsesInputMessagesTemplate", + "DataSourceResponsesInputMessagesTemplateTemplate", + "DataSourceResponsesInputMessagesTemplateTemplateChatMessage", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItem", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText", + "DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage", + "DataSourceResponsesInputMessagesItemReference", + "DataSourceResponsesSamplingParams", + "DataSourceResponsesSamplingParamsText", + "PerModelUsage", + "PerTestingCriteriaResult", + "ResultCounts", +] + + +class DataSourceResponsesSourceFileContentContent(BaseModel): + item: Dict[str, object] + + sample: Optional[Dict[str, object]] = None + + +class DataSourceResponsesSourceFileContent(BaseModel): + content: List[DataSourceResponsesSourceFileContentContent] + """The content of the jsonl file.""" + + type: Literal["file_content"] + """The type of jsonl source. Always `file_content`.""" + + +class DataSourceResponsesSourceFileID(BaseModel): + id: str + """The identifier of the file.""" + + type: Literal["file_id"] + """The type of jsonl source. Always `file_id`.""" + + +class DataSourceResponsesSourceResponses(BaseModel): + """A EvalResponsesSource object describing a run data source configuration.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + created_after: Optional[int] = None + """Only include items created after this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + created_before: Optional[int] = None + """Only include items created before this timestamp (inclusive). + + This is a query parameter used to select responses. + """ + + instructions_search: Optional[str] = None + """Optional string to search the 'instructions' field. + + This is a query parameter used to select responses. + """ + + metadata: Optional[object] = None + """Metadata filter for the responses. + + This is a query parameter used to select responses. + """ + + model: Optional[str] = None + """The name of the model to find responses for. + + This is a query parameter used to select responses. 
+ """ + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + temperature: Optional[float] = None + """Sampling temperature. This is a query parameter used to select responses.""" + + tools: Optional[List[str]] = None + """List of tool names. This is a query parameter used to select responses.""" + + top_p: Optional[float] = None + """Nucleus sampling parameter. This is a query parameter used to select responses.""" + + users: Optional[List[str]] = None + """List of user identifiers. This is a query parameter used to select responses.""" + + +DataSourceResponsesSource: TypeAlias = Annotated[ + Union[DataSourceResponsesSourceFileContent, DataSourceResponsesSourceFileID, DataSourceResponsesSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesInputMessagesTemplateTemplateChatMessage(BaseModel): + content: str + """The content of the message.""" + + role: str + """The role of the message (e.g. "system", "assistant", "user").""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[ + str, + ResponseInputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentOutputText, + DataSourceResponsesInputMessagesTemplateTemplateEvalItemContentInputImage, + ResponseInputAudio, + GraderInputs, +] + + +class DataSourceResponsesInputMessagesTemplateTemplateEvalItem(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: DataSourceResponsesInputMessagesTemplateTemplateEvalItemContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. 
+ """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +DataSourceResponsesInputMessagesTemplateTemplate: TypeAlias = Union[ + DataSourceResponsesInputMessagesTemplateTemplateChatMessage, + DataSourceResponsesInputMessagesTemplateTemplateEvalItem, +] + + +class DataSourceResponsesInputMessagesTemplate(BaseModel): + template: List[DataSourceResponsesInputMessagesTemplateTemplate] + """A list of chat messages forming the prompt or context. + + May include variable references to the `item` namespace, ie {{item.name}}. + """ + + type: Literal["template"] + """The type of input messages. Always `template`.""" + + +class DataSourceResponsesInputMessagesItemReference(BaseModel): + item_reference: str + """A reference to a variable in the `item` namespace. Ie, "item.name" """ + + type: Literal["item_reference"] + """The type of input messages. Always `item_reference`.""" + + +DataSourceResponsesInputMessages: TypeAlias = Annotated[ + Union[DataSourceResponsesInputMessagesTemplate, DataSourceResponsesInputMessagesItemReference], + PropertyInfo(discriminator="type"), +] + + +class DataSourceResponsesSamplingParamsText(BaseModel): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + +class DataSourceResponsesSamplingParams(BaseModel): + max_completion_tokens: Optional[int] = None + """The maximum number of tokens in the generated output.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. 
+ """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + text: Optional[DataSourceResponsesSamplingParamsText] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tools: Optional[List[Tool]] = None + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class DataSourceResponses(BaseModel): + """A ResponsesRunDataSource object describing a model sampling configuration.""" + + source: DataSourceResponsesSource + """Determines what populates the `item` namespace in this run's data source.""" + + type: Literal["responses"] + """The type of run data source. Always `responses`.""" + + input_messages: Optional[DataSourceResponsesInputMessages] = None + """Used when sampling from a model. + + Dictates the structure of the messages passed into the model. Can either be a + reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template + with variable references to the `item` namespace. + """ + + model: Optional[str] = None + """The name of the model to use for generating completions (e.g. 
"o3-mini").""" + + sampling_params: Optional[DataSourceResponsesSamplingParams] = None + + +DataSource: TypeAlias = Annotated[ + Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceResponses], + PropertyInfo(discriminator="type"), +] + + +class PerModelUsage(BaseModel): + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + invocation_count: int + """The number of invocations.""" + + run_model_name: str = FieldInfo(alias="model_name") + """The name of the model.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class PerTestingCriteriaResult(BaseModel): + failed: int + """Number of tests failed for this criteria.""" + + passed: int + """Number of tests passed for this criteria.""" + + testing_criteria: str + """A description of the testing criteria.""" + + +class ResultCounts(BaseModel): + """Counters summarizing the outcomes of the evaluation run.""" + + errored: int + """Number of output items that resulted in an error.""" + + failed: int + """Number of output items that failed to pass the evaluation.""" + + passed: int + """Number of output items that passed the evaluation.""" + + total: int + """Total number of executed output items.""" + + +class RunRetrieveResponse(BaseModel): + """A schema representing an evaluation run.""" + + id: str + """Unique identifier for the evaluation run.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + data_source: DataSource + """Information about the run's data source.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + eval_id: str + """The identifier of the associated evaluation.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: str + """The model that is evaluated, if applicable.""" + + name: str + """The name of the evaluation run.""" + + object: Literal["eval.run"] + """The type of the object. Always "eval.run".""" + + per_model_usage: List[PerModelUsage] + """Usage statistics for each model during the evaluation run.""" + + per_testing_criteria_results: List[PerTestingCriteriaResult] + """Results per testing criteria applied during the evaluation run.""" + + report_url: str + """The URL to the rendered evaluation run report on the UI dashboard.""" + + result_counts: ResultCounts + """Counters summarizing the outcomes of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/runs/__init__.py b/src/openai/types/evals/runs/__init__.py new file mode 100644 index 0000000000..b77cbb6acd --- /dev/null +++ b/src/openai/types/evals/runs/__init__.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .output_item_list_params import OutputItemListParams as OutputItemListParams +from .output_item_list_response import OutputItemListResponse as OutputItemListResponse +from .output_item_retrieve_response import OutputItemRetrieveResponse as OutputItemRetrieveResponse diff --git a/src/openai/types/evals/runs/output_item_list_params.py b/src/openai/types/evals/runs/output_item_list_params.py new file mode 100644 index 0000000000..073bfc69a7 --- /dev/null +++ b/src/openai/types/evals/runs/output_item_list_params.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["OutputItemListParams"] + + +class OutputItemListParams(TypedDict, total=False): + eval_id: Required[str] + + after: str + """Identifier for the last output item from the previous pagination request.""" + + limit: int + """Number of output items to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for output items by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ + + status: Literal["fail", "pass"] + """Filter output items by status. + + Use `failed` to filter by failed output items or `pass` to filter by passed + output items. + """ diff --git a/src/openai/types/evals/runs/output_item_list_response.py b/src/openai/types/evals/runs/output_item_list_response.py new file mode 100644 index 0000000000..a906a29df7 --- /dev/null +++ b/src/openai/types/evals/runs/output_item_list_response.py @@ -0,0 +1,144 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, List, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ...._models import BaseModel +from ..eval_api_error import EvalAPIError + +__all__ = ["OutputItemListResponse", "Result", "Sample", "SampleInput", "SampleOutput", "SampleUsage"] + + +class Result(BaseModel): + """A single grader result for an evaluation run output item.""" + + name: str + """The name of the grader.""" + + passed: bool + """Whether the grader considered the output a pass.""" + + score: float + """The numeric score produced by the grader.""" + + sample: Optional[Dict[str, object]] = None + """Optional sample or intermediate data produced by the grader.""" + + type: Optional[str] = None + """The grader type (for example, "string-check-grader").""" + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class SampleInput(BaseModel): + """An input message.""" + + content: str + """The content of the message.""" + + role: str + """The role of the message sender (e.g., system, user, developer).""" + + +class SampleOutput(BaseModel): + content: Optional[str] = None + """The content of the message.""" + + role: Optional[str] = None + """The role of the message (e.g. 
"system", "assistant", "user").""" + + +class SampleUsage(BaseModel): + """Token usage details for the sample.""" + + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class Sample(BaseModel): + """A sample containing the input and output of the evaluation run.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + finish_reason: str + """The reason why the sample generation was finished.""" + + input: List[SampleInput] + """An array of input messages.""" + + max_completion_tokens: int + """The maximum number of tokens allowed for completion.""" + + model: str + """The model used for generating the sample.""" + + output: List[SampleOutput] + """An array of output messages.""" + + seed: int + """The seed used for generating the sample.""" + + temperature: float + """The sampling temperature used.""" + + top_p: float + """The top_p value used for sampling.""" + + usage: SampleUsage + """Token usage details for the sample.""" + + +class OutputItemListResponse(BaseModel): + """A schema representing an evaluation run output item.""" + + id: str + """Unique identifier for the evaluation run output item.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + datasource_item: Dict[str, object] + """Details of the input data source item.""" + + datasource_item_id: int + """The identifier for the data source item.""" + + eval_id: str + """The identifier of the evaluation group.""" + + object: Literal["eval.run.output_item"] + """The type of the object. Always "eval.run.output_item".""" + + results: List[Result] + """A list of grader results for this output item.""" + + run_id: str + """The identifier of the evaluation run associated with this output item.""" + + sample: Sample + """A sample containing the input and output of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/evals/runs/output_item_retrieve_response.py b/src/openai/types/evals/runs/output_item_retrieve_response.py new file mode 100644 index 0000000000..42ba4b2864 --- /dev/null +++ b/src/openai/types/evals/runs/output_item_retrieve_response.py @@ -0,0 +1,144 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import TYPE_CHECKING, Dict, List, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ...._models import BaseModel +from ..eval_api_error import EvalAPIError + +__all__ = ["OutputItemRetrieveResponse", "Result", "Sample", "SampleInput", "SampleOutput", "SampleUsage"] + + +class Result(BaseModel): + """A single grader result for an evaluation run output item.""" + + name: str + """The name of the grader.""" + + passed: bool + """Whether the grader considered the output a pass.""" + + score: float + """The numeric score produced by the grader.""" + + sample: Optional[Dict[str, object]] = None + """Optional sample or intermediate data produced by the grader.""" + + type: Optional[str] = None + """The grader type (for example, "string-check-grader").""" + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. 
+ __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class SampleInput(BaseModel): + """An input message.""" + + content: str + """The content of the message.""" + + role: str + """The role of the message sender (e.g., system, user, developer).""" + + +class SampleOutput(BaseModel): + content: Optional[str] = None + """The content of the message.""" + + role: Optional[str] = None + """The role of the message (e.g. "system", "assistant", "user").""" + + +class SampleUsage(BaseModel): + """Token usage details for the sample.""" + + cached_tokens: int + """The number of tokens retrieved from cache.""" + + completion_tokens: int + """The number of completion tokens generated.""" + + prompt_tokens: int + """The number of prompt tokens used.""" + + total_tokens: int + """The total number of tokens used.""" + + +class Sample(BaseModel): + """A sample containing the input and output of the evaluation run.""" + + error: EvalAPIError + """An object representing an error response from the Eval API.""" + + finish_reason: str + """The reason why the sample generation was finished.""" + + input: List[SampleInput] + """An array of input messages.""" + + max_completion_tokens: int + """The maximum number of tokens allowed for completion.""" + + model: str + """The model used for generating the sample.""" + + output: List[SampleOutput] + """An array of output messages.""" + + seed: int + """The seed used for generating the sample.""" + + temperature: float + """The sampling temperature used.""" + + top_p: float + """The top_p value used for sampling.""" + + usage: SampleUsage + """Token usage details for the sample.""" + + +class OutputItemRetrieveResponse(BaseModel): + """A schema representing an evaluation run output item.""" + + id: str + """Unique identifier for the evaluation run output item.""" + + created_at: int + """Unix timestamp (in seconds) when the evaluation run was created.""" + + datasource_item: Dict[str, object] + """Details of the input data source item.""" + + datasource_item_id: int + """The identifier for the data source item.""" + + eval_id: str + """The identifier of the evaluation group.""" + + object: Literal["eval.run.output_item"] + """The type of the object. Always "eval.run.output_item".""" + + results: List[Result] + """A list of grader results for this output item.""" + + run_id: str + """The identifier of the evaluation run associated with this output item.""" + + sample: Sample + """A sample containing the input and output of the evaluation run.""" + + status: str + """The status of the evaluation run.""" diff --git a/src/openai/types/file_chunking_strategy.py b/src/openai/types/file_chunking_strategy.py new file mode 100644 index 0000000000..ee96bd7884 --- /dev/null +++ b/src/openai/types/file_chunking_strategy.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
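# ---------------------------------------------------------------------------
# Illustrative aside (not part of the generated diff): each output item above
# carries a list of grader `results` (name, passed, score) alongside the sampled
# input/output. A minimal sketch, assuming only the fields shown in the diff,
# that collects which graders an output item failed.
from typing import List

from openai.types.evals.runs.output_item_list_response import OutputItemListResponse


def failed_graders(item: OutputItemListResponse) -> List[str]:
    """Names of graders whose result did not pass for this output item."""
    return [result.name for result in item.results if not result.passed]
# ---------------------------------------------------------------------------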
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from .._utils import PropertyInfo +from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject + +__all__ = ["FileChunkingStrategy"] + +FileChunkingStrategy: TypeAlias = Annotated[ + Union[StaticFileChunkingStrategyObject, OtherFileChunkingStrategyObject], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/file_chunking_strategy_param.py b/src/openai/types/file_chunking_strategy_param.py new file mode 100644 index 0000000000..25d94286d8 --- /dev/null +++ b/src/openai/types/file_chunking_strategy_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam +from .static_file_chunking_strategy_object_param import StaticFileChunkingStrategyObjectParam + +__all__ = ["FileChunkingStrategyParam"] + +FileChunkingStrategyParam: TypeAlias = Union[AutoFileChunkingStrategyParam, StaticFileChunkingStrategyObjectParam] diff --git a/src/openai/types/file_content.py b/src/openai/types/file_content.py index b4aa08a9a3..d89eee623e 100644 --- a/src/openai/types/file_content.py +++ b/src/openai/types/file_content.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing_extensions import TypeAlias __all__ = ["FileContent"] -FileContent = str +FileContent: TypeAlias = str diff --git a/src/openai/types/file_create_params.py b/src/openai/types/file_create_params.py index caa913d4d2..f4367f7a7d 100644 --- a/src/openai/types/file_create_params.py +++ b/src/openai/types/file_create_params.py @@ -5,21 +5,46 @@ from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .file_purpose import FilePurpose -__all__ = ["FileCreateParams"] +__all__ = ["FileCreateParams", "ExpiresAfter"] class FileCreateParams(TypedDict, total=False): file: Required[FileTypes] """The File object (not file name) to be uploaded.""" - purpose: Required[Literal["assistants", "batch", "fine-tune"]] + purpose: Required[FilePurpose] """The intended purpose of the uploaded file. - Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + One of: - `assistants`: Used in the Assistants API - `batch`: Used in the Batch + API - `fine-tune`: Used for fine-tuning - `vision`: Images used for vision + fine-tuning - `user_data`: Flexible file type for any purpose - `evals`: Used + for eval data sets + """ + + expires_after: ExpiresAfter + """The expiration policy for a file. + + By default, files with `purpose=batch` expire after 30 days and all other files + are persisted until they are manually deleted. + """ + + +class ExpiresAfter(TypedDict, total=False): + """The expiration policy for a file. 
+ + By default, files with `purpose=batch` expire after 30 days and all other files are persisted until they are manually deleted. + """ + + anchor: Required[Literal["created_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `created_at`. + """ + + seconds: Required[int] + """The number of seconds after the anchor time that the file will expire. + + Must be between 3600 (1 hour) and 2592000 (30 days). """ diff --git a/src/openai/types/file_list_params.py b/src/openai/types/file_list_params.py index 212eca13c0..058d874c29 100644 --- a/src/openai/types/file_list_params.py +++ b/src/openai/types/file_list_params.py @@ -2,11 +2,32 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["FileListParams"] class FileListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 10,000, and the default is 10,000. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ + purpose: str """Only return files with the given purpose.""" diff --git a/src/openai/types/file_object.py b/src/openai/types/file_object.py index 6e2bf310a4..4a9901fd3f 100644 --- a/src/openai/types/file_object.py +++ b/src/openai/types/file_object.py @@ -9,6 +9,8 @@ class FileObject(BaseModel): + """The `File` object represents a document that has been uploaded to OpenAI.""" + id: str """The file identifier, which can be referenced in the API endpoints.""" @@ -25,12 +27,19 @@ class FileObject(BaseModel): """The object type, which is always `file`.""" purpose: Literal[ - "assistants", "assistants_output", "batch", "batch_output", "fine-tune", "fine-tune-results", "vision" + "assistants", + "assistants_output", + "batch", + "batch_output", + "fine-tune", + "fine-tune-results", + "vision", + "user_data", ] """The intended purpose of the file. Supported values are `assistants`, `assistants_output`, `batch`, `batch_output`, - `fine-tune`, `fine-tune-results` and `vision`. + `fine-tune`, `fine-tune-results`, `vision`, and `user_data`. """ status: Literal["uploaded", "processed", "error"] @@ -40,6 +49,9 @@ class FileObject(BaseModel): `error`. """ + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the file will expire.""" + status_details: Optional[str] = None """Deprecated. diff --git a/src/openai/types/file_purpose.py b/src/openai/types/file_purpose.py new file mode 100644 index 0000000000..b2c2d5f9fc --- /dev/null +++ b/src/openai/types/file_purpose.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
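# ---------------------------------------------------------------------------
# Illustrative aside (not part of the generated diff): FileCreateParams above
# gains an `expires_after` policy (anchor `created_at` plus a lifetime between
# 3600 and 2592000 seconds). A minimal sketch of upload params for an evals data
# file that expires after 7 days; the inline JSONL bytes are made up for
# illustration, and forwarding these params to `client.files.create(...)` is an
# assumption about the resource method rather than something shown here.
from openai.types.file_create_params import ExpiresAfter, FileCreateParams

expiry: ExpiresAfter = {
    "anchor": "created_at",       # the only supported anchor
    "seconds": 7 * 24 * 60 * 60,  # 604800s, within the 3600..2592000 range
}

upload: FileCreateParams = {
    "file": b'{"item": {"name": "example"}}\n',  # hypothetical JSONL content
    "purpose": "evals",
    "expires_after": expiry,
}
# e.g. (assumed call): client.files.create(**upload)
# ---------------------------------------------------------------------------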
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["FilePurpose"] + +FilePurpose: TypeAlias = Literal["assistants", "batch", "fine-tune", "vision", "user_data", "evals"] diff --git a/src/openai/types/fine_tuning/__init__.py b/src/openai/types/fine_tuning/__init__.py index 92b81329b1..cc664eacea 100644 --- a/src/openai/types/fine_tuning/__init__.py +++ b/src/openai/types/fine_tuning/__init__.py @@ -2,13 +2,25 @@ from __future__ import annotations +from .dpo_method import DpoMethod as DpoMethod from .fine_tuning_job import FineTuningJob as FineTuningJob from .job_list_params import JobListParams as JobListParams +from .dpo_method_param import DpoMethodParam as DpoMethodParam from .job_create_params import JobCreateParams as JobCreateParams +from .supervised_method import SupervisedMethod as SupervisedMethod +from .dpo_hyperparameters import DpoHyperparameters as DpoHyperparameters +from .reinforcement_method import ReinforcementMethod as ReinforcementMethod from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent from .job_list_events_params import JobListEventsParams as JobListEventsParams +from .supervised_method_param import SupervisedMethodParam as SupervisedMethodParam +from .dpo_hyperparameters_param import DpoHyperparametersParam as DpoHyperparametersParam +from .reinforcement_method_param import ReinforcementMethodParam as ReinforcementMethodParam +from .supervised_hyperparameters import SupervisedHyperparameters as SupervisedHyperparameters from .fine_tuning_job_integration import FineTuningJobIntegration as FineTuningJobIntegration +from .reinforcement_hyperparameters import ReinforcementHyperparameters as ReinforcementHyperparameters +from .supervised_hyperparameters_param import SupervisedHyperparametersParam as SupervisedHyperparametersParam from .fine_tuning_job_wandb_integration import FineTuningJobWandbIntegration as FineTuningJobWandbIntegration +from .reinforcement_hyperparameters_param import ReinforcementHyperparametersParam as ReinforcementHyperparametersParam from .fine_tuning_job_wandb_integration_object import ( FineTuningJobWandbIntegrationObject as FineTuningJobWandbIntegrationObject, ) diff --git a/src/openai/types/fine_tuning/alpha/__init__.py b/src/openai/types/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..6394961b0b --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/__init__.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .grader_run_params import GraderRunParams as GraderRunParams +from .grader_run_response import GraderRunResponse as GraderRunResponse +from .grader_validate_params import GraderValidateParams as GraderValidateParams +from .grader_validate_response import GraderValidateResponse as GraderValidateResponse diff --git a/src/openai/types/fine_tuning/alpha/grader_run_params.py b/src/openai/types/fine_tuning/alpha/grader_run_params.py new file mode 100644 index 0000000000..646407fe09 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_run_params.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Required, TypeAlias, TypedDict + +from ...graders.multi_grader_param import MultiGraderParam +from ...graders.python_grader_param import PythonGraderParam +from ...graders.score_model_grader_param import ScoreModelGraderParam +from ...graders.string_check_grader_param import StringCheckGraderParam +from ...graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["GraderRunParams", "Grader"] + + +class GraderRunParams(TypedDict, total=False): + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + model_sample: Required[str] + """The model sample to be evaluated. + + This value will be used to populate the `sample` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + The `output_json` variable will be populated if the model sample is a valid JSON + string. + """ + + item: object + """The dataset item provided to the grader. + + This will be used to populate the `item` namespace. See + [the guide](https://platform.openai.com/docs/guides/graders) for more details. + """ + + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] diff --git a/src/openai/types/fine_tuning/alpha/grader_run_response.py b/src/openai/types/fine_tuning/alpha/grader_run_response.py new file mode 100644 index 0000000000..8ef046d133 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_run_response.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional + +from pydantic import Field as FieldInfo + +from ...._models import BaseModel + +__all__ = ["GraderRunResponse", "Metadata", "MetadataErrors"] + + +class MetadataErrors(BaseModel): + formula_parse_error: bool + + invalid_variable_error: bool + + api_model_grader_parse_error: bool = FieldInfo(alias="model_grader_parse_error") + + api_model_grader_refusal_error: bool = FieldInfo(alias="model_grader_refusal_error") + + api_model_grader_server_error: bool = FieldInfo(alias="model_grader_server_error") + + api_model_grader_server_error_details: Optional[str] = FieldInfo( + alias="model_grader_server_error_details", default=None + ) + + other_error: bool + + python_grader_runtime_error: bool + + python_grader_runtime_error_details: Optional[str] = None + + python_grader_server_error: bool + + python_grader_server_error_type: Optional[str] = None + + sample_parse_error: bool + + truncated_observation_error: bool + + unresponsive_reward_error: bool + + +class Metadata(BaseModel): + errors: MetadataErrors + + execution_time: float + + name: str + + sampled_model_name: Optional[str] = None + + scores: Dict[str, object] + + token_usage: Optional[int] = None + + type: str + + +class GraderRunResponse(BaseModel): + metadata: Metadata + + api_model_grader_token_usage_per_model: Dict[str, object] = FieldInfo(alias="model_grader_token_usage_per_model") + + reward: float + + sub_rewards: Dict[str, object] diff --git a/src/openai/types/fine_tuning/alpha/grader_validate_params.py b/src/openai/types/fine_tuning/alpha/grader_validate_params.py new file mode 100644 index 0000000000..fe9eb44e32 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_validate_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
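# Usage sketch (illustrative): running a grader against a model sample using the
# GraderRunParams typed above. Assumes the companion `fine_tuning.alpha.graders`
# resource added alongside these types exposes `run()`; the string-check grader
# fields follow the graders guide, and the template strings are placeholders.
from openai import OpenAI

client = OpenAI()

result = client.fine_tuning.alpha.graders.run(
    grader={
        "type": "string_check",
        "name": "exact_match",
        "input": "{{sample.output_text}}",
        "reference": "{{item.answer}}",
        "operation": "eq",
    },
    model_sample="Paris",
    item={"answer": "Paris"},
)
print(result.reward, result.metadata.execution_time)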
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Required, TypeAlias, TypedDict + +from ...graders.multi_grader_param import MultiGraderParam +from ...graders.python_grader_param import PythonGraderParam +from ...graders.score_model_grader_param import ScoreModelGraderParam +from ...graders.string_check_grader_param import StringCheckGraderParam +from ...graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["GraderValidateParams", "Grader"] + + +class GraderValidateParams(TypedDict, total=False): + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] diff --git a/src/openai/types/fine_tuning/alpha/grader_validate_response.py b/src/openai/types/fine_tuning/alpha/grader_validate_response.py new file mode 100644 index 0000000000..b373292d80 --- /dev/null +++ b/src/openai/types/fine_tuning/alpha/grader_validate_response.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import TypeAlias + +from ...._models import BaseModel +from ...graders.multi_grader import MultiGrader +from ...graders.python_grader import PythonGrader +from ...graders.score_model_grader import ScoreModelGrader +from ...graders.string_check_grader import StringCheckGrader +from ...graders.text_similarity_grader import TextSimilarityGrader + +__all__ = ["GraderValidateResponse", "Grader"] + +Grader: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, MultiGrader] + + +class GraderValidateResponse(BaseModel): + grader: Optional[Grader] = None + """The grader used for the fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/checkpoints/__init__.py b/src/openai/types/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..2947b33145 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/__init__.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .permission_create_params import PermissionCreateParams as PermissionCreateParams +from .permission_create_response import PermissionCreateResponse as PermissionCreateResponse +from .permission_delete_response import PermissionDeleteResponse as PermissionDeleteResponse +from .permission_retrieve_params import PermissionRetrieveParams as PermissionRetrieveParams +from .permission_retrieve_response import PermissionRetrieveResponse as PermissionRetrieveResponse diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_params.py b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py new file mode 100644 index 0000000000..e7cf4e4ee4 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ...._types import SequenceNotStr + +__all__ = ["PermissionCreateParams"] + + +class PermissionCreateParams(TypedDict, total=False): + project_ids: Required[SequenceNotStr[str]] + """The project identifiers to grant access to.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_response.py b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py new file mode 100644 index 0000000000..459fa9dee7 --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionCreateResponse"] + + +class PermissionCreateResponse(BaseModel): + """ + The `checkpoint.permission` object represents a permission for a fine-tuned model checkpoint. + """ + + id: str + """The permission identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the permission was created.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" + + project_id: str + """The project identifier that the permission is for.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py new file mode 100644 index 0000000000..1a92d912fa --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionDeleteResponse"] + + +class PermissionDeleteResponse(BaseModel): + id: str + """The ID of the fine-tuned model checkpoint permission that was deleted.""" + + deleted: bool + """Whether the fine-tuned model checkpoint permission was successfully deleted.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py new file mode 100644 index 0000000000..6e66a867ca --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["PermissionRetrieveParams"] + + +class PermissionRetrieveParams(TypedDict, total=False): + after: str + """Identifier for the last permission ID from the previous pagination request.""" + + limit: int + """Number of permissions to retrieve.""" + + order: Literal["ascending", "descending"] + """The order in which to retrieve permissions.""" + + project_id: str + """The ID of the project to get permissions for.""" diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py new file mode 100644 index 0000000000..34208958ef --- /dev/null +++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["PermissionRetrieveResponse", "Data"] + + +class Data(BaseModel): + """ + The `checkpoint.permission` object represents a permission for a fine-tuned model checkpoint. + """ + + id: str + """The permission identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the permission was created.""" + + object: Literal["checkpoint.permission"] + """The object type, which is always "checkpoint.permission".""" + + project_id: str + """The project identifier that the permission is for.""" + + +class PermissionRetrieveResponse(BaseModel): + data: List[Data] + + has_more: bool + + object: Literal["list"] + + first_id: Optional[str] = None + + last_id: Optional[str] = None diff --git a/src/openai/types/fine_tuning/dpo_hyperparameters.py b/src/openai/types/fine_tuning/dpo_hyperparameters.py new file mode 100644 index 0000000000..cd39f308a4 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_hyperparameters.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["DpoHyperparameters"] + + +class DpoHyperparameters(BaseModel): + """The hyperparameters used for the DPO fine-tuning job.""" + + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float, None] = None + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/dpo_hyperparameters_param.py b/src/openai/types/fine_tuning/dpo_hyperparameters_param.py new file mode 100644 index 0000000000..12b2c41ca8 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_hyperparameters_param.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
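# Usage sketch (illustrative): managing checkpoint permissions with the params and
# response types above. Assumes a `fine_tuning.checkpoints.permissions` resource
# backs these types and takes the checkpoint ID as its first argument; the
# checkpoint and project IDs are placeholders.
from openai import OpenAI

client = OpenAI()

checkpoint = "ft:gpt-4o-mini:org:custom:ckpt-step-100"

# Grant two projects access to the fine-tuned model checkpoint.
client.fine_tuning.checkpoints.permissions.create(
    checkpoint,
    project_ids=["proj_abc", "proj_def"],
)

# Page through existing permissions, newest first.
perms = client.fine_tuning.checkpoints.permissions.retrieve(
    checkpoint,
    order="descending",
    limit=10,
)
for p in perms.data:
    print(p.id, p.project_id)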
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["DpoHyperparametersParam"] + + +class DpoHyperparametersParam(TypedDict, total=False): + """The hyperparameters used for the DPO fine-tuning job.""" + + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float] + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/dpo_method.py b/src/openai/types/fine_tuning/dpo_method.py new file mode 100644 index 0000000000..452c182016 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_method.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .dpo_hyperparameters import DpoHyperparameters + +__all__ = ["DpoMethod"] + + +class DpoMethod(BaseModel): + """Configuration for the DPO fine-tuning method.""" + + hyperparameters: Optional[DpoHyperparameters] = None + """The hyperparameters used for the DPO fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/dpo_method_param.py b/src/openai/types/fine_tuning/dpo_method_param.py new file mode 100644 index 0000000000..6bd74d9760 --- /dev/null +++ b/src/openai/types/fine_tuning/dpo_method_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .dpo_hyperparameters_param import DpoHyperparametersParam + +__all__ = ["DpoMethodParam"] + + +class DpoMethodParam(TypedDict, total=False): + """Configuration for the DPO fine-tuning method.""" + + hyperparameters: DpoHyperparametersParam + """The hyperparameters used for the DPO fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 7ac8792787..bb8a4d597b 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -4,12 +4,20 @@ from typing_extensions import Literal from ..._models import BaseModel +from .dpo_method import DpoMethod +from ..shared.metadata import Metadata +from .supervised_method import SupervisedMethod +from .reinforcement_method import ReinforcementMethod from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject -__all__ = ["FineTuningJob", "Error", "Hyperparameters"] +__all__ = ["FineTuningJob", "Error", "Hyperparameters", "Method"] class Error(BaseModel): + """ + For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure. + """ + code: str """A machine-readable error code.""" @@ -24,16 +32,52 @@ class Error(BaseModel): class Hyperparameters(BaseModel): - n_epochs: Union[Literal["auto"], int] + """The hyperparameters used for the fine-tuning job. 
+ + This value will only be returned when running `supervised` jobs. + """ + + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None """The number of epochs to train the model for. - An epoch refers to one full cycle through the training dataset. "auto" decides - the optimal number of epochs based on the size of the dataset. If setting the - number manually, we support any number between 1 and 50 epochs. + An epoch refers to one full cycle through the training dataset. """ +class Method(BaseModel): + """The method used for fine-tuning.""" + + type: Literal["supervised", "dpo", "reinforcement"] + """The type of method. Is either `supervised`, `dpo`, or `reinforcement`.""" + + dpo: Optional[DpoMethod] = None + """Configuration for the DPO fine-tuning method.""" + + reinforcement: Optional[ReinforcementMethod] = None + """Configuration for the reinforcement fine-tuning method.""" + + supervised: Optional[SupervisedMethod] = None + """Configuration for the supervised fine-tuning method.""" + + class FineTuningJob(BaseModel): + """ + The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. + """ + id: str """The object identifier, which can be referenced in the API endpoints.""" @@ -61,8 +105,7 @@ class FineTuningJob(BaseModel): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) - for more details. + This value will only be returned when running `supervised` jobs. """ model: str @@ -118,3 +161,16 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + method: Optional[Method] = None + """The method used for fine-tuning.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 2d204bb980..7452b818c6 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+import builtins +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel @@ -8,12 +10,25 @@ class FineTuningJobEvent(BaseModel): + """Fine-tuning job event object""" + id: str + """The object identifier.""" created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" level: Literal["info", "warn", "error"] + """The log level of the event.""" message: str + """The message of the event.""" object: Literal["fine_tuning.job.event"] + """The object type, which is always "fine_tuning.job.event".""" + + data: Optional[builtins.object] = None + """The data associated with the event.""" + + type: Optional[Literal["message", "metrics"]] = None + """The type of event.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py index 8076313cae..ae85dde581 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_integration.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - - from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject +__all__ = ["FineTuningJobIntegration"] + FineTuningJobIntegration = FineTuningJobWandbIntegrationObject diff --git a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py index 4ac282eb54..0e33aa84c8 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_wandb_integration.py @@ -8,6 +8,13 @@ class FineTuningJobWandbIntegration(BaseModel): + """The settings for your integration with Weights and Biases. + + This payload specifies the project that + metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags + to your run, and set a default entity (team, username, etc) to be associated with your run. + """ + project: str """The name of the project that the new run will be created under.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index 1925f90d12..181bede2d9 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -2,18 +2,24 @@ from __future__ import annotations -from typing import List, Union, Iterable, Optional +from typing import Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] +from ..._types import SequenceNotStr +from .dpo_method_param import DpoMethodParam +from ..shared_params.metadata import Metadata +from .supervised_method_param import SupervisedMethodParam +from .reinforcement_method_param import ReinforcementMethodParam + +__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb", "Method"] class JobCreateParams(TypedDict, total=False): - model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]]] + model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]]] """The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). 
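# Usage sketch (illustrative): reading the new `method` and `metadata` fields from a
# fine-tuning job and walking its event log. The job ID is a placeholder.
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.retrieve("ftjob-abc123")
print(job.status, job.metadata)
if job.method is not None:
    print("method:", job.method.type)

for event in client.fine_tuning.jobs.list_events("ftjob-abc123", limit=10):
    print(event.created_at, event.level, event.message)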
+ [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). """ training_file: Required[str] @@ -25,16 +31,40 @@ class JobCreateParams(TypedDict, total=False): Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. """ hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" + """ + The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + """ integrations: Optional[Iterable[Integration]] """A list of integrations to enable for your fine-tuning job.""" + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + method: Method + """The method used for fine-tuning.""" + seed: Optional[int] """The seed controls the reproducibility of the job. @@ -45,11 +75,11 @@ class JobCreateParams(TypedDict, total=False): suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. """ validation_file: Optional[str] @@ -63,12 +93,18 @@ class JobCreateParams(TypedDict, total=False): Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + See the + [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more details. """ class Hyperparameters(TypedDict, total=False): + """ + The hyperparameters used for the fine-tuning job. + This value is now deprecated in favor of `method`, and should be passed in under the `method` parameter. + """ + batch_size: Union[Literal["auto"], int] """Number of examples in each batch. @@ -90,6 +126,13 @@ class Hyperparameters(TypedDict, total=False): class IntegrationWandb(TypedDict, total=False): + """The settings for your integration with Weights and Biases. + + This payload specifies the project that + metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags + to your run, and set a default entity (team, username, etc) to be associated with your run. 
+ """ + project: Required[str] """The name of the project that the new run will be created under.""" @@ -107,7 +150,7 @@ class IntegrationWandb(TypedDict, total=False): If not set, we will use the Job ID as the name. """ - tags: List[str] + tags: SequenceNotStr[str] """A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some default tags are generated @@ -129,3 +172,19 @@ class Integration(TypedDict, total=False): can set an explicit display name for your run, add tags to your run, and set a default entity (team, username, etc) to be associated with your run. """ + + +class Method(TypedDict, total=False): + """The method used for fine-tuning.""" + + type: Required[Literal["supervised", "dpo", "reinforcement"]] + """The type of method. Is either `supervised`, `dpo`, or `reinforcement`.""" + + dpo: DpoMethodParam + """Configuration for the DPO fine-tuning method.""" + + reinforcement: ReinforcementMethodParam + """Configuration for the reinforcement fine-tuning method.""" + + supervised: SupervisedMethodParam + """Configuration for the supervised fine-tuning method.""" diff --git a/src/openai/types/fine_tuning/job_list_params.py b/src/openai/types/fine_tuning/job_list_params.py index 5c075ca33f..b79f3ce86a 100644 --- a/src/openai/types/fine_tuning/job_list_params.py +++ b/src/openai/types/fine_tuning/job_list_params.py @@ -2,6 +2,7 @@ from __future__ import annotations +from typing import Dict, Optional from typing_extensions import TypedDict __all__ = ["JobListParams"] @@ -13,3 +14,10 @@ class JobListParams(TypedDict, total=False): limit: int """Number of fine-tuning jobs to retrieve.""" + + metadata: Optional[Dict[str, str]] + """Optional metadata filter. + + To filter, use the syntax `metadata[k]=v`. Alternatively, set `metadata=null` to + indicate no metadata. + """ diff --git a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py index bd07317a3e..f8a04b6395 100644 --- a/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py +++ b/src/openai/types/fine_tuning/jobs/fine_tuning_job_checkpoint.py @@ -9,6 +9,8 @@ class Metrics(BaseModel): + """Metrics at the step number during the fine-tuning job.""" + full_valid_loss: Optional[float] = None full_valid_mean_token_accuracy: Optional[float] = None @@ -25,6 +27,10 @@ class Metrics(BaseModel): class FineTuningJobCheckpoint(BaseModel): + """ + The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is ready to use. + """ + id: str """The checkpoint identifier, which can be referenced in the API endpoints.""" diff --git a/src/openai/types/fine_tuning/reinforcement_hyperparameters.py b/src/openai/types/fine_tuning/reinforcement_hyperparameters.py new file mode 100644 index 0000000000..4c289fd659 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_hyperparameters.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ReinforcementHyperparameters"] + + +class ReinforcementHyperparameters(BaseModel): + """The hyperparameters used for the reinforcement fine-tuning job.""" + + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. 
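# Usage sketch (illustrative): creating a fine-tuning job with the new `method` and
# `metadata` parameters typed above, then filtering the job list by metadata. The
# training file ID is a placeholder.
from openai import OpenAI

client = OpenAI()

job = client.fine_tuning.jobs.create(
    model="gpt-4o-mini",
    training_file="file-abc123",
    suffix="custom-model-name",
    metadata={"team": "sdk-examples"},
    method={
        "type": "supervised",
        "supervised": {
            "hyperparameters": {"n_epochs": 3, "learning_rate_multiplier": "auto"}
        },
    },
)

# List only jobs tagged with the same metadata key/value.
for j in client.fine_tuning.jobs.list(metadata={"team": "sdk-examples"}):
    print(j.id, j.status)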
+ """ + + compute_multiplier: Union[Literal["auto"], float, None] = None + """ + Multiplier on amount of compute used for exploring search space during training. + """ + + eval_interval: Union[Literal["auto"], int, None] = None + """The number of training steps between evaluation runs.""" + + eval_samples: Union[Literal["auto"], int, None] = None + """Number of evaluation samples to generate per training step.""" + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + reasoning_effort: Optional[Literal["default", "low", "medium", "high"]] = None + """Level of reasoning effort.""" diff --git a/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py b/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py new file mode 100644 index 0000000000..7be716f143 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_hyperparameters_param.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["ReinforcementHyperparametersParam"] + + +class ReinforcementHyperparametersParam(TypedDict, total=False): + """The hyperparameters used for the reinforcement fine-tuning job.""" + + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + compute_multiplier: Union[Literal["auto"], float] + """ + Multiplier on amount of compute used for exploring search space during training. + """ + + eval_interval: Union[Literal["auto"], int] + """The number of training steps between evaluation runs.""" + + eval_samples: Union[Literal["auto"], int] + """Number of evaluation samples to generate per training step.""" + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + reasoning_effort: Literal["default", "low", "medium", "high"] + """Level of reasoning effort.""" diff --git a/src/openai/types/fine_tuning/reinforcement_method.py b/src/openai/types/fine_tuning/reinforcement_method.py new file mode 100644 index 0000000000..a8a3685148 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_method.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union, Optional +from typing_extensions import TypeAlias + +from ..._models import BaseModel +from ..graders.multi_grader import MultiGrader +from ..graders.python_grader import PythonGrader +from ..graders.score_model_grader import ScoreModelGrader +from ..graders.string_check_grader import StringCheckGrader +from .reinforcement_hyperparameters import ReinforcementHyperparameters +from ..graders.text_similarity_grader import TextSimilarityGrader + +__all__ = ["ReinforcementMethod", "Grader"] + +Grader: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, MultiGrader] + + +class ReinforcementMethod(BaseModel): + """Configuration for the reinforcement fine-tuning method.""" + + grader: Grader + """The grader used for the fine-tuning job.""" + + hyperparameters: Optional[ReinforcementHyperparameters] = None + """The hyperparameters used for the reinforcement fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/reinforcement_method_param.py b/src/openai/types/fine_tuning/reinforcement_method_param.py new file mode 100644 index 0000000000..ea75bfeb69 --- /dev/null +++ b/src/openai/types/fine_tuning/reinforcement_method_param.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Required, TypeAlias, TypedDict + +from ..graders.multi_grader_param import MultiGraderParam +from ..graders.python_grader_param import PythonGraderParam +from ..graders.score_model_grader_param import ScoreModelGraderParam +from ..graders.string_check_grader_param import StringCheckGraderParam +from .reinforcement_hyperparameters_param import ReinforcementHyperparametersParam +from ..graders.text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["ReinforcementMethodParam", "Grader"] + +Grader: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, MultiGraderParam +] + + +class ReinforcementMethodParam(TypedDict, total=False): + """Configuration for the reinforcement fine-tuning method.""" + + grader: Required[Grader] + """The grader used for the fine-tuning job.""" + + hyperparameters: ReinforcementHyperparametersParam + """The hyperparameters used for the reinforcement fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/supervised_hyperparameters.py b/src/openai/types/fine_tuning/supervised_hyperparameters.py new file mode 100644 index 0000000000..1231bbdd80 --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_hyperparameters.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["SupervisedHyperparameters"] + + +class SupervisedHyperparameters(BaseModel): + """The hyperparameters used for the fine-tuning job.""" + + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. 
+ + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/supervised_hyperparameters_param.py b/src/openai/types/fine_tuning/supervised_hyperparameters_param.py new file mode 100644 index 0000000000..de0e021dea --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_hyperparameters_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +__all__ = ["SupervisedHyperparametersParam"] + + +class SupervisedHyperparametersParam(TypedDict, total=False): + """The hyperparameters used for the fine-tuning job.""" + + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ diff --git a/src/openai/types/fine_tuning/supervised_method.py b/src/openai/types/fine_tuning/supervised_method.py new file mode 100644 index 0000000000..96e102582d --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_method.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .supervised_hyperparameters import SupervisedHyperparameters + +__all__ = ["SupervisedMethod"] + + +class SupervisedMethod(BaseModel): + """Configuration for the supervised fine-tuning method.""" + + hyperparameters: Optional[SupervisedHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" diff --git a/src/openai/types/fine_tuning/supervised_method_param.py b/src/openai/types/fine_tuning/supervised_method_param.py new file mode 100644 index 0000000000..4381cd184b --- /dev/null +++ b/src/openai/types/fine_tuning/supervised_method_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .supervised_hyperparameters_param import SupervisedHyperparametersParam + +__all__ = ["SupervisedMethodParam"] + + +class SupervisedMethodParam(TypedDict, total=False): + """Configuration for the supervised fine-tuning method.""" + + hyperparameters: SupervisedHyperparametersParam + """The hyperparameters used for the fine-tuning job.""" diff --git a/src/openai/types/graders/__init__.py b/src/openai/types/graders/__init__.py new file mode 100644 index 0000000000..4f70eb6c2f --- /dev/null +++ b/src/openai/types/graders/__init__.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .multi_grader import MultiGrader as MultiGrader +from .grader_inputs import GraderInputs as GraderInputs +from .python_grader import PythonGrader as PythonGrader +from .label_model_grader import LabelModelGrader as LabelModelGrader +from .multi_grader_param import MultiGraderParam as MultiGraderParam +from .score_model_grader import ScoreModelGrader as ScoreModelGrader +from .grader_inputs_param import GraderInputsParam as GraderInputsParam +from .python_grader_param import PythonGraderParam as PythonGraderParam +from .string_check_grader import StringCheckGrader as StringCheckGrader +from .text_similarity_grader import TextSimilarityGrader as TextSimilarityGrader +from .label_model_grader_param import LabelModelGraderParam as LabelModelGraderParam +from .score_model_grader_param import ScoreModelGraderParam as ScoreModelGraderParam +from .string_check_grader_param import StringCheckGraderParam as StringCheckGraderParam +from .text_similarity_grader_param import TextSimilarityGraderParam as TextSimilarityGraderParam diff --git a/src/openai/types/graders/grader_inputs.py b/src/openai/types/graders/grader_inputs.py new file mode 100644 index 0000000000..edc966d889 --- /dev/null +++ b/src/openai/types/graders/grader_inputs.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio + +__all__ = ["GraderInputs", "GraderInputItem", "GraderInputItemOutputText", "GraderInputItemInputImage"] + + +class GraderInputItemOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class GraderInputItemInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +GraderInputItem: TypeAlias = Union[ + str, ResponseInputText, GraderInputItemOutputText, GraderInputItemInputImage, ResponseInputAudio +] + +GraderInputs: TypeAlias = List[GraderInputItem] diff --git a/src/openai/types/graders/grader_inputs_param.py b/src/openai/types/graders/grader_inputs_param.py new file mode 100644 index 0000000000..7d8341eb32 --- /dev/null +++ b/src/openai/types/graders/grader_inputs_param.py @@ -0,0 +1,53 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..responses.response_input_text_param import ResponseInputTextParam +from ..responses.response_input_audio_param import ResponseInputAudioParam + +__all__ = [ + "GraderInputsParam", + "GraderInputsParamItem", + "GraderInputsParamItemOutputText", + "GraderInputsParamItemInputImage", +] + + +class GraderInputsParamItemOutputText(TypedDict, total=False): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +class GraderInputsParamItemInputImage(TypedDict, total=False): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +GraderInputsParamItem: TypeAlias = Union[ + str, + ResponseInputTextParam, + GraderInputsParamItemOutputText, + GraderInputsParamItemInputImage, + ResponseInputAudioParam, +] + +GraderInputsParam: TypeAlias = List[GraderInputsParamItem] diff --git a/src/openai/types/graders/label_model_grader.py b/src/openai/types/graders/label_model_grader.py new file mode 100644 index 0000000000..d3c942235e --- /dev/null +++ b/src/openai/types/graders/label_model_grader.py @@ -0,0 +1,92 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .grader_inputs import GraderInputs +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio + +__all__ = ["LabelModelGrader", "Input", "InputContent", "InputContentOutputText", "InputContentInputImage"] + + +class InputContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class InputContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +InputContent: TypeAlias = Union[ + str, ResponseInputText, InputContentOutputText, InputContentInputImage, ResponseInputAudio, GraderInputs +] + + +class Input(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: InputContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. 
+ """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +class LabelModelGrader(BaseModel): + """ + A LabelModelGrader object which uses a model to assign labels to each item + in the evaluation. + """ + + input: List[Input] + + labels: List[str] + """The labels to assign to each item in the evaluation.""" + + model: str + """The model to use for the evaluation. Must support structured outputs.""" + + name: str + """The name of the grader.""" + + passing_labels: List[str] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Literal["label_model"] + """The object type, which is always `label_model`.""" diff --git a/src/openai/types/graders/label_model_grader_param.py b/src/openai/types/graders/label_model_grader_param.py new file mode 100644 index 0000000000..a5b6959cff --- /dev/null +++ b/src/openai/types/graders/label_model_grader_param.py @@ -0,0 +1,99 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .grader_inputs_param import GraderInputsParam +from ..responses.response_input_text_param import ResponseInputTextParam +from ..responses.response_input_audio_param import ResponseInputAudioParam + +__all__ = ["LabelModelGraderParam", "Input", "InputContent", "InputContentOutputText", "InputContentInputImage"] + + +class InputContentOutputText(TypedDict, total=False): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +class InputContentInputImage(TypedDict, total=False): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +InputContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + InputContentOutputText, + InputContentInputImage, + ResponseInputAudioParam, + GraderInputsParam, +] + + +class Input(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[InputContent] + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. 
Always `message`.""" + + +class LabelModelGraderParam(TypedDict, total=False): + """ + A LabelModelGrader object which uses a model to assign labels to each item + in the evaluation. + """ + + input: Required[Iterable[Input]] + + labels: Required[SequenceNotStr[str]] + """The labels to assign to each item in the evaluation.""" + + model: Required[str] + """The model to use for the evaluation. Must support structured outputs.""" + + name: Required[str] + """The name of the grader.""" + + passing_labels: Required[SequenceNotStr[str]] + """The labels that indicate a passing result. Must be a subset of labels.""" + + type: Required[Literal["label_model"]] + """The object type, which is always `label_model`.""" diff --git a/src/openai/types/graders/multi_grader.py b/src/openai/types/graders/multi_grader.py new file mode 100644 index 0000000000..022ddb406a --- /dev/null +++ b/src/openai/types/graders/multi_grader.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .python_grader import PythonGrader +from .label_model_grader import LabelModelGrader +from .score_model_grader import ScoreModelGrader +from .string_check_grader import StringCheckGrader +from .text_similarity_grader import TextSimilarityGrader + +__all__ = ["MultiGrader", "Graders"] + +Graders: TypeAlias = Union[StringCheckGrader, TextSimilarityGrader, PythonGrader, ScoreModelGrader, LabelModelGrader] + + +class MultiGrader(BaseModel): + """ + A MultiGrader object combines the output of multiple graders to produce a single score. + """ + + calculate_output: str + """A formula to calculate the output based on grader results.""" + + graders: Graders + """ + A StringCheckGrader object that performs a string comparison between input and + reference using a specified operation. + """ + + name: str + """The name of the grader.""" + + type: Literal["multi"] + """The object type, which is always `multi`.""" diff --git a/src/openai/types/graders/multi_grader_param.py b/src/openai/types/graders/multi_grader_param.py new file mode 100644 index 0000000000..064267a5aa --- /dev/null +++ b/src/openai/types/graders/multi_grader_param.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .python_grader_param import PythonGraderParam +from .label_model_grader_param import LabelModelGraderParam +from .score_model_grader_param import ScoreModelGraderParam +from .string_check_grader_param import StringCheckGraderParam +from .text_similarity_grader_param import TextSimilarityGraderParam + +__all__ = ["MultiGraderParam", "Graders"] + +Graders: TypeAlias = Union[ + StringCheckGraderParam, TextSimilarityGraderParam, PythonGraderParam, ScoreModelGraderParam, LabelModelGraderParam +] + + +class MultiGraderParam(TypedDict, total=False): + """ + A MultiGrader object combines the output of multiple graders to produce a single score. + """ + + calculate_output: Required[str] + """A formula to calculate the output based on grader results.""" + + graders: Required[Graders] + """ + A StringCheckGrader object that performs a string comparison between input and + reference using a specified operation. 
+ """ + + name: Required[str] + """The name of the grader.""" + + type: Required[Literal["multi"]] + """The object type, which is always `multi`.""" diff --git a/src/openai/types/graders/python_grader.py b/src/openai/types/graders/python_grader.py new file mode 100644 index 0000000000..81aafdae0a --- /dev/null +++ b/src/openai/types/graders/python_grader.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["PythonGrader"] + + +class PythonGrader(BaseModel): + """A PythonGrader object that runs a python script on the input.""" + + name: str + """The name of the grader.""" + + source: str + """The source code of the python script.""" + + type: Literal["python"] + """The object type, which is always `python`.""" + + image_tag: Optional[str] = None + """The image tag to use for the python script.""" diff --git a/src/openai/types/graders/python_grader_param.py b/src/openai/types/graders/python_grader_param.py new file mode 100644 index 0000000000..3be7bab432 --- /dev/null +++ b/src/openai/types/graders/python_grader_param.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["PythonGraderParam"] + + +class PythonGraderParam(TypedDict, total=False): + """A PythonGrader object that runs a python script on the input.""" + + name: Required[str] + """The name of the grader.""" + + source: Required[str] + """The source code of the python script.""" + + type: Required[Literal["python"]] + """The object type, which is always `python`.""" + + image_tag: str + """The image tag to use for the python script.""" diff --git a/src/openai/types/graders/score_model_grader.py b/src/openai/types/graders/score_model_grader.py new file mode 100644 index 0000000000..85d11e8666 --- /dev/null +++ b/src/openai/types/graders/score_model_grader.py @@ -0,0 +1,135 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .grader_inputs import GraderInputs +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text import ResponseInputText +from ..responses.response_input_audio import ResponseInputAudio + +__all__ = [ + "ScoreModelGrader", + "Input", + "InputContent", + "InputContentOutputText", + "InputContentInputImage", + "SamplingParams", +] + + +class InputContentOutputText(BaseModel): + """A text output from the model.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + +class InputContentInputImage(BaseModel): + """An image input block used within EvalItem content arrays.""" + + image_url: str + """The URL of the image input.""" + + type: Literal["input_image"] + """The type of the image input. Always `input_image`.""" + + detail: Optional[str] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. 
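# Usage sketch (illustrative): defining a Python grader with the fields typed above
# and checking it against the grader validation endpoint. Assumes the
# `fine_tuning.alpha.graders.validate()` method wraps GraderValidateParams, and that
# the `grade(sample, item)` convention in `source` matches the graders guide.
from openai import OpenAI

client = OpenAI()

python_grader = {
    "type": "python",
    "name": "length_reward",
    "source": (
        "def grade(sample, item):\n"
        "    # Reward shorter completions, capped at 1.0.\n"
        "    return max(0.0, 1.0 - len(sample['output_text']) / 1000)\n"
    ),
}

validated = client.fine_tuning.alpha.graders.validate(grader=python_grader)
print(validated.grader)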
+ """ + + +InputContent: TypeAlias = Union[ + str, ResponseInputText, InputContentOutputText, InputContentInputImage, ResponseInputAudio, GraderInputs +] + + +class Input(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: InputContent + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" + + +class SamplingParams(BaseModel): + """The sampling parameters for the model.""" + + max_completions_tokens: Optional[int] = None + """The maximum number of tokens the grader model may generate in its response.""" + + reasoning_effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + seed: Optional[int] = None + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] = None + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] = None + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class ScoreModelGrader(BaseModel): + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + + input: List[Input] + """The input messages evaluated by the grader. + + Supports text, output text, input image, and input audio content blocks, and may + include template strings. + """ + + model: str + """The model to use for the evaluation.""" + + name: str + """The name of the grader.""" + + type: Literal["score_model"] + """The object type, which is always `score_model`.""" + + range: Optional[List[float]] = None + """The range of the score. Defaults to `[0, 1]`.""" + + sampling_params: Optional[SamplingParams] = None + """The sampling parameters for the model.""" diff --git a/src/openai/types/graders/score_model_grader_param.py b/src/openai/types/graders/score_model_grader_param.py new file mode 100644 index 0000000000..9f1c42e051 --- /dev/null +++ b/src/openai/types/graders/score_model_grader_param.py @@ -0,0 +1,141 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .grader_inputs_param import GraderInputsParam +from ..shared.reasoning_effort import ReasoningEffort +from ..responses.response_input_text_param import ResponseInputTextParam +from ..responses.response_input_audio_param import ResponseInputAudioParam + +__all__ = [ + "ScoreModelGraderParam", + "Input", + "InputContent", + "InputContentOutputText", + "InputContentInputImage", + "SamplingParams", +] + + +class InputContentOutputText(TypedDict, total=False): + """A text output from the model.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + +class InputContentInputImage(TypedDict, total=False): + """An image input block used within EvalItem content arrays.""" + + image_url: Required[str] + """The URL of the image input.""" + + type: Required[Literal["input_image"]] + """The type of the image input. Always `input_image`.""" + + detail: str + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + +InputContent: TypeAlias = Union[ + str, + ResponseInputTextParam, + InputContentOutputText, + InputContentInputImage, + ResponseInputAudioParam, + GraderInputsParam, +] + + +class Input(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[InputContent] + """Inputs to the model - can contain template strings. + + Supports text, output text, input images, and input audio, either as a single + item or an array of items. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" + + +class SamplingParams(TypedDict, total=False): + """The sampling parameters for the model.""" + + max_completions_tokens: Optional[int] + """The maximum number of tokens the grader model may generate in its response.""" + + reasoning_effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. 
+ """ + + seed: Optional[int] + """A seed value to initialize the randomness, during sampling.""" + + temperature: Optional[float] + """A higher temperature increases randomness in the outputs.""" + + top_p: Optional[float] + """An alternative to temperature for nucleus sampling; 1.0 includes all tokens.""" + + +class ScoreModelGraderParam(TypedDict, total=False): + """A ScoreModelGrader object that uses a model to assign a score to the input.""" + + input: Required[Iterable[Input]] + """The input messages evaluated by the grader. + + Supports text, output text, input image, and input audio content blocks, and may + include template strings. + """ + + model: Required[str] + """The model to use for the evaluation.""" + + name: Required[str] + """The name of the grader.""" + + type: Required[Literal["score_model"]] + """The object type, which is always `score_model`.""" + + range: Iterable[float] + """The range of the score. Defaults to `[0, 1]`.""" + + sampling_params: SamplingParams + """The sampling parameters for the model.""" diff --git a/src/openai/types/graders/string_check_grader.py b/src/openai/types/graders/string_check_grader.py new file mode 100644 index 0000000000..efd3679da9 --- /dev/null +++ b/src/openai/types/graders/string_check_grader.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["StringCheckGrader"] + + +class StringCheckGrader(BaseModel): + """ + A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. + """ + + input: str + """The input text. This may include template strings.""" + + name: str + """The name of the grader.""" + + operation: Literal["eq", "ne", "like", "ilike"] + """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.""" + + reference: str + """The reference text. This may include template strings.""" + + type: Literal["string_check"] + """The object type, which is always `string_check`.""" diff --git a/src/openai/types/graders/string_check_grader_param.py b/src/openai/types/graders/string_check_grader_param.py new file mode 100644 index 0000000000..da9e961568 --- /dev/null +++ b/src/openai/types/graders/string_check_grader_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["StringCheckGraderParam"] + + +class StringCheckGraderParam(TypedDict, total=False): + """ + A StringCheckGrader object that performs a string comparison between input and reference using a specified operation. + """ + + input: Required[str] + """The input text. This may include template strings.""" + + name: Required[str] + """The name of the grader.""" + + operation: Required[Literal["eq", "ne", "like", "ilike"]] + """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.""" + + reference: Required[str] + """The reference text. 
This may include template strings.""" + + type: Required[Literal["string_check"]] + """The object type, which is always `string_check`.""" diff --git a/src/openai/types/graders/text_similarity_grader.py b/src/openai/types/graders/text_similarity_grader.py new file mode 100644 index 0000000000..a9d39a2fbd --- /dev/null +++ b/src/openai/types/graders/text_similarity_grader.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["TextSimilarityGrader"] + + +class TextSimilarityGrader(BaseModel): + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + + evaluation_metric: Literal[ + "cosine", + "fuzzy_match", + "bleu", + "gleu", + "meteor", + "rouge_1", + "rouge_2", + "rouge_3", + "rouge_4", + "rouge_5", + "rouge_l", + ] + """The evaluation metric to use. + + One of `cosine`, `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, + `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`. + """ + + input: str + """The text being graded.""" + + name: str + """The name of the grader.""" + + reference: str + """The text being graded against.""" + + type: Literal["text_similarity"] + """The type of grader.""" diff --git a/src/openai/types/graders/text_similarity_grader_param.py b/src/openai/types/graders/text_similarity_grader_param.py new file mode 100644 index 0000000000..0907c3c2a7 --- /dev/null +++ b/src/openai/types/graders/text_similarity_grader_param.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["TextSimilarityGraderParam"] + + +class TextSimilarityGraderParam(TypedDict, total=False): + """A TextSimilarityGrader object which grades text based on similarity metrics.""" + + evaluation_metric: Required[ + Literal[ + "cosine", + "fuzzy_match", + "bleu", + "gleu", + "meteor", + "rouge_1", + "rouge_2", + "rouge_3", + "rouge_4", + "rouge_5", + "rouge_l", + ] + ] + """The evaluation metric to use. + + One of `cosine`, `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, + `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`. + """ + + input: Required[str] + """The text being graded.""" + + name: Required[str] + """The name of the grader.""" + + reference: Required[str] + """The text being graded against.""" + + type: Required[Literal["text_similarity"]] + """The type of grader.""" diff --git a/src/openai/types/image.py b/src/openai/types/image.py index f48aa2c702..9e2a23fa40 100644 --- a/src/openai/types/image.py +++ b/src/openai/types/image.py @@ -8,17 +8,21 @@ class Image(BaseModel): + """Represents the content or the URL of an image generated by the OpenAI API.""" + b64_json: Optional[str] = None - """ - The base64-encoded JSON of the generated image, if `response_format` is - `b64_json`. + """The base64-encoded JSON of the generated image. + + Default value for `gpt-image-1`, and only present if `response_format` is set to + `b64_json` for `dall-e-2` and `dall-e-3`. """ revised_prompt: Optional[str] = None - """ - The prompt that was used to generate the image, if there was any revision to the - prompt. 
- """ + """For `dall-e-3` only, the revised prompt that was used to generate the image.""" url: Optional[str] = None - """The URL of the generated image, if `response_format` is `url` (default).""" + """ + When using `dall-e-2` or `dall-e-3`, the URL of the generated image if + `response_format` is set to `url` (default value). Unsupported for + `gpt-image-1`. + """ diff --git a/src/openai/types/image_create_variation_params.py b/src/openai/types/image_create_variation_params.py index 2549307372..d10b74b2c2 100644 --- a/src/openai/types/image_create_variation_params.py +++ b/src/openai/types/image_create_variation_params.py @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .image_model import ImageModel __all__ = ["ImageCreateVariationParams"] @@ -17,17 +18,14 @@ class ImageCreateVariationParams(TypedDict, total=False): Must be a valid PNG file, less than 4MB, and square. """ - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. Only `dall-e-2` is supported at this time. """ n: Optional[int] - """The number of images to generate. - - Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. - """ + """The number of images to generate. Must be between 1 and 10.""" response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. @@ -46,5 +44,5 @@ class ImageCreateVariationParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_edit_completed_event.py b/src/openai/types/image_edit_completed_event.py new file mode 100644 index 0000000000..5bd2986d2a --- /dev/null +++ b/src/openai/types/image_edit_completed_event.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ImageEditCompletedEvent", "Usage", "UsageInputTokensDetails"] + + +class UsageInputTokensDetails(BaseModel): + """The input tokens detailed information for the image generation.""" + + image_tokens: int + """The number of image tokens in the input prompt.""" + + text_tokens: int + """The number of text tokens in the input prompt.""" + + +class Usage(BaseModel): + """For `gpt-image-1` only, the token usage information for the image generation.""" + + input_tokens: int + """The number of tokens (images and text) in the input prompt.""" + + input_tokens_details: UsageInputTokensDetails + """The input tokens detailed information for the image generation.""" + + output_tokens: int + """The number of image tokens in the output image.""" + + total_tokens: int + """The total number of tokens (images and text) used for the image generation.""" + + +class ImageEditCompletedEvent(BaseModel): + """Emitted when image editing has completed and the final image is available.""" + + b64_json: str + """Base64-encoded final edited image data, suitable for rendering as an image.""" + + background: Literal["transparent", "opaque", "auto"] + """The background setting for the edited image.""" + + created_at: int + """The Unix timestamp when the event was created.""" + + output_format: Literal["png", "webp", "jpeg"] + """The output format for the edited image.""" + + quality: Literal["low", "medium", "high", "auto"] + """The quality setting for the edited image.""" + + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] + """The size of the edited image.""" + + type: Literal["image_edit.completed"] + """The type of the event. Always `image_edit.completed`.""" + + usage: Usage + """For `gpt-image-1` only, the token usage information for the image generation.""" diff --git a/src/openai/types/image_edit_params.py b/src/openai/types/image_edit_params.py index 073456e349..2a8fab0f20 100644 --- a/src/openai/types/image_edit_params.py +++ b/src/openai/types/image_edit_params.py @@ -5,57 +5,141 @@ from typing import Union, Optional from typing_extensions import Literal, Required, TypedDict -from .._types import FileTypes +from .._types import FileTypes, SequenceNotStr +from .image_model import ImageModel -__all__ = ["ImageEditParams"] +__all__ = ["ImageEditParamsBase", "ImageEditParamsNonStreaming", "ImageEditParamsStreaming"] -class ImageEditParams(TypedDict, total=False): - image: Required[FileTypes] - """The image to edit. +class ImageEditParamsBase(TypedDict, total=False): + image: Required[Union[FileTypes, SequenceNotStr[FileTypes]]] + """The image(s) to edit. Must be a supported image file or an array of images. - Must be a valid PNG file, less than 4MB, and square. If mask is not provided, - image must have transparency, which will be used as the mask. + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than + 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square `png` + file less than 4MB. """ prompt: Required[str] """A text description of the desired image(s). - The maximum length is 1000 characters. + The maximum length is 1000 characters for `dall-e-2`, and 32000 characters for + `gpt-image-1`. + """ + + background: Optional[Literal["transparent", "opaque", "auto"]] + """ + Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. 
Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + """ + + input_fidelity: Optional[Literal["high", "low"]] + """ + Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. """ mask: FileTypes """An additional image whose fully transparent areas (e.g. - where alpha is zero) indicate where `image` should be edited. Must be a valid - PNG file, less than 4MB, and have the same dimensions as `image`. + where alpha is zero) indicate where `image` should be edited. If there are + multiple images provided, the mask will be applied on the first image. Must be a + valid PNG file, less than 4MB, and have the same dimensions as `image`. """ - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. - Only `dall-e-2` is supported at this time. + Only `dall-e-2` and `gpt-image-1` are supported. Defaults to `dall-e-2` unless a + parameter specific to `gpt-image-1` is used. """ n: Optional[int] """The number of images to generate. Must be between 1 and 10.""" + output_compression: Optional[int] + """The compression level (0-100%) for the generated images. + + This parameter is only supported for `gpt-image-1` with the `webp` or `jpeg` + output formats, and defaults to 100. + """ + + output_format: Optional[Literal["png", "jpeg", "webp"]] + """The format in which the generated images are returned. + + This parameter is only supported for `gpt-image-1`. Must be one of `png`, + `jpeg`, or `webp`. The default value is `png`. + """ + + partial_images: Optional[int] + """The number of partial images to generate. + + This parameter is used for streaming responses that return partial images. Value + must be between 0 and 3. When set to 0, the response will be a single image sent + in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + """ + + quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] + """The quality of the image that will be generated. + + `high`, `medium` and `low` are only supported for `gpt-image-1`. `dall-e-2` only + supports `standard` quality. Defaults to `auto`. + """ + response_format: Optional[Literal["url", "b64_json"]] """The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the - image has been generated. + image has been generated. This parameter is only supported for `dall-e-2`, as + `gpt-image-1` will always return base64-encoded images. """ - size: Optional[Literal["256x256", "512x512", "1024x1024"]] + size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]] """The size of the generated images. - Must be one of `256x256`, `512x512`, or `1024x1024`. + Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or + `auto` (default value) for `gpt-image-1`, and one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. 
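A hedged sketch of an edit request exercising several of the `gpt-image-1` parameters defined above; the file names and prompt are placeholders, and this assumes `client.images.edit` forwards these fields as-is.

from openai import OpenAI

client = OpenAI()

# Edit with gpt-image-1: multiple input images, transparent background,
# portrait output size. All concrete values here are illustrative.
with open("desk.png", "rb") as desk, open("lamp.png", "rb") as lamp:
    result = client.images.edit(
        model="gpt-image-1",
        image=[desk, lamp],
        prompt="Combine these objects into a cozy reading-nook scene",
        background="transparent",
        output_format="png",
        quality="high",
        size="1024x1536",
    )

# gpt-image-1 always returns base64-encoded image data.
if result.data:
    print(len(result.data[0].b64_json or ""), "base64 characters")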
""" user: str """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + +class ImageEditParamsNonStreaming(ImageEditParamsBase, total=False): + stream: Optional[Literal[False]] + """Edit the image in streaming mode. + + Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. + """ + + +class ImageEditParamsStreaming(ImageEditParamsBase): + stream: Required[Literal[True]] + """Edit the image in streaming mode. + + Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. """ + + +ImageEditParams = Union[ImageEditParamsNonStreaming, ImageEditParamsStreaming] diff --git a/src/openai/types/image_edit_partial_image_event.py b/src/openai/types/image_edit_partial_image_event.py new file mode 100644 index 0000000000..7bbd8c9b13 --- /dev/null +++ b/src/openai/types/image_edit_partial_image_event.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ImageEditPartialImageEvent"] + + +class ImageEditPartialImageEvent(BaseModel): + """Emitted when a partial image is available during image editing streaming.""" + + b64_json: str + """Base64-encoded partial image data, suitable for rendering as an image.""" + + background: Literal["transparent", "opaque", "auto"] + """The background setting for the requested edited image.""" + + created_at: int + """The Unix timestamp when the event was created.""" + + output_format: Literal["png", "webp", "jpeg"] + """The output format for the requested edited image.""" + + partial_image_index: int + """0-based index for the partial image (streaming).""" + + quality: Literal["low", "medium", "high", "auto"] + """The quality setting for the requested edited image.""" + + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] + """The size of the requested edited image.""" + + type: Literal["image_edit.partial_image"] + """The type of the event. Always `image_edit.partial_image`.""" diff --git a/src/openai/types/image_edit_stream_event.py b/src/openai/types/image_edit_stream_event.py new file mode 100644 index 0000000000..759f6c6db5 --- /dev/null +++ b/src/openai/types/image_edit_stream_event.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from .._utils import PropertyInfo +from .image_edit_completed_event import ImageEditCompletedEvent +from .image_edit_partial_image_event import ImageEditPartialImageEvent + +__all__ = ["ImageEditStreamEvent"] + +ImageEditStreamEvent: TypeAlias = Annotated[ + Union[ImageEditPartialImageEvent, ImageEditCompletedEvent], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/image_gen_completed_event.py b/src/openai/types/image_gen_completed_event.py new file mode 100644 index 0000000000..dc9ccb8cfc --- /dev/null +++ b/src/openai/types/image_gen_completed_event.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ImageGenCompletedEvent", "Usage", "UsageInputTokensDetails"] + + +class UsageInputTokensDetails(BaseModel): + """The input tokens detailed information for the image generation.""" + + image_tokens: int + """The number of image tokens in the input prompt.""" + + text_tokens: int + """The number of text tokens in the input prompt.""" + + +class Usage(BaseModel): + """For `gpt-image-1` only, the token usage information for the image generation.""" + + input_tokens: int + """The number of tokens (images and text) in the input prompt.""" + + input_tokens_details: UsageInputTokensDetails + """The input tokens detailed information for the image generation.""" + + output_tokens: int + """The number of image tokens in the output image.""" + + total_tokens: int + """The total number of tokens (images and text) used for the image generation.""" + + +class ImageGenCompletedEvent(BaseModel): + """Emitted when image generation has completed and the final image is available.""" + + b64_json: str + """Base64-encoded image data, suitable for rendering as an image.""" + + background: Literal["transparent", "opaque", "auto"] + """The background setting for the generated image.""" + + created_at: int + """The Unix timestamp when the event was created.""" + + output_format: Literal["png", "webp", "jpeg"] + """The output format for the generated image.""" + + quality: Literal["low", "medium", "high", "auto"] + """The quality setting for the generated image.""" + + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] + """The size of the generated image.""" + + type: Literal["image_generation.completed"] + """The type of the event. Always `image_generation.completed`.""" + + usage: Usage + """For `gpt-image-1` only, the token usage information for the image generation.""" diff --git a/src/openai/types/image_gen_partial_image_event.py b/src/openai/types/image_gen_partial_image_event.py new file mode 100644 index 0000000000..df29c00a63 --- /dev/null +++ b/src/openai/types/image_gen_partial_image_event.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ImageGenPartialImageEvent"] + + +class ImageGenPartialImageEvent(BaseModel): + """Emitted when a partial image is available during image generation streaming.""" + + b64_json: str + """Base64-encoded partial image data, suitable for rendering as an image.""" + + background: Literal["transparent", "opaque", "auto"] + """The background setting for the requested image.""" + + created_at: int + """The Unix timestamp when the event was created.""" + + output_format: Literal["png", "webp", "jpeg"] + """The output format for the requested image.""" + + partial_image_index: int + """0-based index for the partial image (streaming).""" + + quality: Literal["low", "medium", "high", "auto"] + """The quality setting for the requested image.""" + + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] + """The size of the requested image.""" + + type: Literal["image_generation.partial_image"] + """The type of the event. 
Always `image_generation.partial_image`.""" diff --git a/src/openai/types/image_gen_stream_event.py b/src/openai/types/image_gen_stream_event.py new file mode 100644 index 0000000000..7dde5d5245 --- /dev/null +++ b/src/openai/types/image_gen_stream_event.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from .._utils import PropertyInfo +from .image_gen_completed_event import ImageGenCompletedEvent +from .image_gen_partial_image_event import ImageGenPartialImageEvent + +__all__ = ["ImageGenStreamEvent"] + +ImageGenStreamEvent: TypeAlias = Annotated[ + Union[ImageGenPartialImageEvent, ImageGenCompletedEvent], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/image_generate_params.py b/src/openai/types/image_generate_params.py index 18c56f8ed6..3270ca1d6e 100644 --- a/src/openai/types/image_generate_params.py +++ b/src/openai/types/image_generate_params.py @@ -5,19 +5,42 @@ from typing import Union, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["ImageGenerateParams"] +from .image_model import ImageModel +__all__ = ["ImageGenerateParamsBase", "ImageGenerateParamsNonStreaming", "ImageGenerateParamsStreaming"] -class ImageGenerateParams(TypedDict, total=False): + +class ImageGenerateParamsBase(TypedDict, total=False): prompt: Required[str] """A text description of the desired image(s). - The maximum length is 1000 characters for `dall-e-2` and 4000 characters for - `dall-e-3`. + The maximum length is 32000 characters for `gpt-image-1`, 1000 characters for + `dall-e-2` and 4000 characters for `dall-e-3`. + """ + + background: Optional[Literal["transparent", "opaque", "auto"]] + """ + Allows to set transparency for the background of the generated image(s). This + parameter is only supported for `gpt-image-1`. Must be one of `transparent`, + `opaque` or `auto` (default value). When `auto` is used, the model will + automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it should + be set to either `png` (default value) or `webp`. + """ + + model: Union[str, ImageModel, None] + """The model to use for image generation. + + One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults to `dall-e-2` unless a + parameter specific to `gpt-image-1` is used. """ - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] - """The model to use for image generation.""" + moderation: Optional[Literal["low", "auto"]] + """Control the content-moderation level for images generated by `gpt-image-1`. + + Must be either `low` for less restrictive filtering or `auto` (default value). + """ n: Optional[int] """The number of images to generate. @@ -25,39 +48,96 @@ class ImageGenerateParams(TypedDict, total=False): Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. """ - quality: Literal["standard", "hd"] + output_compression: Optional[int] + """The compression level (0-100%) for the generated images. + + This parameter is only supported for `gpt-image-1` with the `webp` or `jpeg` + output formats, and defaults to 100. + """ + + output_format: Optional[Literal["png", "jpeg", "webp"]] + """The format in which the generated images are returned. + + This parameter is only supported for `gpt-image-1`. Must be one of `png`, + `jpeg`, or `webp`. + """ + + partial_images: Optional[int] + """The number of partial images to generate. 
+ + This parameter is used for streaming responses that return partial images. Value + must be between 0 and 3. When set to 0, the response will be a single image sent + in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + """ + + quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] """The quality of the image that will be generated. - `hd` creates images with finer details and greater consistency across the image. - This param is only supported for `dall-e-3`. + - `auto` (default value) will automatically select the best quality for the + given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. """ response_format: Optional[Literal["url", "b64_json"]] - """The format in which the generated images are returned. + """The format in which generated images with `dall-e-2` and `dall-e-3` are + returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the - image has been generated. + image has been generated. This parameter isn't supported for `gpt-image-1` which + will always return base64-encoded images. """ - size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] + size: Optional[ + Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"] + ] """The size of the generated images. - Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one - of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. + Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or + `auto` (default value) for `gpt-image-1`, one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792` + for `dall-e-3`. """ style: Optional[Literal["vivid", "natural"]] """The style of the generated images. - Must be one of `vivid` or `natural`. Vivid causes the model to lean towards - generating hyper-real and dramatic images. Natural causes the model to produce - more natural, less hyper-real looking images. This param is only supported for - `dall-e-3`. + This parameter is only supported for `dall-e-3`. Must be one of `vivid` or + `natural`. Vivid causes the model to lean towards generating hyper-real and + dramatic images. Natural causes the model to produce more natural, less + hyper-real looking images. """ user: str """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + +class ImageGenerateParamsNonStreaming(ImageGenerateParamsBase, total=False): + stream: Optional[Literal[False]] + """Generate the image in streaming mode. + + Defaults to `false`. See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. + """ + + +class ImageGenerateParamsStreaming(ImageGenerateParamsBase): + stream: Required[Literal[True]] + """Generate the image in streaming mode. + + Defaults to `false`. 
See the + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) + for more information. This parameter is only supported for `gpt-image-1`. """ + + +ImageGenerateParams = Union[ImageGenerateParamsNonStreaming, ImageGenerateParamsStreaming] diff --git a/src/openai/types/image_model.py b/src/openai/types/image_model.py new file mode 100644 index 0000000000..22b1281fa9 --- /dev/null +++ b/src/openai/types/image_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ImageModel"] + +ImageModel: TypeAlias = Literal["dall-e-2", "dall-e-3", "gpt-image-1", "gpt-image-1-mini"] diff --git a/src/openai/types/images_response.py b/src/openai/types/images_response.py index 7cee813184..914017823e 100644 --- a/src/openai/types/images_response.py +++ b/src/openai/types/images_response.py @@ -1,14 +1,66 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional +from typing_extensions import Literal from .image import Image from .._models import BaseModel -__all__ = ["ImagesResponse"] +__all__ = ["ImagesResponse", "Usage", "UsageInputTokensDetails"] + + +class UsageInputTokensDetails(BaseModel): + """The input tokens detailed information for the image generation.""" + + image_tokens: int + """The number of image tokens in the input prompt.""" + + text_tokens: int + """The number of text tokens in the input prompt.""" + + +class Usage(BaseModel): + """For `gpt-image-1` only, the token usage information for the image generation.""" + + input_tokens: int + """The number of tokens (images and text) in the input prompt.""" + + input_tokens_details: UsageInputTokensDetails + """The input tokens detailed information for the image generation.""" + + output_tokens: int + """The number of output tokens generated by the model.""" + + total_tokens: int + """The total number of tokens (images and text) used for the image generation.""" class ImagesResponse(BaseModel): + """The response from the image generation endpoint.""" + created: int + """The Unix timestamp (in seconds) of when the image was created.""" + + background: Optional[Literal["transparent", "opaque"]] = None + """The background parameter used for the image generation. + + Either `transparent` or `opaque`. + """ + + data: Optional[List[Image]] = None + """The list of generated images.""" + + output_format: Optional[Literal["png", "webp", "jpeg"]] = None + """The output format of the image generation. Either `png`, `webp`, or `jpeg`.""" + + quality: Optional[Literal["low", "medium", "high"]] = None + """The quality of the image generated. Either `low`, `medium`, or `high`.""" + + size: Optional[Literal["1024x1024", "1024x1536", "1536x1024"]] = None + """The size of the image generated. + + Either `1024x1024`, `1024x1536`, or `1536x1024`. 
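A hedged sketch of streaming image generation with partial images, assuming `client.images.generate(..., stream=True)` yields the `ImageGenStreamEvent` union added in this diff; the prompt and `partial_images` value are placeholders.

from openai import OpenAI

client = OpenAI()

# Request up to two partial frames before the final image.
stream = client.images.generate(
    model="gpt-image-1",
    prompt="A low-poly fox in a snowy forest",
    partial_images=2,
    stream=True,
)

for event in stream:
    if event.type == "image_generation.partial_image":
        print(f"partial image #{event.partial_image_index} received")
    elif event.type == "image_generation.completed":
        print(f"done: {len(event.b64_json)} base64 characters")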
+ """ - data: List[Image] + usage: Optional[Usage] = None + """For `gpt-image-1` only, the token usage information for the image generation.""" diff --git a/src/openai/types/model.py b/src/openai/types/model.py index 2631ee8d1a..6506224a20 100644 --- a/src/openai/types/model.py +++ b/src/openai/types/model.py @@ -8,6 +8,8 @@ class Model(BaseModel): + """Describes an OpenAI model offering that can be used with the API.""" + id: str """The model identifier, which can be referenced in the API endpoints.""" diff --git a/src/openai/types/model_deleted.py b/src/openai/types/model_deleted.py index d9a48bb1b5..e7601f74e4 100644 --- a/src/openai/types/model_deleted.py +++ b/src/openai/types/model_deleted.py @@ -1,7 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - - from .._models import BaseModel __all__ = ["ModelDeleted"] diff --git a/src/openai/types/moderation.py b/src/openai/types/moderation.py index 5aa691823a..a6acc26db4 100644 --- a/src/openai/types/moderation.py +++ b/src/openai/types/moderation.py @@ -1,14 +1,18 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import List, Optional +from typing_extensions import Literal from pydantic import Field as FieldInfo from .._models import BaseModel -__all__ = ["Moderation", "Categories", "CategoryScores"] +__all__ = ["Moderation", "Categories", "CategoryAppliedInputTypes", "CategoryScores"] class Categories(BaseModel): + """A list of the categories, and whether they are flagged or not.""" + harassment: bool """ Content that expresses, incites, or promotes harassing language towards any @@ -36,6 +40,20 @@ class Categories(BaseModel): orientation, disability status, or caste. """ + illicit: Optional[bool] = None + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing, or that gives advice or instruction on how to commit + illicit acts. For example, "how to shoplift" would fit this category. + """ + + illicit_violent: Optional[bool] = FieldInfo(alias="illicit/violent", default=None) + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing that also includes violence, or that gives advice or + instruction on the procurement of any weapon. + """ + self_harm: bool = FieldInfo(alias="self-harm") """ Content that promotes, encourages, or depicts acts of self-harm, such as @@ -72,7 +90,54 @@ class Categories(BaseModel): """Content that depicts death, violence, or physical injury in graphic detail.""" +class CategoryAppliedInputTypes(BaseModel): + """ + A list of the categories along with the input type(s) that the score applies to. 
+ """ + + harassment: List[Literal["text"]] + """The applied input type(s) for the category 'harassment'.""" + + harassment_threatening: List[Literal["text"]] = FieldInfo(alias="harassment/threatening") + """The applied input type(s) for the category 'harassment/threatening'.""" + + hate: List[Literal["text"]] + """The applied input type(s) for the category 'hate'.""" + + hate_threatening: List[Literal["text"]] = FieldInfo(alias="hate/threatening") + """The applied input type(s) for the category 'hate/threatening'.""" + + illicit: List[Literal["text"]] + """The applied input type(s) for the category 'illicit'.""" + + illicit_violent: List[Literal["text"]] = FieldInfo(alias="illicit/violent") + """The applied input type(s) for the category 'illicit/violent'.""" + + self_harm: List[Literal["text", "image"]] = FieldInfo(alias="self-harm") + """The applied input type(s) for the category 'self-harm'.""" + + self_harm_instructions: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/instructions") + """The applied input type(s) for the category 'self-harm/instructions'.""" + + self_harm_intent: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/intent") + """The applied input type(s) for the category 'self-harm/intent'.""" + + sexual: List[Literal["text", "image"]] + """The applied input type(s) for the category 'sexual'.""" + + sexual_minors: List[Literal["text"]] = FieldInfo(alias="sexual/minors") + """The applied input type(s) for the category 'sexual/minors'.""" + + violence: List[Literal["text", "image"]] + """The applied input type(s) for the category 'violence'.""" + + violence_graphic: List[Literal["text", "image"]] = FieldInfo(alias="violence/graphic") + """The applied input type(s) for the category 'violence/graphic'.""" + + class CategoryScores(BaseModel): + """A list of the categories along with their scores as predicted by model.""" + harassment: float """The score for the category 'harassment'.""" @@ -85,6 +150,12 @@ class CategoryScores(BaseModel): hate_threatening: float = FieldInfo(alias="hate/threatening") """The score for the category 'hate/threatening'.""" + illicit: float + """The score for the category 'illicit'.""" + + illicit_violent: float = FieldInfo(alias="illicit/violent") + """The score for the category 'illicit/violent'.""" + self_harm: float = FieldInfo(alias="self-harm") """The score for the category 'self-harm'.""" @@ -111,6 +182,11 @@ class Moderation(BaseModel): categories: Categories """A list of the categories, and whether they are flagged or not.""" + category_applied_input_types: CategoryAppliedInputTypes + """ + A list of the categories along with the input type(s) that the score applies to. 
+ """ + category_scores: CategoryScores """A list of the categories along with their scores as predicted by model.""" diff --git a/src/openai/types/moderation_create_params.py b/src/openai/types/moderation_create_params.py index d4608def54..65d9b7e561 100644 --- a/src/openai/types/moderation_create_params.py +++ b/src/openai/types/moderation_create_params.py @@ -2,24 +2,29 @@ from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable +from typing_extensions import Required, TypedDict + +from .._types import SequenceNotStr +from .moderation_model import ModerationModel +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["ModerationCreateParams"] class ModerationCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str]]] - """The input text to classify""" + input: Required[Union[str, SequenceNotStr[str], Iterable[ModerationMultiModalInputParam]]] + """Input (or inputs) to classify. - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + Can be a single string, an array of strings, or an array of multi-modal input + objects similar to other models. """ - Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. - - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + + model: Union[str, ModerationModel] + """The content moderation model you would like to use. + + Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). """ diff --git a/src/openai/types/moderation_create_response.py b/src/openai/types/moderation_create_response.py index 79684f8a70..23c03875bf 100644 --- a/src/openai/types/moderation_create_response.py +++ b/src/openai/types/moderation_create_response.py @@ -9,6 +9,8 @@ class ModerationCreateResponse(BaseModel): + """Represents if a given text input is potentially harmful.""" + id: str """The unique identifier for the moderation request.""" diff --git a/src/openai/types/moderation_image_url_input_param.py b/src/openai/types/moderation_image_url_input_param.py new file mode 100644 index 0000000000..9c0fe25685 --- /dev/null +++ b/src/openai/types/moderation_image_url_input_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationImageURLInputParam", "ImageURL"] + + +class ImageURL(TypedDict, total=False): + """Contains either an image URL or a data URL for a base64 encoded image.""" + + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + +class ModerationImageURLInputParam(TypedDict, total=False): + """An object describing an image to classify.""" + + image_url: Required[ImageURL] + """Contains either an image URL or a data URL for a base64 encoded image.""" + + type: Required[Literal["image_url"]] + """Always `image_url`.""" diff --git a/src/openai/types/moderation_model.py b/src/openai/types/moderation_model.py new file mode 100644 index 0000000000..64954c4547 --- /dev/null +++ b/src/openai/types/moderation_model.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ModerationModel"] + +ModerationModel: TypeAlias = Literal[ + "omni-moderation-latest", "omni-moderation-2024-09-26", "text-moderation-latest", "text-moderation-stable" +] diff --git a/src/openai/types/moderation_multi_modal_input_param.py b/src/openai/types/moderation_multi_modal_input_param.py new file mode 100644 index 0000000000..4314e7b031 --- /dev/null +++ b/src/openai/types/moderation_multi_modal_input_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .moderation_text_input_param import ModerationTextInputParam +from .moderation_image_url_input_param import ModerationImageURLInputParam + +__all__ = ["ModerationMultiModalInputParam"] + +ModerationMultiModalInputParam: TypeAlias = Union[ModerationImageURLInputParam, ModerationTextInputParam] diff --git a/src/openai/types/moderation_text_input_param.py b/src/openai/types/moderation_text_input_param.py new file mode 100644 index 0000000000..786ecbe625 --- /dev/null +++ b/src/openai/types/moderation_text_input_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationTextInputParam"] + + +class ModerationTextInputParam(TypedDict, total=False): + """An object describing text to classify.""" + + text: Required[str] + """A string of text to classify.""" + + type: Required[Literal["text"]] + """Always `text`.""" diff --git a/src/openai/types/other_file_chunking_strategy_object.py b/src/openai/types/other_file_chunking_strategy_object.py new file mode 100644 index 0000000000..a5371425d7 --- /dev/null +++ b/src/openai/types/other_file_chunking_strategy_object.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["OtherFileChunkingStrategyObject"] + + +class OtherFileChunkingStrategyObject(BaseModel): + """This is returned when the chunking strategy is unknown. + + Typically, this is because the file was indexed before the `chunking_strategy` concept was introduced in the API. 
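Returning to the moderation changes above, a hedged sketch of a multi-modal moderation request built from the new input param shapes; the text and image URL are placeholders, and the field access mirrors the updated Moderation model in this diff.

from openai import OpenAI

client = OpenAI()

response = client.moderations.create(
    model="omni-moderation-latest",
    input=[
        {"type": "text", "text": "...text to classify goes here..."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
    ],
)

result = response.results[0]
print(result.flagged)
# Which input modalities contributed to each category score:
print(result.category_applied_input_types.violence)  # e.g. ["image", "text"]
print(result.category_scores.violence)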
+ """ + + type: Literal["other"] + """Always `other`.""" diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py new file mode 100644 index 0000000000..c2a141d727 --- /dev/null +++ b/src/openai/types/realtime/__init__.py @@ -0,0 +1,240 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .realtime_error import RealtimeError as RealtimeError +from .call_refer_params import CallReferParams as CallReferParams +from .conversation_item import ConversationItem as ConversationItem +from .realtime_response import RealtimeResponse as RealtimeResponse +from .call_accept_params import CallAcceptParams as CallAcceptParams +from .call_create_params import CallCreateParams as CallCreateParams +from .call_reject_params import CallRejectParams as CallRejectParams +from .audio_transcription import AudioTranscription as AudioTranscription +from .log_prob_properties import LogProbProperties as LogProbProperties +from .realtime_truncation import RealtimeTruncation as RealtimeTruncation +from .response_done_event import ResponseDoneEvent as ResponseDoneEvent +from .noise_reduction_type import NoiseReductionType as NoiseReductionType +from .realtime_error_event import RealtimeErrorEvent as RealtimeErrorEvent +from .session_update_event import SessionUpdateEvent as SessionUpdateEvent +from .mcp_list_tools_failed import McpListToolsFailed as McpListToolsFailed +from .realtime_audio_config import RealtimeAudioConfig as RealtimeAudioConfig +from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent +from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent +from .realtime_tools_config import RealtimeToolsConfig as RealtimeToolsConfig +from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent +from .response_create_event import ResponseCreateEvent as ResponseCreateEvent +from .session_created_event import SessionCreatedEvent as SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent +from .conversation_item_done import ConversationItemDone as ConversationItemDone +from .realtime_audio_formats import RealtimeAudioFormats as RealtimeAudioFormats +from .realtime_function_tool import RealtimeFunctionTool as RealtimeFunctionTool +from .realtime_mcp_tool_call import RealtimeMcpToolCall as RealtimeMcpToolCall +from .realtime_mcphttp_error import RealtimeMcphttpError as RealtimeMcphttpError +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .conversation_item_added import ConversationItemAdded as ConversationItemAdded +from .conversation_item_param import ConversationItemParam as ConversationItemParam +from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams +from .realtime_mcp_list_tools import RealtimeMcpListTools as RealtimeMcpListTools +from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage +from .realtime_tracing_config import RealtimeTracingConfig as RealtimeTracingConfig +from .mcp_list_tools_completed import McpListToolsCompleted as McpListToolsCompleted +from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus +from .response_mcp_call_failed import ResponseMcpCallFailed as ResponseMcpCallFailed +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .audio_transcription_param import AudioTranscriptionParam as AudioTranscriptionParam +from 
.rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent +from .realtime_truncation_param import RealtimeTruncationParam as RealtimeTruncationParam +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent +from .mcp_list_tools_in_progress import McpListToolsInProgress as McpListToolsInProgress +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam +from .client_secret_create_params import ClientSecretCreateParams as ClientSecretCreateParams +from .realtime_audio_config_input import RealtimeAudioConfigInput as RealtimeAudioConfigInput +from .realtime_audio_config_param import RealtimeAudioConfigParam as RealtimeAudioConfigParam +from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam +from .realtime_mcp_protocol_error import RealtimeMcpProtocolError as RealtimeMcpProtocolError +from .realtime_tool_choice_config import RealtimeToolChoiceConfig as RealtimeToolChoiceConfig +from .realtime_tools_config_param import RealtimeToolsConfigParam as RealtimeToolsConfigParam +from .realtime_tools_config_union import RealtimeToolsConfigUnion as RealtimeToolsConfigUnion +from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam +from .response_mcp_call_completed import ResponseMcpCallCompleted as ResponseMcpCallCompleted +from .realtime_audio_config_output import RealtimeAudioConfigOutput as RealtimeAudioConfigOutput +from .realtime_audio_formats_param import RealtimeAudioFormatsParam as RealtimeAudioFormatsParam +from .realtime_function_tool_param import RealtimeFunctionToolParam as RealtimeFunctionToolParam +from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam as RealtimeMcpToolCallParam +from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam as RealtimeMcphttpErrorParam +from .client_secret_create_response import ClientSecretCreateResponse as ClientSecretCreateResponse +from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest +from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam as RealtimeMcpListToolsParam +from .realtime_tracing_config_param import RealtimeTracingConfigParam as RealtimeTracingConfigParam +from .response_mcp_call_in_progress import ResponseMcpCallInProgress as ResponseMcpCallInProgress +from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent +from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse +from .realtime_session_client_secret import RealtimeSessionClientSecret as RealtimeSessionClientSecret +from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent +from .input_audio_buffer_append_event import 
InputAudioBufferAppendEvent as InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent +from .output_audio_buffer_clear_event import OutputAudioBufferClearEvent as OutputAudioBufferClearEvent +from .realtime_response_create_params import RealtimeResponseCreateParams as RealtimeResponseCreateParams +from .realtime_session_create_request import RealtimeSessionCreateRequest as RealtimeSessionCreateRequest +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .conversation_item_retrieve_event import ConversationItemRetrieveEvent as ConversationItemRetrieveEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent +from .realtime_session_create_response import RealtimeSessionCreateResponse as RealtimeSessionCreateResponse +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_mcp_call_arguments_done import ResponseMcpCallArgumentsDone as ResponseMcpCallArgumentsDone +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent +from .realtime_audio_config_input_param import RealtimeAudioConfigInputParam as RealtimeAudioConfigInputParam +from .realtime_mcp_protocol_error_param import RealtimeMcpProtocolErrorParam as RealtimeMcpProtocolErrorParam +from .realtime_mcp_tool_execution_error import RealtimeMcpToolExecutionError as RealtimeMcpToolExecutionError +from .realtime_response_create_mcp_tool import RealtimeResponseCreateMcpTool as RealtimeResponseCreateMcpTool +from .realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam as RealtimeToolChoiceConfigParam +from .realtime_tools_config_union_param import RealtimeToolsConfigUnionParam as RealtimeToolsConfigUnionParam +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent +from .realtime_audio_config_output_param import RealtimeAudioConfigOutputParam as RealtimeAudioConfigOutputParam +from .realtime_audio_input_turn_detection import RealtimeAudioInputTurnDetection as RealtimeAudioInputTurnDetection +from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam as RealtimeMcpApprovalRequestParam +from .realtime_truncation_retention_ratio import RealtimeTruncationRetentionRatio as RealtimeTruncationRetentionRatio +from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam +from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered as InputAudioBufferTimeoutTriggered +from .realtime_mcp_approval_response_param import RealtimeMcpApprovalResponseParam as RealtimeMcpApprovalResponseParam +from 
.realtime_transcription_session_audio import RealtimeTranscriptionSessionAudio as RealtimeTranscriptionSessionAudio +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam +from .output_audio_buffer_clear_event_param import OutputAudioBufferClearEventParam as OutputAudioBufferClearEventParam +from .realtime_response_create_audio_output import ( + RealtimeResponseCreateAudioOutput as RealtimeResponseCreateAudioOutput, +) +from .realtime_response_create_params_param import ( + RealtimeResponseCreateParamsParam as RealtimeResponseCreateParamsParam, +) +from .realtime_session_create_request_param import ( + RealtimeSessionCreateRequestParam as RealtimeSessionCreateRequestParam, +) +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .conversation_item_retrieve_event_param import ( + ConversationItemRetrieveEventParam as ConversationItemRetrieveEventParam, +) +from .conversation_item_truncate_event_param import ( + ConversationItemTruncateEventParam as ConversationItemTruncateEventParam, +) +from .input_audio_buffer_speech_started_event import ( + InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent, +) +from .input_audio_buffer_speech_stopped_event import ( + InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent, +) +from .realtime_conversation_item_user_message import ( + RealtimeConversationItemUserMessage as RealtimeConversationItemUserMessage, +) +from .realtime_mcp_tool_execution_error_param import ( + RealtimeMcpToolExecutionErrorParam as RealtimeMcpToolExecutionErrorParam, +) +from .realtime_response_create_mcp_tool_param import ( + RealtimeResponseCreateMcpToolParam as RealtimeResponseCreateMcpToolParam, +) +from .realtime_conversation_item_function_call import ( + RealtimeConversationItemFunctionCall as RealtimeConversationItemFunctionCall, +) +from .realtime_audio_input_turn_detection_param import ( + RealtimeAudioInputTurnDetectionParam as RealtimeAudioInputTurnDetectionParam, +) +from .realtime_conversation_item_system_message import ( + RealtimeConversationItemSystemMessage as RealtimeConversationItemSystemMessage, +) +from .realtime_truncation_retention_ratio_param import ( + RealtimeTruncationRetentionRatioParam as RealtimeTruncationRetentionRatioParam, +) +from .realtime_transcription_session_audio_input import ( + RealtimeTranscriptionSessionAudioInput as RealtimeTranscriptionSessionAudioInput, +) +from .realtime_transcription_session_audio_param import ( + RealtimeTranscriptionSessionAudioParam as RealtimeTranscriptionSessionAudioParam, +) +from .realtime_response_create_audio_output_param import ( + RealtimeResponseCreateAudioOutputParam as RealtimeResponseCreateAudioOutputParam, +) +from .realtime_response_usage_input_token_details import ( + RealtimeResponseUsageInputTokenDetails as RealtimeResponseUsageInputTokenDetails, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .input_audio_buffer_dtmf_event_received_event import ( + InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent, +) +from 
.realtime_conversation_item_assistant_message import ( + RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage, +) +from .realtime_response_usage_output_token_details import ( + RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .realtime_conversation_item_user_message_param import ( + RealtimeConversationItemUserMessageParam as RealtimeConversationItemUserMessageParam, +) +from .realtime_transcription_session_create_request import ( + RealtimeTranscriptionSessionCreateRequest as RealtimeTranscriptionSessionCreateRequest, +) +from .realtime_transcription_session_turn_detection import ( + RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection, +) +from .realtime_conversation_item_function_call_param import ( + RealtimeConversationItemFunctionCallParam as RealtimeConversationItemFunctionCallParam, +) +from .realtime_transcription_session_create_response import ( + RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse, +) +from .realtime_conversation_item_function_call_output import ( + RealtimeConversationItemFunctionCallOutput as RealtimeConversationItemFunctionCallOutput, +) +from .realtime_conversation_item_system_message_param import ( + RealtimeConversationItemSystemMessageParam as RealtimeConversationItemSystemMessageParam, +) +from .realtime_transcription_session_audio_input_param import ( + RealtimeTranscriptionSessionAudioInputParam as RealtimeTranscriptionSessionAudioInputParam, +) +from .realtime_conversation_item_assistant_message_param import ( + RealtimeConversationItemAssistantMessageParam as RealtimeConversationItemAssistantMessageParam, +) +from .conversation_item_input_audio_transcription_segment import ( + ConversationItemInputAudioTranscriptionSegment as ConversationItemInputAudioTranscriptionSegment, +) +from .realtime_transcription_session_create_request_param import ( + RealtimeTranscriptionSessionCreateRequestParam as RealtimeTranscriptionSessionCreateRequestParam, +) +from .realtime_conversation_item_function_call_output_param import ( + RealtimeConversationItemFunctionCallOutputParam as RealtimeConversationItemFunctionCallOutputParam, +) +from .conversation_item_input_audio_transcription_delta_event import ( + ConversationItemInputAudioTranscriptionDeltaEvent as ConversationItemInputAudioTranscriptionDeltaEvent, +) +from .conversation_item_input_audio_transcription_failed_event import ( + ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent, +) +from .realtime_transcription_session_audio_input_turn_detection import ( + RealtimeTranscriptionSessionAudioInputTurnDetection as RealtimeTranscriptionSessionAudioInputTurnDetection, +) +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent, +) +from .realtime_transcription_session_audio_input_turn_detection_param import ( + RealtimeTranscriptionSessionAudioInputTurnDetectionParam as RealtimeTranscriptionSessionAudioInputTurnDetectionParam, +) diff --git a/src/openai/types/realtime/audio_transcription.py b/src/openai/types/realtime/audio_transcription.py new file mode 100644 index 0000000000..3e5c8e0cb4 --- /dev/null +++ 
b/src/openai/types/realtime/audio_transcription.py @@ -0,0 +1,37 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["AudioTranscription"] + + +class AudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[ + Literal["whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe", "gpt-4o-transcribe-diarize"] + ] = None + """The model to use for transcription. + + Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, + and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need + diarization with speaker labels. + """ + + prompt: Optional[str] = None + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the + prompt is a free text string, for example "expect words related to technology". + """ diff --git a/src/openai/types/realtime/audio_transcription_param.py b/src/openai/types/realtime/audio_transcription_param.py new file mode 100644 index 0000000000..3b65e42c8f --- /dev/null +++ b/src/openai/types/realtime/audio_transcription_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["AudioTranscriptionParam"] + + +class AudioTranscriptionParam(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Literal["whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe", "gpt-4o-transcribe-diarize"] + """The model to use for transcription. + + Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, + and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need + diarization with speaker labels. + """ + + prompt: str + """ + An optional text to guide the model's style or continue a previous audio + segment. For `whisper-1`, the + [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the + prompt is a free text string, for example "expect words related to technology". + """ diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py new file mode 100644 index 0000000000..917b71cb0d --- /dev/null +++ b/src/openai/types/realtime/call_accept_params.py @@ -0,0 +1,127 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
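# Illustrative usage sketch (hypothetical, not generated from the OpenAPI spec):
# AudioTranscriptionParam above is a plain TypedDict, so an input-transcription
# configuration can be written as a literal dict. The concrete values below are
# examples only; the keys and allowed literals come from the definition above.
from openai.types.realtime import AudioTranscriptionParam

transcription: AudioTranscriptionParam = {
    "model": "gpt-4o-transcribe",  # one of the Literal model options listed above
    "language": "en",  # ISO-639-1 code; improves accuracy and latency
    "prompt": "expect words related to technology",  # free-text prompt for gpt-4o-transcribe models
}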
+ +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypedDict + +from .realtime_truncation_param import RealtimeTruncationParam +from .realtime_audio_config_param import RealtimeAudioConfigParam +from .realtime_tools_config_param import RealtimeToolsConfigParam +from .realtime_tracing_config_param import RealtimeTracingConfigParam +from ..responses.response_prompt_param import ResponsePromptParam +from .realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam + +__all__ = ["CallAcceptParams"] + + +class CallAcceptParams(TypedDict, total=False): + type: Required[Literal["realtime"]] + """The type of session to create. Always `realtime` for the Realtime API.""" + + audio: RealtimeAudioConfigParam + """Configuration for input and output audio.""" + + include: List[Literal["item.input_audio_transcription.logprobs"]] + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + ] + """The Realtime model used for this session.""" + + output_modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + It defaults to `["audio"]`, indicating that the model will respond with audio + plus a transcript. `["text"]` can be used to make the model respond with text + only. It is not possible to request both `text` and `audio` at the same time. + """ + + prompt: Optional[ResponsePromptParam] + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: RealtimeToolChoiceConfigParam + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. 
+ """ + + tools: RealtimeToolsConfigParam + """Tools available to the model.""" + + tracing: Optional[RealtimeTracingConfigParam] + """ + Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + """ + + truncation: RealtimeTruncationParam + """ + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. + """ diff --git a/src/openai/types/realtime/call_create_params.py b/src/openai/types/realtime/call_create_params.py new file mode 100644 index 0000000000..a378092a66 --- /dev/null +++ b/src/openai/types/realtime/call_create_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .realtime_session_create_request_param import RealtimeSessionCreateRequestParam + +__all__ = ["CallCreateParams"] + + +class CallCreateParams(TypedDict, total=False): + sdp: Required[str] + """WebRTC Session Description Protocol (SDP) offer generated by the caller.""" + + session: RealtimeSessionCreateRequestParam + """Realtime session object configuration.""" diff --git a/src/openai/types/realtime/call_refer_params.py b/src/openai/types/realtime/call_refer_params.py new file mode 100644 index 0000000000..3d8623855b --- /dev/null +++ b/src/openai/types/realtime/call_refer_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["CallReferParams"] + + +class CallReferParams(TypedDict, total=False): + target_uri: Required[str] + """URI that should appear in the SIP Refer-To header. + + Supports values like `tel:+14155550123` or `sip:agent@example.com`. + """ diff --git a/src/openai/types/realtime/call_reject_params.py b/src/openai/types/realtime/call_reject_params.py new file mode 100644 index 0000000000..f12222cded --- /dev/null +++ b/src/openai/types/realtime/call_reject_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["CallRejectParams"] + + +class CallRejectParams(TypedDict, total=False): + status_code: int + """SIP response code to send back to the caller. + + Defaults to `603` (Decline) when omitted. + """ diff --git a/src/openai/types/realtime/client_secret_create_params.py b/src/openai/types/realtime/client_secret_create_params.py new file mode 100644 index 0000000000..2297f3f6d2 --- /dev/null +++ b/src/openai/types/realtime/client_secret_create_params.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias, TypedDict + +from .realtime_session_create_request_param import RealtimeSessionCreateRequestParam +from .realtime_transcription_session_create_request_param import RealtimeTranscriptionSessionCreateRequestParam + +__all__ = ["ClientSecretCreateParams", "ExpiresAfter", "Session"] + + +class ClientSecretCreateParams(TypedDict, total=False): + expires_after: ExpiresAfter + """Configuration for the client secret expiration. + + Expiration refers to the time after which a client secret will no longer be + valid for creating sessions. The session itself may continue after that time + once started. A secret can be used to create multiple sessions until it expires. + """ + + session: Session + """Session configuration to use for the client secret. + + Choose either a realtime session or a transcription session. + """ + + +class ExpiresAfter(TypedDict, total=False): + """Configuration for the client secret expiration. + + Expiration refers to the time after which + a client secret will no longer be valid for creating sessions. The session itself may + continue after that time once started. A secret can be used to create multiple sessions + until it expires. + """ + + anchor: Literal["created_at"] + """ + The anchor point for the client secret expiration, meaning that `seconds` will + be added to the `created_at` time of the client secret to produce an expiration + timestamp. Only `created_at` is currently supported. + """ + + seconds: int + """The number of seconds from the anchor point to the expiration. + + Select a value between `10` and `7200` (2 hours). This defaults to 600 seconds + (10 minutes) if not specified. + """ + + +Session: TypeAlias = Union[RealtimeSessionCreateRequestParam, RealtimeTranscriptionSessionCreateRequestParam] diff --git a/src/openai/types/realtime/client_secret_create_response.py b/src/openai/types/realtime/client_secret_create_response.py new file mode 100644 index 0000000000..3a30b10544 --- /dev/null +++ b/src/openai/types/realtime/client_secret_create_response.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
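# Illustrative usage sketch (hypothetical, not generated from the OpenAPI spec):
# ClientSecretCreateParams above with an explicit expiration window. The
# `session` field is omitted here; its shape comes from
# RealtimeSessionCreateRequestParam / RealtimeTranscriptionSessionCreateRequestParam,
# which are defined elsewhere in this change.
from openai.types.realtime import ClientSecretCreateParams

secret_params: ClientSecretCreateParams = {
    "expires_after": {
        "anchor": "created_at",  # only `created_at` is currently supported
        "seconds": 600,  # 10 minutes; must be between 10 and 7200
    },
}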
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .realtime_session_create_response import RealtimeSessionCreateResponse +from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse + +__all__ = ["ClientSecretCreateResponse", "Session"] + +Session: TypeAlias = Annotated[ + Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse], PropertyInfo(discriminator="type") +] + + +class ClientSecretCreateResponse(BaseModel): + """Response from creating a session and client secret for the Realtime API.""" + + expires_at: int + """Expiration timestamp for the client secret, in seconds since epoch.""" + + session: Session + """The session configuration for either a realtime or transcription session.""" + + value: str + """The generated client secret value.""" diff --git a/src/openai/types/realtime/conversation_created_event.py b/src/openai/types/realtime/conversation_created_event.py new file mode 100644 index 0000000000..3026322e86 --- /dev/null +++ b/src/openai/types/realtime/conversation_created_event.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationCreatedEvent", "Conversation"] + + +class Conversation(BaseModel): + """The conversation resource.""" + + id: Optional[str] = None + """The unique ID of the conversation.""" + + object: Optional[Literal["realtime.conversation"]] = None + """The object type, must be `realtime.conversation`.""" + + +class ConversationCreatedEvent(BaseModel): + """Returned when a conversation is created. Emitted right after session creation.""" + + conversation: Conversation + """The conversation resource.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["conversation.created"] + """The event type, must be `conversation.created`.""" diff --git a/src/openai/types/realtime/conversation_item.py b/src/openai/types/realtime/conversation_item.py new file mode 100644 index 0000000000..be021520a2 --- /dev/null +++ b/src/openai/types/realtime/conversation_item.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
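# Illustrative usage sketch (hypothetical, not generated from the OpenAPI spec):
# reading the fields of ClientSecretCreateResponse above. The API call that
# returns this object is assumed and not shown here.
from openai.types.realtime import ClientSecretCreateResponse

def describe_secret(resp: ClientSecretCreateResponse) -> str:
    # `value` is the ephemeral client secret; `expires_at` is seconds since epoch.
    return f"client secret {resp.value[:8]}... expires at {resp.expires_at}"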
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .realtime_mcp_tool_call import RealtimeMcpToolCall +from .realtime_mcp_list_tools import RealtimeMcpListTools +from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest +from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse +from .realtime_conversation_item_user_message import RealtimeConversationItemUserMessage +from .realtime_conversation_item_function_call import RealtimeConversationItemFunctionCall +from .realtime_conversation_item_system_message import RealtimeConversationItemSystemMessage +from .realtime_conversation_item_assistant_message import RealtimeConversationItemAssistantMessage +from .realtime_conversation_item_function_call_output import RealtimeConversationItemFunctionCallOutput + +__all__ = ["ConversationItem"] + +ConversationItem: TypeAlias = Annotated[ + Union[ + RealtimeConversationItemSystemMessage, + RealtimeConversationItemUserMessage, + RealtimeConversationItemAssistantMessage, + RealtimeConversationItemFunctionCall, + RealtimeConversationItemFunctionCallOutput, + RealtimeMcpApprovalResponse, + RealtimeMcpListTools, + RealtimeMcpToolCall, + RealtimeMcpApprovalRequest, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/realtime/conversation_item_added.py b/src/openai/types/realtime/conversation_item_added.py new file mode 100644 index 0000000000..0e336a9261 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_added.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemAdded"] + + +class ConversationItemAdded(BaseModel): + """Sent by the server when an Item is added to the default Conversation. + + This can happen in several cases: + - When the client sends a `conversation.item.create` event. + - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer. + - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`). + + The event will include the full content of the Item (except when model is generating a Response) except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if necessary. + """ + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + type: Literal["conversation.item.added"] + """The event type, must be `conversation.item.added`.""" + + previous_item_id: Optional[str] = None + """The ID of the item that precedes this one, if any. + + This is used to maintain ordering when items are inserted. + """ diff --git a/src/openai/types/realtime/conversation_item_create_event.py b/src/openai/types/realtime/conversation_item_create_event.py new file mode 100644 index 0000000000..bf2d129744 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_create_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
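# Illustrative usage sketch (hypothetical, not generated from the OpenAPI spec):
# ConversationItem above is a union discriminated on the item `type`, so a
# handler for ConversationItemAdded can narrow the concrete item class with
# isinstance checks.
from openai.types.realtime import (
    ConversationItemAdded,
    RealtimeConversationItemUserMessage,
    RealtimeMcpToolCall,
)

def handle_item_added(event: ConversationItemAdded) -> None:
    item = event.item
    if isinstance(item, RealtimeConversationItemUserMessage):
        ...  # a user message item (text or audio) was added to the conversation
    elif isinstance(item, RealtimeMcpToolCall):
        ...  # an MCP tool call item was added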
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreateEvent"] + + +class ConversationItemCreateEvent(BaseModel): + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + + item: ConversationItem + """A single item within a Realtime conversation.""" + + type: Literal["conversation.item.create"] + """The event type, must be `conversation.item.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + previous_item_id: Optional[str] = None + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/realtime/conversation_item_create_event_param.py b/src/openai/types/realtime/conversation_item_create_event_param.py new file mode 100644 index 0000000000..be7f0ff011 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_create_event_param.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .conversation_item_param import ConversationItemParam + +__all__ = ["ConversationItemCreateEventParam"] + + +class ConversationItemCreateEventParam(TypedDict, total=False): + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + + item: Required[ConversationItemParam] + """A single item within a Realtime conversation.""" + + type: Required[Literal["conversation.item.create"]] + """The event type, must be `conversation.item.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. 
+ """ diff --git a/src/openai/types/realtime/conversation_item_created_event.py b/src/openai/types/realtime/conversation_item_created_event.py new file mode 100644 index 0000000000..6ae6f05ffe --- /dev/null +++ b/src/openai/types/realtime/conversation_item_created_event.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreatedEvent"] + + +class ConversationItemCreatedEvent(BaseModel): + """Returned when a conversation item is created. + + There are several scenarios that produce this event: + - The server is generating a Response, which if successful will produce + either one or two Items, which will be of type `message` + (role `assistant`) or type `function_call`. + - The input audio buffer has been committed, either by the client or the + server (in `server_vad` mode). The server will take the content of the + input audio buffer and add it to a new user message Item. + - The client has sent a `conversation.item.create` event to add a new Item + to the Conversation. + """ + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + type: Literal["conversation.item.created"] + """The event type, must be `conversation.item.created`.""" + + previous_item_id: Optional[str] = None + """ + The ID of the preceding item in the Conversation context, allows the client to + understand the order of the conversation. Can be `null` if the item has no + predecessor. + """ diff --git a/src/openai/types/realtime/conversation_item_delete_event.py b/src/openai/types/realtime/conversation_item_delete_event.py new file mode 100644 index 0000000000..c662f386e3 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_delete_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemDeleteEvent"] + + +class ConversationItemDeleteEvent(BaseModel): + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + + item_id: str + """The ID of the item to delete.""" + + type: Literal["conversation.item.delete"] + """The event type, must be `conversation.item.delete`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_delete_event_param.py b/src/openai/types/realtime/conversation_item_delete_event_param.py new file mode 100644 index 0000000000..e79bb68c9a --- /dev/null +++ b/src/openai/types/realtime/conversation_item_delete_event_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemDeleteEventParam"] + + +class ConversationItemDeleteEventParam(TypedDict, total=False): + """Send this event when you want to remove any item from the conversation + history. 
+ + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + + item_id: Required[str] + """The ID of the item to delete.""" + + type: Required[Literal["conversation.item.delete"]] + """The event type, must be `conversation.item.delete`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_deleted_event.py b/src/openai/types/realtime/conversation_item_deleted_event.py new file mode 100644 index 0000000000..9826289ebf --- /dev/null +++ b/src/openai/types/realtime/conversation_item_deleted_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemDeletedEvent"] + + +class ConversationItemDeletedEvent(BaseModel): + """ + Returned when an item in the conversation is deleted by the client with a + `conversation.item.delete` event. This event is used to synchronize the + server's understanding of the conversation history with the client's view. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item that was deleted.""" + + type: Literal["conversation.item.deleted"] + """The event type, must be `conversation.item.deleted`.""" diff --git a/src/openai/types/realtime/conversation_item_done.py b/src/openai/types/realtime/conversation_item_done.py new file mode 100644 index 0000000000..6a823c65a8 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_done.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemDone"] + + +class ConversationItemDone(BaseModel): + """Returned when a conversation item is finalized. + + The event will include the full content of the Item except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if needed. + """ + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + type: Literal["conversation.item.done"] + """The event type, must be `conversation.item.done`.""" + + previous_item_id: Optional[str] = None + """The ID of the item that precedes this one, if any. + + This is used to maintain ordering when items are inserted. + """ diff --git a/src/openai/types/realtime/conversation_item_input_audio_transcription_completed_event.py b/src/openai/types/realtime/conversation_item_input_audio_transcription_completed_event.py new file mode 100644 index 0000000000..3304233f8f --- /dev/null +++ b/src/openai/types/realtime/conversation_item_input_audio_transcription_completed_event.py @@ -0,0 +1,98 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .log_prob_properties import LogProbProperties + +__all__ = [ + "ConversationItemInputAudioTranscriptionCompletedEvent", + "Usage", + "UsageTranscriptTextUsageTokens", + "UsageTranscriptTextUsageTokensInputTokenDetails", + "UsageTranscriptTextUsageDuration", +] + + +class UsageTranscriptTextUsageTokensInputTokenDetails(BaseModel): + """Details about the input tokens billed for this request.""" + + audio_tokens: Optional[int] = None + """Number of audio tokens billed for this request.""" + + text_tokens: Optional[int] = None + """Number of text tokens billed for this request.""" + + +class UsageTranscriptTextUsageTokens(BaseModel): + """Usage statistics for models billed by token usage.""" + + input_tokens: int + """Number of input tokens billed for this request.""" + + output_tokens: int + """Number of output tokens generated.""" + + total_tokens: int + """Total number of tokens used (input + output).""" + + type: Literal["tokens"] + """The type of the usage object. Always `tokens` for this variant.""" + + input_token_details: Optional[UsageTranscriptTextUsageTokensInputTokenDetails] = None + """Details about the input tokens billed for this request.""" + + +class UsageTranscriptTextUsageDuration(BaseModel): + """Usage statistics for models billed by audio input duration.""" + + seconds: float + """Duration of the input audio in seconds.""" + + type: Literal["duration"] + """The type of the usage object. Always `duration` for this variant.""" + + +Usage: TypeAlias = Union[UsageTranscriptTextUsageTokens, UsageTranscriptTextUsageDuration] + + +class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel): + """ + This event is the output of audio transcription for user audio written to the + user audio buffer. Transcription begins when the input audio buffer is + committed by the client or server (when VAD is enabled). Transcription runs + asynchronously with Response creation, so this event may come before or after + the Response events. + + Realtime API models accept audio natively, and thus input transcription is a + separate process run on a separate ASR (Automatic Speech Recognition) model. + The transcript may diverge somewhat from the model's interpretation, and + should be treated as a rough guide. + """ + + content_index: int + """The index of the content part containing the audio.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item containing the audio that is being transcribed.""" + + transcript: str + """The transcribed text.""" + + type: Literal["conversation.item.input_audio_transcription.completed"] + """ + The event type, must be `conversation.item.input_audio_transcription.completed`. + """ + + usage: Usage + """ + Usage statistics for the transcription, this is billed according to the ASR + model's pricing rather than the realtime model's pricing. + """ + + logprobs: Optional[List[LogProbProperties]] = None + """The log probabilities of the transcription.""" diff --git a/src/openai/types/realtime/conversation_item_input_audio_transcription_delta_event.py b/src/openai/types/realtime/conversation_item_input_audio_transcription_delta_event.py new file mode 100644 index 0000000000..5f3f54810f --- /dev/null +++ b/src/openai/types/realtime/conversation_item_input_audio_transcription_delta_event.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .log_prob_properties import LogProbProperties + +__all__ = ["ConversationItemInputAudioTranscriptionDeltaEvent"] + + +class ConversationItemInputAudioTranscriptionDeltaEvent(BaseModel): + """ + Returned when the text value of an input audio transcription content part is updated with incremental transcription results. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item containing the audio that is being transcribed.""" + + type: Literal["conversation.item.input_audio_transcription.delta"] + """The event type, must be `conversation.item.input_audio_transcription.delta`.""" + + content_index: Optional[int] = None + """The index of the content part in the item's content array.""" + + delta: Optional[str] = None + """The text delta.""" + + logprobs: Optional[List[LogProbProperties]] = None + """The log probabilities of the transcription. + + These can be enabled by configuring the session with + `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the + array corresponds to a log probability of which token would be selected for this + chunk of transcription. This can help to identify if it was possible there were + multiple valid options for a given chunk of transcription. + """ diff --git a/src/openai/types/realtime/conversation_item_input_audio_transcription_failed_event.py b/src/openai/types/realtime/conversation_item_input_audio_transcription_failed_event.py new file mode 100644 index 0000000000..e8ad05e43c --- /dev/null +++ b/src/openai/types/realtime/conversation_item_input_audio_transcription_failed_event.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"] + + +class Error(BaseModel): + """Details of the transcription error.""" + + code: Optional[str] = None + """Error code, if any.""" + + message: Optional[str] = None + """A human-readable error message.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel): + """ + Returned when input audio transcription is configured, and a transcription + request for a user message failed. These events are separate from other + `error` events so that the client can identify the related Item. + """ + + content_index: int + """The index of the content part containing the audio.""" + + error: Error + """Details of the transcription error.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item.""" + + type: Literal["conversation.item.input_audio_transcription.failed"] + """The event type, must be `conversation.item.input_audio_transcription.failed`.""" diff --git a/src/openai/types/realtime/conversation_item_input_audio_transcription_segment.py b/src/openai/types/realtime/conversation_item_input_audio_transcription_segment.py new file mode 100644 index 0000000000..dcc4916580 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_input_audio_transcription_segment.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionSegment"] + + +class ConversationItemInputAudioTranscriptionSegment(BaseModel): + """Returned when an input audio transcription segment is identified for an item.""" + + id: str + """The segment identifier.""" + + content_index: int + """The index of the input audio content part within the item.""" + + end: float + """End time of the segment in seconds.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item containing the input audio content.""" + + speaker: str + """The detected speaker label for this segment.""" + + start: float + """Start time of the segment in seconds.""" + + text: str + """The text for this segment.""" + + type: Literal["conversation.item.input_audio_transcription.segment"] + """The event type, must be `conversation.item.input_audio_transcription.segment`.""" diff --git a/src/openai/types/realtime/conversation_item_param.py b/src/openai/types/realtime/conversation_item_param.py new file mode 100644 index 0000000000..c8b442ecad --- /dev/null +++ b/src/openai/types/realtime/conversation_item_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam +from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam +from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam +from .realtime_mcp_approval_response_param import RealtimeMcpApprovalResponseParam +from .realtime_conversation_item_user_message_param import RealtimeConversationItemUserMessageParam +from .realtime_conversation_item_function_call_param import RealtimeConversationItemFunctionCallParam +from .realtime_conversation_item_system_message_param import RealtimeConversationItemSystemMessageParam +from .realtime_conversation_item_assistant_message_param import RealtimeConversationItemAssistantMessageParam +from .realtime_conversation_item_function_call_output_param import RealtimeConversationItemFunctionCallOutputParam + +__all__ = ["ConversationItemParam"] + +ConversationItemParam: TypeAlias = Union[ + RealtimeConversationItemSystemMessageParam, + RealtimeConversationItemUserMessageParam, + RealtimeConversationItemAssistantMessageParam, + RealtimeConversationItemFunctionCallParam, + RealtimeConversationItemFunctionCallOutputParam, + RealtimeMcpApprovalResponseParam, + RealtimeMcpListToolsParam, + RealtimeMcpToolCallParam, + RealtimeMcpApprovalRequestParam, +] diff --git a/src/openai/types/realtime/conversation_item_retrieve_event.py b/src/openai/types/realtime/conversation_item_retrieve_event.py new file mode 100644 index 0000000000..e7d8eb6c49 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_retrieve_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemRetrieveEvent"] + + +class ConversationItemRetrieveEvent(BaseModel): + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. 
+ The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + + item_id: str + """The ID of the item to retrieve.""" + + type: Literal["conversation.item.retrieve"] + """The event type, must be `conversation.item.retrieve`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_retrieve_event_param.py b/src/openai/types/realtime/conversation_item_retrieve_event_param.py new file mode 100644 index 0000000000..59fdb6fb93 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_retrieve_event_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemRetrieveEventParam"] + + +class ConversationItemRetrieveEventParam(TypedDict, total=False): + """ + Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD. + The server will respond with a `conversation.item.retrieved` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + + item_id: Required[str] + """The ID of the item to retrieve.""" + + type: Required[Literal["conversation.item.retrieve"]] + """The event type, must be `conversation.item.retrieve`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_truncate_event.py b/src/openai/types/realtime/conversation_item_truncate_event.py new file mode 100644 index 0000000000..16c82183c4 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_truncate_event.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemTruncateEvent"] + + +class ConversationItemTruncateEvent(BaseModel): + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + + audio_end_ms: int + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: int + """The index of the content part to truncate. Set this to `0`.""" + + item_id: str + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. 
+ """ + + type: Literal["conversation.item.truncate"] + """The event type, must be `conversation.item.truncate`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_truncate_event_param.py b/src/openai/types/realtime/conversation_item_truncate_event_param.py new file mode 100644 index 0000000000..e9b41fc980 --- /dev/null +++ b/src/openai/types/realtime/conversation_item_truncate_event_param.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemTruncateEventParam"] + + +class ConversationItemTruncateEventParam(TypedDict, total=False): + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + + audio_end_ms: Required[int] + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: Required[int] + """The index of the content part to truncate. Set this to `0`.""" + + item_id: Required[str] + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Required[Literal["conversation.item.truncate"]] + """The event type, must be `conversation.item.truncate`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/conversation_item_truncated_event.py b/src/openai/types/realtime/conversation_item_truncated_event.py new file mode 100644 index 0000000000..c78a776d9b --- /dev/null +++ b/src/openai/types/realtime/conversation_item_truncated_event.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ConversationItemTruncatedEvent"] + + +class ConversationItemTruncatedEvent(BaseModel): + """ + Returned when an earlier assistant audio message item is truncated by the + client with a `conversation.item.truncate` event. This event is used to + synchronize the server's understanding of the audio with the client's playback. + + This action will truncate the audio and remove the server-side text transcript + to ensure there is no text in the context that hasn't been heard by the user. 
+ """ + + audio_end_ms: int + """The duration up to which the audio was truncated, in milliseconds.""" + + content_index: int + """The index of the content part that was truncated.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the assistant message item that was truncated.""" + + type: Literal["conversation.item.truncated"] + """The event type, must be `conversation.item.truncated`.""" diff --git a/src/openai/types/realtime/input_audio_buffer_append_event.py b/src/openai/types/realtime/input_audio_buffer_append_event.py new file mode 100644 index 0000000000..4c9e9a544d --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_append_event.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferAppendEvent"] + + +class InputAudioBufferAppendEvent(BaseModel): + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. A "commit" will create a new + user message item in the conversation history from the buffer content and clear the buffer. + Input audio transcription (if enabled) will be generated when the buffer is committed. + + If VAD is enabled the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. Input audio noise reduction operates on writes to the audio buffer. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event. + """ + + audio: str + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Literal["input_audio_buffer.append"] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_append_event_param.py b/src/openai/types/realtime/input_audio_buffer_append_event_param.py new file mode 100644 index 0000000000..a0d308e4d9 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_append_event_param.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferAppendEventParam"] + + +class InputAudioBufferAppendEventParam(TypedDict, total=False): + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. A "commit" will create a new + user message item in the conversation history from the buffer content and clear the buffer. + Input audio transcription (if enabled) will be generated when the buffer is committed. + + If VAD is enabled the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. Input audio noise reduction operates on writes to the audio buffer. 
+ + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event. + """ + + audio: Required[str] + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Required[Literal["input_audio_buffer.append"]] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_clear_event.py b/src/openai/types/realtime/input_audio_buffer_clear_event.py new file mode 100644 index 0000000000..5526bcbfa9 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_clear_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferClearEvent"] + + +class InputAudioBufferClearEvent(BaseModel): + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + + type: Literal["input_audio_buffer.clear"] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_clear_event_param.py b/src/openai/types/realtime/input_audio_buffer_clear_event_param.py new file mode 100644 index 0000000000..8e0e9c55fa --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_clear_event_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferClearEventParam"] + + +class InputAudioBufferClearEventParam(TypedDict, total=False): + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + + type: Required[Literal["input_audio_buffer.clear"]] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_cleared_event.py b/src/openai/types/realtime/input_audio_buffer_cleared_event.py new file mode 100644 index 0000000000..e4775567dc --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_cleared_event.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferClearedEvent"] + + +class InputAudioBufferClearedEvent(BaseModel): + """ + Returned when the input audio buffer is cleared by the client with a + `input_audio_buffer.clear` event. 
+ """ + + event_id: str + """The unique ID of the server event.""" + + type: Literal["input_audio_buffer.cleared"] + """The event type, must be `input_audio_buffer.cleared`.""" diff --git a/src/openai/types/realtime/input_audio_buffer_commit_event.py b/src/openai/types/realtime/input_audio_buffer_commit_event.py new file mode 100644 index 0000000000..fe2ec01783 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_commit_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferCommitEvent"] + + +class InputAudioBufferCommitEvent(BaseModel): + """ + Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + + Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event. + """ + + type: Literal["input_audio_buffer.commit"] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_commit_event_param.py b/src/openai/types/realtime/input_audio_buffer_commit_event_param.py new file mode 100644 index 0000000000..20342795e8 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_commit_event_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferCommitEventParam"] + + +class InputAudioBufferCommitEventParam(TypedDict, total=False): + """ + Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically. + + Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event. + """ + + type: Required[Literal["input_audio_buffer.commit"]] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/input_audio_buffer_committed_event.py b/src/openai/types/realtime/input_audio_buffer_committed_event.py new file mode 100644 index 0000000000..15dc8254f3 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_committed_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
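# Illustrative sketch (not part of the generated module): driving the manual,
# non-VAD append/commit flow described by the buffer events above. The
# `connection` object and its `send(...)` method are assumptions about the SDK's
# realtime connection helper; the payload shapes follow the *_param types in
# this patch.
import base64


def append_and_commit(connection, pcm_chunk: bytes) -> None:
    # Append base64-encoded audio bytes to the input audio buffer.
    connection.send(
        {
            "type": "input_audio_buffer.append",
            "audio": base64.b64encode(pcm_chunk).decode("ascii"),
        }
    )
    # Committing creates a user message item from the buffered audio and clears
    # the buffer; the server replies with `input_audio_buffer.committed` but does
    # not generate a model response.
    connection.send({"type": "input_audio_buffer.commit"})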
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferCommittedEvent"]
+
+
+class InputAudioBufferCommittedEvent(BaseModel):
+    """
+    Returned when an input audio buffer is committed, either by the client or
+    automatically in server VAD mode. The `item_id` property is the ID of the user
+    message item that will be created, thus a `conversation.item.created` event
+    will also be sent to the client.
+    """
+
+    event_id: str
+    """The unique ID of the server event."""
+
+    item_id: str
+    """The ID of the user message item that will be created."""
+
+    type: Literal["input_audio_buffer.committed"]
+    """The event type, must be `input_audio_buffer.committed`."""
+
+    previous_item_id: Optional[str] = None
+    """
+    The ID of the preceding item after which the new item will be inserted. Can be
+    `null` if the item has no predecessor.
+    """
diff --git a/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
new file mode 100644
index 0000000000..c2623cc7b8
--- /dev/null
+++ b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferDtmfEventReceivedEvent"]
+
+
+class InputAudioBufferDtmfEventReceivedEvent(BaseModel):
+    """**SIP Only:** Returned when a DTMF event is received.
+
+    A DTMF event is a message that
+    represents a telephone keypad press (0–9, *, #, A–D). The `event` property
+    is the key that the user pressed. The `received_at` field is the UTC Unix
+    timestamp at which the server received the event.
+    """
+
+    event: str
+    """The telephone keypad key that was pressed by the user."""
+
+    received_at: int
+    """UTC Unix timestamp when the DTMF event was received by the server."""
+
+    type: Literal["input_audio_buffer.dtmf_event_received"]
+    """The event type, must be `input_audio_buffer.dtmf_event_received`."""
diff --git a/src/openai/types/realtime/input_audio_buffer_speech_started_event.py b/src/openai/types/realtime/input_audio_buffer_speech_started_event.py
new file mode 100644
index 0000000000..1bd4c74eb0
--- /dev/null
+++ b/src/openai/types/realtime/input_audio_buffer_speech_started_event.py
@@ -0,0 +1,39 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferSpeechStartedEvent"]
+
+
+class InputAudioBufferSpeechStartedEvent(BaseModel):
+    """
+    Sent by the server when in `server_vad` mode to indicate that speech has been
+    detected in the audio buffer. This can happen any time audio is added to the
+    buffer (unless speech is already detected). The client may want to use this
+    event to interrupt audio playback or provide visual feedback to the user.
+
+    The client should expect to receive an `input_audio_buffer.speech_stopped` event
+    when speech stops. The `item_id` property is the ID of the user message item
+    that will be created when speech stops and will also be included in the
+    `input_audio_buffer.speech_stopped` event (unless the client manually commits
+    the audio buffer during VAD activation).
+    """
+
+    audio_start_ms: int
+    """
+    Milliseconds from the start of all audio written to the buffer during the
+    session when speech was first detected.
This will correspond to the beginning of + audio sent to the model, and thus includes the `prefix_padding_ms` configured in + the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created when speech stops.""" + + type: Literal["input_audio_buffer.speech_started"] + """The event type, must be `input_audio_buffer.speech_started`.""" diff --git a/src/openai/types/realtime/input_audio_buffer_speech_stopped_event.py b/src/openai/types/realtime/input_audio_buffer_speech_stopped_event.py new file mode 100644 index 0000000000..b3fb20929a --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_speech_stopped_event.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStoppedEvent"] + + +class InputAudioBufferSpeechStoppedEvent(BaseModel): + """ + Returned in `server_vad` mode when the server detects the end of speech in + the audio buffer. The server will also send an `conversation.item.created` + event with the user message item that is created from the audio buffer. + """ + + audio_end_ms: int + """Milliseconds since the session started when speech stopped. + + This will correspond to the end of audio sent to the model, and thus includes + the `min_silence_duration_ms` configured in the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + type: Literal["input_audio_buffer.speech_stopped"] + """The event type, must be `input_audio_buffer.speech_stopped`.""" diff --git a/src/openai/types/realtime/input_audio_buffer_timeout_triggered.py b/src/openai/types/realtime/input_audio_buffer_timeout_triggered.py new file mode 100644 index 0000000000..72b107d56e --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_timeout_triggered.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferTimeoutTriggered"] + + +class InputAudioBufferTimeoutTriggered(BaseModel): + """Returned when the Server VAD timeout is triggered for the input audio buffer. + + This is configured + with `idle_timeout_ms` in the `turn_detection` settings of the session, and it indicates that + there hasn't been any speech detected for the configured duration. + + The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last + model response up to the triggering time, as an offset from the beginning of audio written + to the input audio buffer. This means it demarcates the segment of audio that was silent and + the difference between the start and end values will roughly match the configured timeout. + + The empty audio will be committed to the conversation as an `input_audio` item (there will be a + `input_audio_buffer.committed` event) and a model response will be generated. There may be speech + that didn't trigger VAD but is still detected by the model, so the model may respond with + something relevant to the conversation or a prompt to continue speaking. + """ + + audio_end_ms: int + """ + Millisecond offset of audio written to the input audio buffer at the time the + timeout was triggered. 
+ """ + + audio_start_ms: int + """ + Millisecond offset of audio written to the input audio buffer that was after the + playback time of the last model response. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item associated with this segment.""" + + type: Literal["input_audio_buffer.timeout_triggered"] + """The event type, must be `input_audio_buffer.timeout_triggered`.""" diff --git a/src/openai/types/realtime/log_prob_properties.py b/src/openai/types/realtime/log_prob_properties.py new file mode 100644 index 0000000000..423af1c492 --- /dev/null +++ b/src/openai/types/realtime/log_prob_properties.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel + +__all__ = ["LogProbProperties"] + + +class LogProbProperties(BaseModel): + """A log probability object.""" + + token: str + """The token that was used to generate the log probability.""" + + bytes: List[int] + """The bytes that were used to generate the log probability.""" + + logprob: float + """The log probability of the token.""" diff --git a/src/openai/types/realtime/mcp_list_tools_completed.py b/src/openai/types/realtime/mcp_list_tools_completed.py new file mode 100644 index 0000000000..2fe64147d6 --- /dev/null +++ b/src/openai/types/realtime/mcp_list_tools_completed.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["McpListToolsCompleted"] + + +class McpListToolsCompleted(BaseModel): + """Returned when listing MCP tools has completed for an item.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP list tools item.""" + + type: Literal["mcp_list_tools.completed"] + """The event type, must be `mcp_list_tools.completed`.""" diff --git a/src/openai/types/realtime/mcp_list_tools_failed.py b/src/openai/types/realtime/mcp_list_tools_failed.py new file mode 100644 index 0000000000..8cad7c0a12 --- /dev/null +++ b/src/openai/types/realtime/mcp_list_tools_failed.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["McpListToolsFailed"] + + +class McpListToolsFailed(BaseModel): + """Returned when listing MCP tools has failed for an item.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP list tools item.""" + + type: Literal["mcp_list_tools.failed"] + """The event type, must be `mcp_list_tools.failed`.""" diff --git a/src/openai/types/realtime/mcp_list_tools_in_progress.py b/src/openai/types/realtime/mcp_list_tools_in_progress.py new file mode 100644 index 0000000000..823bb875a3 --- /dev/null +++ b/src/openai/types/realtime/mcp_list_tools_in_progress.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
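# Illustrative sketch (not part of the generated module): the server events
# above are all discriminated by their `type` field, so a client usually
# dispatches on it while reading events from the connection. How events are
# obtained from the connection is an assumption about the SDK surface; the
# attributes used here come from the models defined in this patch.
def handle_server_event(event) -> None:
    if event.type == "input_audio_buffer.timeout_triggered":
        # The silent span after the last response, as offsets into the buffer.
        print(f"idle timeout: {event.audio_start_ms}ms to {event.audio_end_ms}ms")
    elif event.type == "mcp_list_tools.in_progress":
        print(f"listing MCP tools for item {event.item_id}...")
    elif event.type == "mcp_list_tools.completed":
        print(f"MCP tools listed for item {event.item_id}")
    elif event.type == "mcp_list_tools.failed":
        print(f"failed to list MCP tools for item {event.item_id}")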
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["McpListToolsInProgress"] + + +class McpListToolsInProgress(BaseModel): + """Returned when listing MCP tools is in progress for an item.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP list tools item.""" + + type: Literal["mcp_list_tools.in_progress"] + """The event type, must be `mcp_list_tools.in_progress`.""" diff --git a/src/openai/types/realtime/noise_reduction_type.py b/src/openai/types/realtime/noise_reduction_type.py new file mode 100644 index 0000000000..f4338991bb --- /dev/null +++ b/src/openai/types/realtime/noise_reduction_type.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["NoiseReductionType"] + +NoiseReductionType: TypeAlias = Literal["near_field", "far_field"] diff --git a/src/openai/types/realtime/output_audio_buffer_clear_event.py b/src/openai/types/realtime/output_audio_buffer_clear_event.py new file mode 100644 index 0000000000..b3fa7620ac --- /dev/null +++ b/src/openai/types/realtime/output_audio_buffer_clear_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["OutputAudioBufferClearEvent"] + + +class OutputAudioBufferClearEvent(BaseModel): + """**WebRTC/SIP Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + """ + + type: Literal["output_audio_buffer.clear"] + """The event type, must be `output_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """The unique ID of the client event used for error handling.""" diff --git a/src/openai/types/realtime/output_audio_buffer_clear_event_param.py b/src/openai/types/realtime/output_audio_buffer_clear_event_param.py new file mode 100644 index 0000000000..59f897a5c1 --- /dev/null +++ b/src/openai/types/realtime/output_audio_buffer_clear_event_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["OutputAudioBufferClearEventParam"] + + +class OutputAudioBufferClearEventParam(TypedDict, total=False): + """**WebRTC/SIP Only:** Emit to cut off the current audio response. + + This will trigger the server to + stop generating audio and emit a `output_audio_buffer.cleared` event. This + event should be preceded by a `response.cancel` client event to stop the + generation of the current response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). 
+ """ + + type: Required[Literal["output_audio_buffer.clear"]] + """The event type, must be `output_audio_buffer.clear`.""" + + event_id: str + """The unique ID of the client event used for error handling.""" diff --git a/src/openai/types/realtime/rate_limits_updated_event.py b/src/openai/types/realtime/rate_limits_updated_event.py new file mode 100644 index 0000000000..951de103af --- /dev/null +++ b/src/openai/types/realtime/rate_limits_updated_event.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RateLimitsUpdatedEvent", "RateLimit"] + + +class RateLimit(BaseModel): + limit: Optional[int] = None + """The maximum allowed value for the rate limit.""" + + name: Optional[Literal["requests", "tokens"]] = None + """The name of the rate limit (`requests`, `tokens`).""" + + remaining: Optional[int] = None + """The remaining value before the limit is reached.""" + + reset_seconds: Optional[float] = None + """Seconds until the rate limit resets.""" + + +class RateLimitsUpdatedEvent(BaseModel): + """Emitted at the beginning of a Response to indicate the updated rate limits. + + + When a Response is created some tokens will be "reserved" for the output + tokens, the rate limits shown here reflect that reservation, which is then + adjusted accordingly once the Response is completed. + """ + + event_id: str + """The unique ID of the server event.""" + + rate_limits: List[RateLimit] + """List of rate limit information.""" + + type: Literal["rate_limits.updated"] + """The event type, must be `rate_limits.updated`.""" diff --git a/src/openai/types/realtime/realtime_audio_config.py b/src/openai/types/realtime/realtime_audio_config.py new file mode 100644 index 0000000000..daa50358a8 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_config.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .realtime_audio_config_input import RealtimeAudioConfigInput +from .realtime_audio_config_output import RealtimeAudioConfigOutput + +__all__ = ["RealtimeAudioConfig"] + + +class RealtimeAudioConfig(BaseModel): + """Configuration for input and output audio.""" + + input: Optional[RealtimeAudioConfigInput] = None + + output: Optional[RealtimeAudioConfigOutput] = None diff --git a/src/openai/types/realtime/realtime_audio_config_input.py b/src/openai/types/realtime/realtime_audio_config_input.py new file mode 100644 index 0000000000..08e1b14601 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_config_input.py @@ -0,0 +1,70 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .audio_transcription import AudioTranscription +from .noise_reduction_type import NoiseReductionType +from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_audio_input_turn_detection import RealtimeAudioInputTurnDetection + +__all__ = ["RealtimeAudioConfigInput", "NoiseReduction"] + + +class NoiseReduction(BaseModel): + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model. 
+    Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and model performance by improving perception of the input audio.
+    """
+
+    type: Optional[NoiseReductionType] = None
+    """Type of noise reduction.
+
+    `near_field` is for close-talking microphones such as headphones, `far_field` is
+    for far-field microphones such as laptop or conference room microphones.
+    """
+
+
+class RealtimeAudioConfigInput(BaseModel):
+    format: Optional[RealtimeAudioFormats] = None
+    """The format of the input audio."""
+
+    noise_reduction: Optional[NoiseReduction] = None
+    """Configuration for input audio noise reduction.
+
+    This can be set to `null` to turn off. Noise reduction filters audio added to
+    the input audio buffer before it is sent to VAD and the model. Filtering the
+    audio can improve VAD and turn detection accuracy (reducing false positives) and
+    model performance by improving perception of the input audio.
+    """
+
+    transcription: Optional[AudioTranscription] = None
+    """
+    Configuration for input audio transcription, defaults to off and can be set to
+    `null` to turn off once on. Input audio transcription is not native to the
+    model, since the model consumes audio directly. Transcription runs
+    asynchronously through
+    [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+    and should be treated as guidance of input audio content rather than precisely
+    what the model heard. The client can optionally set the language and prompt for
+    transcription; these offer additional guidance to the transcription service.
+    """
+
+    turn_detection: Optional[RealtimeAudioInputTurnDetection] = None
+    """Configuration for turn detection, either Server VAD or Semantic VAD.
+
+    This can be set to `null` to turn off, in which case the client must manually
+    trigger model response.
+
+    Server VAD means that the model will detect the start and end of speech based on
+    audio volume and respond at the end of user speech.
+
+    Semantic VAD is more advanced and uses a turn detection model (in conjunction
+    with VAD) to semantically estimate whether the user has finished speaking, then
+    dynamically sets a timeout based on this probability. For example, if user audio
+    trails off with "uhhm", the model will score a low probability of turn end and
+    wait longer for the user to continue speaking. This can be useful for more
+    natural conversations, but may have a higher latency.
+    """
diff --git a/src/openai/types/realtime/realtime_audio_config_input_param.py b/src/openai/types/realtime/realtime_audio_config_input_param.py
new file mode 100644
index 0000000000..73495e6cd3
--- /dev/null
+++ b/src/openai/types/realtime/realtime_audio_config_input_param.py
@@ -0,0 +1,72 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .noise_reduction_type import NoiseReductionType
+from .audio_transcription_param import AudioTranscriptionParam
+from .realtime_audio_formats_param import RealtimeAudioFormatsParam
+from .realtime_audio_input_turn_detection_param import RealtimeAudioInputTurnDetectionParam
+
+__all__ = ["RealtimeAudioConfigInputParam", "NoiseReduction"]
+
+
+class NoiseReduction(TypedDict, total=False):
+    """Configuration for input audio noise reduction.
+
+    This can be set to `null` to turn off.
+    Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model.
+    Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and model performance by improving perception of the input audio.
+    """
+
+    type: NoiseReductionType
+    """Type of noise reduction.
+
+    `near_field` is for close-talking microphones such as headphones, `far_field` is
+    for far-field microphones such as laptop or conference room microphones.
+    """
+
+
+class RealtimeAudioConfigInputParam(TypedDict, total=False):
+    format: RealtimeAudioFormatsParam
+    """The format of the input audio."""
+
+    noise_reduction: NoiseReduction
+    """Configuration for input audio noise reduction.
+
+    This can be set to `null` to turn off. Noise reduction filters audio added to
+    the input audio buffer before it is sent to VAD and the model. Filtering the
+    audio can improve VAD and turn detection accuracy (reducing false positives) and
+    model performance by improving perception of the input audio.
+    """
+
+    transcription: AudioTranscriptionParam
+    """
+    Configuration for input audio transcription, defaults to off and can be set to
+    `null` to turn off once on. Input audio transcription is not native to the
+    model, since the model consumes audio directly. Transcription runs
+    asynchronously through
+    [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+    and should be treated as guidance of input audio content rather than precisely
+    what the model heard. The client can optionally set the language and prompt for
+    transcription; these offer additional guidance to the transcription service.
+    """
+
+    turn_detection: Optional[RealtimeAudioInputTurnDetectionParam]
+    """Configuration for turn detection, either Server VAD or Semantic VAD.
+
+    This can be set to `null` to turn off, in which case the client must manually
+    trigger model response.
+
+    Server VAD means that the model will detect the start and end of speech based on
+    audio volume and respond at the end of user speech.
+
+    Semantic VAD is more advanced and uses a turn detection model (in conjunction
+    with VAD) to semantically estimate whether the user has finished speaking, then
+    dynamically sets a timeout based on this probability. For example, if user audio
+    trails off with "uhhm", the model will score a low probability of turn end and
+    wait longer for the user to continue speaking. This can be useful for more
+    natural conversations, but may have a higher latency.
+    """
diff --git a/src/openai/types/realtime/realtime_audio_config_output.py b/src/openai/types/realtime/realtime_audio_config_output.py
new file mode 100644
index 0000000000..a8af237c1d
--- /dev/null
+++ b/src/openai/types/realtime/realtime_audio_config_output.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .realtime_audio_formats import RealtimeAudioFormats
+
+__all__ = ["RealtimeAudioConfigOutput"]
+
+
+class RealtimeAudioConfigOutput(BaseModel):
+    format: Optional[RealtimeAudioFormats] = None
+    """The format of the output audio."""
+
+    speed: Optional[float] = None
+    """
+    The speed of the model's spoken response as a multiple of the original speed.
+    1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+ This value can only be changed in between model turns, not while a response is + in progress. + + This parameter is a post-processing adjustment to the audio after it is + generated, it's also possible to prompt the model to speak faster or slower. + """ + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. + """ diff --git a/src/openai/types/realtime/realtime_audio_config_output_param.py b/src/openai/types/realtime/realtime_audio_config_output_param.py new file mode 100644 index 0000000000..8e887d3464 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_config_output_param.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +from .realtime_audio_formats_param import RealtimeAudioFormatsParam + +__all__ = ["RealtimeAudioConfigOutputParam"] + + +class RealtimeAudioConfigOutputParam(TypedDict, total=False): + format: RealtimeAudioFormatsParam + """The format of the output audio.""" + + speed: float + """ + The speed of the model's spoken response as a multiple of the original speed. + 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + This value can only be changed in between model turns, not while a response is + in progress. + + This parameter is a post-processing adjustment to the audio after it is + generated, it's also possible to prompt the model to speak faster or slower. + """ + + voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. + """ diff --git a/src/openai/types/realtime/realtime_audio_config_param.py b/src/openai/types/realtime/realtime_audio_config_param.py new file mode 100644 index 0000000000..7899fe359b --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_config_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .realtime_audio_config_input_param import RealtimeAudioConfigInputParam +from .realtime_audio_config_output_param import RealtimeAudioConfigOutputParam + +__all__ = ["RealtimeAudioConfigParam"] + + +class RealtimeAudioConfigParam(TypedDict, total=False): + """Configuration for input and output audio.""" + + input: RealtimeAudioConfigInputParam + + output: RealtimeAudioConfigOutputParam diff --git a/src/openai/types/realtime/realtime_audio_formats.py b/src/openai/types/realtime/realtime_audio_formats.py new file mode 100644 index 0000000000..fa10c9a7a4 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_formats.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
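# Illustrative sketch (not part of the generated module): an audio configuration
# assembled from the input/output types above, with PCM input at 24kHz,
# near-field noise reduction, and a slightly faster `marin` voice on output. How
# the dict is applied (for example inside a `session.update` client event) is an
# assumption and depends on the caller.
audio_config = {
    "input": {
        "format": {"type": "audio/pcm", "rate": 24000},
        "noise_reduction": {"type": "near_field"},
    },
    "output": {
        "format": {"type": "audio/pcm", "rate": 24000},
        "voice": "marin",
        "speed": 1.1,  # allowed range is 0.25 to 1.5 per the docstring above
    },
}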
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["RealtimeAudioFormats", "AudioPCM", "AudioPCMU", "AudioPCMA"] + + +class AudioPCM(BaseModel): + """The PCM audio format. Only a 24kHz sample rate is supported.""" + + rate: Optional[Literal[24000]] = None + """The sample rate of the audio. Always `24000`.""" + + type: Optional[Literal["audio/pcm"]] = None + """The audio format. Always `audio/pcm`.""" + + +class AudioPCMU(BaseModel): + """The G.711 μ-law format.""" + + type: Optional[Literal["audio/pcmu"]] = None + """The audio format. Always `audio/pcmu`.""" + + +class AudioPCMA(BaseModel): + """The G.711 A-law format.""" + + type: Optional[Literal["audio/pcma"]] = None + """The audio format. Always `audio/pcma`.""" + + +RealtimeAudioFormats: TypeAlias = Annotated[Union[AudioPCM, AudioPCMU, AudioPCMA], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/realtime/realtime_audio_formats_param.py b/src/openai/types/realtime/realtime_audio_formats_param.py new file mode 100644 index 0000000000..6392f632c3 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_formats_param.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias, TypedDict + +__all__ = ["RealtimeAudioFormatsParam", "AudioPCM", "AudioPCMU", "AudioPCMA"] + + +class AudioPCM(TypedDict, total=False): + """The PCM audio format. Only a 24kHz sample rate is supported.""" + + rate: Literal[24000] + """The sample rate of the audio. Always `24000`.""" + + type: Literal["audio/pcm"] + """The audio format. Always `audio/pcm`.""" + + +class AudioPCMU(TypedDict, total=False): + """The G.711 μ-law format.""" + + type: Literal["audio/pcmu"] + """The audio format. Always `audio/pcmu`.""" + + +class AudioPCMA(TypedDict, total=False): + """The G.711 A-law format.""" + + type: Literal["audio/pcma"] + """The audio format. Always `audio/pcma`.""" + + +RealtimeAudioFormatsParam: TypeAlias = Union[AudioPCM, AudioPCMU, AudioPCMA] diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_audio_input_turn_detection.py new file mode 100644 index 0000000000..8d9aff3563 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection.py @@ -0,0 +1,115 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["RealtimeAudioInputTurnDetection", "ServerVad", "SemanticVad"] + + +class ServerVad(BaseModel): + """ + Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence. + """ + + type: Literal["server_vad"] + """Type of turn detection, `server_vad` to turn on simple Server VAD.""" + + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. 
+ """ + + idle_timeout_ms: Optional[int] = None + """Optional timeout after which a model response will be triggered automatically. + + This is useful for situations in which a long pause from the user is unexpected, + such as a phone call. The model will effectively prompt the user to continue the + conversation based on the current context. + + The timeout value will be applied after the last model response's audio has + finished playing, i.e. it's set to the `response.done` time plus audio playback + duration. + + An `input_audio_buffer.timeout_triggered` event (plus events associated with the + Response) will be emitted when the timeout is reached. Idle timeout is currently + only supported for `server_vad` mode. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: Optional[float] = None + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + +class SemanticVad(BaseModel): + """ + Server-side semantic turn detection which uses a model to determine when the user has finished speaking. + """ + + type: Literal["semantic_vad"] + """Type of turn detection, `semantic_vad` to turn on Semantic VAD.""" + + create_response: Optional[bool] = None + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, + 4s, and 2s respectively. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + +RealtimeAudioInputTurnDetection: TypeAlias = Annotated[ + Union[ServerVad, SemanticVad, None], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py new file mode 100644 index 0000000000..30522d74e1 --- /dev/null +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py @@ -0,0 +1,112 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
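# Illustrative sketch (not part of the generated module): two turn-detection
# configurations matching the ServerVad and SemanticVad shapes above. Only
# `type` is required; the other keys tune the defaults described in the field
# docstrings, and the values shown are arbitrary examples.
server_vad_config = {
    "type": "server_vad",
    "threshold": 0.6,  # require slightly louder audio in a noisy room
    "prefix_padding_ms": 300,
    "silence_duration_ms": 500,
    "idle_timeout_ms": 6000,  # nudge the user after 6s of post-response silence
}

semantic_vad_config = {
    "type": "semantic_vad",
    "eagerness": "low",  # wait longer before deciding the user's turn has ended
    "create_response": True,
    "interrupt_response": True,
}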
+ +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = ["RealtimeAudioInputTurnDetectionParam", "ServerVad", "SemanticVad"] + + +class ServerVad(TypedDict, total=False): + """ + Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence. + """ + + type: Required[Literal["server_vad"]] + """Type of turn detection, `server_vad` to turn on simple Server VAD.""" + + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + idle_timeout_ms: Optional[int] + """Optional timeout after which a model response will be triggered automatically. + + This is useful for situations in which a long pause from the user is unexpected, + such as a phone call. The model will effectively prompt the user to continue the + conversation based on the current context. + + The timeout value will be applied after the last model response's audio has + finished playing, i.e. it's set to the `response.done` time plus audio playback + duration. + + An `input_audio_buffer.timeout_triggered` event (plus events associated with the + Response) will be emitted when the timeout is reached. Idle timeout is currently + only supported for `server_vad` mode. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + +class SemanticVad(TypedDict, total=False): + """ + Server-side semantic turn detection which uses a model to determine when the user has finished speaking. + """ + + type: Required[Literal["semantic_vad"]] + """Type of turn detection, `semantic_vad` to turn on Semantic VAD.""" + + create_response: bool + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. 
`low`, `medium`, and `high` have max timeouts of 8s, + 4s, and 2s respectively. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + +RealtimeAudioInputTurnDetectionParam: TypeAlias = Union[ServerVad, SemanticVad] diff --git a/src/openai/types/realtime/realtime_client_event.py b/src/openai/types/realtime/realtime_client_event.py new file mode 100644 index 0000000000..3b1c348daa --- /dev/null +++ b/src/openai/types/realtime/realtime_client_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .session_update_event import SessionUpdateEvent +from .response_cancel_event import ResponseCancelEvent +from .response_create_event import ResponseCreateEvent +from .conversation_item_create_event import ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent +from .output_audio_buffer_clear_event import OutputAudioBufferClearEvent +from .conversation_item_retrieve_event import ConversationItemRetrieveEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent + +__all__ = ["RealtimeClientEvent"] + +RealtimeClientEvent: TypeAlias = Annotated[ + Union[ + ConversationItemCreateEvent, + ConversationItemDeleteEvent, + ConversationItemRetrieveEvent, + ConversationItemTruncateEvent, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + OutputAudioBufferClearEvent, + InputAudioBufferCommitEvent, + ResponseCancelEvent, + ResponseCreateEvent, + SessionUpdateEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/realtime/realtime_client_event_param.py b/src/openai/types/realtime/realtime_client_event_param.py new file mode 100644 index 0000000000..cda5766e2a --- /dev/null +++ b/src/openai/types/realtime/realtime_client_event_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .session_update_event_param import SessionUpdateEventParam +from .response_cancel_event_param import ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam +from .conversation_item_create_event_param import ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam +from .output_audio_buffer_clear_event_param import OutputAudioBufferClearEventParam +from .conversation_item_retrieve_event_param import ConversationItemRetrieveEventParam +from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam + +__all__ = ["RealtimeClientEventParam"] + +RealtimeClientEventParam: TypeAlias = Union[ + ConversationItemCreateEventParam, + ConversationItemDeleteEventParam, + ConversationItemRetrieveEventParam, + ConversationItemTruncateEventParam, + InputAudioBufferAppendEventParam, + InputAudioBufferClearEventParam, + OutputAudioBufferClearEventParam, + InputAudioBufferCommitEventParam, + ResponseCancelEventParam, + ResponseCreateEventParam, + SessionUpdateEventParam, +] diff --git a/src/openai/types/realtime/realtime_connect_params.py b/src/openai/types/realtime/realtime_connect_params.py new file mode 100644 index 0000000000..950f36212f --- /dev/null +++ b/src/openai/types/realtime/realtime_connect_params.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["RealtimeConnectParams"] + + +class RealtimeConnectParams(TypedDict, total=False): + call_id: str + + model: str diff --git a/src/openai/types/realtime/realtime_conversation_item_assistant_message.py b/src/openai/types/realtime/realtime_conversation_item_assistant_message.py new file mode 100644 index 0000000000..207831a3c8 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_assistant_message.py @@ -0,0 +1,60 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeConversationItemAssistantMessage", "Content"] + + +class Content(BaseModel): + audio: Optional[str] = None + """ + Base64-encoded audio bytes, these will be parsed as the format specified in the + session output audio type configuration. This defaults to PCM 16-bit 24kHz mono + if not specified. + """ + + text: Optional[str] = None + """The text content.""" + + transcript: Optional[str] = None + """ + The transcript of the audio content, this will always be present if the output + type is `audio`. + """ + + type: Optional[Literal["output_text", "output_audio"]] = None + """ + The content type, `output_text` or `output_audio` depending on the session + `output_modalities` configuration. + """ + + +class RealtimeConversationItemAssistantMessage(BaseModel): + """An assistant message item in a Realtime conversation.""" + + content: List[Content] + """The content of the message.""" + + role: Literal["assistant"] + """The role of the message sender. 
Always `assistant`.""" + + type: Literal["message"] + """The type of the item. Always `message`.""" + + id: Optional[str] = None + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Optional[Literal["completed", "incomplete", "in_progress"]] = None + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_assistant_message_param.py b/src/openai/types/realtime/realtime_conversation_item_assistant_message_param.py new file mode 100644 index 0000000000..abc78e7d3f --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_assistant_message_param.py @@ -0,0 +1,60 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeConversationItemAssistantMessageParam", "Content"] + + +class Content(TypedDict, total=False): + audio: str + """ + Base64-encoded audio bytes, these will be parsed as the format specified in the + session output audio type configuration. This defaults to PCM 16-bit 24kHz mono + if not specified. + """ + + text: str + """The text content.""" + + transcript: str + """ + The transcript of the audio content, this will always be present if the output + type is `audio`. + """ + + type: Literal["output_text", "output_audio"] + """ + The content type, `output_text` or `output_audio` depending on the session + `output_modalities` configuration. + """ + + +class RealtimeConversationItemAssistantMessageParam(TypedDict, total=False): + """An assistant message item in a Realtime conversation.""" + + content: Required[Iterable[Content]] + """The content of the message.""" + + role: Required[Literal["assistant"]] + """The role of the message sender. Always `assistant`.""" + + type: Required[Literal["message"]] + """The type of the item. Always `message`.""" + + id: str + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Literal["completed", "incomplete", "in_progress"] + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_function_call.py b/src/openai/types/realtime/realtime_conversation_item_function_call.py new file mode 100644 index 0000000000..4e40394883 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_function_call.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeConversationItemFunctionCall"] + + +class RealtimeConversationItemFunctionCall(BaseModel): + """A function call item in a Realtime conversation.""" + + arguments: str + """The arguments of the function call. + + This is a JSON-encoded string representing the arguments passed to the function, + for example `{"arg1": "value1", "arg2": 42}`. 
+ """ + + name: str + """The name of the function being called.""" + + type: Literal["function_call"] + """The type of the item. Always `function_call`.""" + + id: Optional[str] = None + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + call_id: Optional[str] = None + """The ID of the function call.""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Optional[Literal["completed", "incomplete", "in_progress"]] = None + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_function_call_output.py b/src/openai/types/realtime/realtime_conversation_item_function_call_output.py new file mode 100644 index 0000000000..cdbc352d85 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_function_call_output.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeConversationItemFunctionCallOutput"] + + +class RealtimeConversationItemFunctionCallOutput(BaseModel): + """A function call output item in a Realtime conversation.""" + + call_id: str + """The ID of the function call this output is for.""" + + output: str + """ + The output of the function call, this is free text and can contain any + information or simply be empty. + """ + + type: Literal["function_call_output"] + """The type of the item. Always `function_call_output`.""" + + id: Optional[str] = None + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Optional[Literal["completed", "incomplete", "in_progress"]] = None + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_function_call_output_param.py b/src/openai/types/realtime/realtime_conversation_item_function_call_output_param.py new file mode 100644 index 0000000000..2e56a81dc3 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_function_call_output_param.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeConversationItemFunctionCallOutputParam"] + + +class RealtimeConversationItemFunctionCallOutputParam(TypedDict, total=False): + """A function call output item in a Realtime conversation.""" + + call_id: Required[str] + """The ID of the function call this output is for.""" + + output: Required[str] + """ + The output of the function call, this is free text and can contain any + information or simply be empty. + """ + + type: Required[Literal["function_call_output"]] + """The type of the item. Always `function_call_output`.""" + + id: str + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. 
+ """ + + status: Literal["completed", "incomplete", "in_progress"] + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_function_call_param.py b/src/openai/types/realtime/realtime_conversation_item_function_call_param.py new file mode 100644 index 0000000000..6467ce149e --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_function_call_param.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeConversationItemFunctionCallParam"] + + +class RealtimeConversationItemFunctionCallParam(TypedDict, total=False): + """A function call item in a Realtime conversation.""" + + arguments: Required[str] + """The arguments of the function call. + + This is a JSON-encoded string representing the arguments passed to the function, + for example `{"arg1": "value1", "arg2": 42}`. + """ + + name: Required[str] + """The name of the function being called.""" + + type: Required[Literal["function_call"]] + """The type of the item. Always `function_call`.""" + + id: str + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + call_id: str + """The ID of the function call.""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Literal["completed", "incomplete", "in_progress"] + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_system_message.py b/src/openai/types/realtime/realtime_conversation_item_system_message.py new file mode 100644 index 0000000000..f69bc03937 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_system_message.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeConversationItemSystemMessage", "Content"] + + +class Content(BaseModel): + text: Optional[str] = None + """The text content.""" + + type: Optional[Literal["input_text"]] = None + """The content type. Always `input_text` for system messages.""" + + +class RealtimeConversationItemSystemMessage(BaseModel): + """ + A system message in a Realtime conversation can be used to provide additional context or instructions to the model. This is similar but distinct from the instruction prompt provided at the start of a conversation, as system messages can be added at any point in the conversation. For major changes to the conversation's behavior, use instructions, but for smaller updates (e.g. "the user is now asking about a different topic"), use system messages. + """ + + content: List[Content] + """The content of the message.""" + + role: Literal["system"] + """The role of the message sender. Always `system`.""" + + type: Literal["message"] + """The type of the item. Always `message`.""" + + id: Optional[str] = None + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. 
+ """ + + status: Optional[Literal["completed", "incomplete", "in_progress"]] = None + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_system_message_param.py b/src/openai/types/realtime/realtime_conversation_item_system_message_param.py new file mode 100644 index 0000000000..93880198fa --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_system_message_param.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeConversationItemSystemMessageParam", "Content"] + + +class Content(TypedDict, total=False): + text: str + """The text content.""" + + type: Literal["input_text"] + """The content type. Always `input_text` for system messages.""" + + +class RealtimeConversationItemSystemMessageParam(TypedDict, total=False): + """ + A system message in a Realtime conversation can be used to provide additional context or instructions to the model. This is similar but distinct from the instruction prompt provided at the start of a conversation, as system messages can be added at any point in the conversation. For major changes to the conversation's behavior, use instructions, but for smaller updates (e.g. "the user is now asking about a different topic"), use system messages. + """ + + content: Required[Iterable[Content]] + """The content of the message.""" + + role: Required[Literal["system"]] + """The role of the message sender. Always `system`.""" + + type: Required[Literal["message"]] + """The type of the item. Always `message`.""" + + id: str + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Literal["completed", "incomplete", "in_progress"] + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_user_message.py b/src/openai/types/realtime/realtime_conversation_item_user_message.py new file mode 100644 index 0000000000..20e9614eb6 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_user_message.py @@ -0,0 +1,71 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeConversationItemUserMessage", "Content"] + + +class Content(BaseModel): + audio: Optional[str] = None + """ + Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + format specified in the session input audio type configuration. This defaults to + PCM 16-bit 24kHz mono if not specified. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """The detail level of the image (for `input_image`). + + `auto` will default to `high`. + """ + + image_url: Optional[str] = None + """Base64-encoded image bytes (for `input_image`) as a data URI. + + For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported + formats are PNG and JPEG. + """ + + text: Optional[str] = None + """The text content (for `input_text`).""" + + transcript: Optional[str] = None + """Transcript of the audio (for `input_audio`). 
+ + This is not sent to the model, but will be attached to the message item for + reference. + """ + + type: Optional[Literal["input_text", "input_audio", "input_image"]] = None + """The content type (`input_text`, `input_audio`, or `input_image`).""" + + +class RealtimeConversationItemUserMessage(BaseModel): + """A user message item in a Realtime conversation.""" + + content: List[Content] + """The content of the message.""" + + role: Literal["user"] + """The role of the message sender. Always `user`.""" + + type: Literal["message"] + """The type of the item. Always `message`.""" + + id: Optional[str] = None + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Optional[Literal["completed", "incomplete", "in_progress"]] = None + """The status of the item. Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_conversation_item_user_message_param.py b/src/openai/types/realtime/realtime_conversation_item_user_message_param.py new file mode 100644 index 0000000000..69a24692e8 --- /dev/null +++ b/src/openai/types/realtime/realtime_conversation_item_user_message_param.py @@ -0,0 +1,71 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeConversationItemUserMessageParam", "Content"] + + +class Content(TypedDict, total=False): + audio: str + """ + Base64-encoded audio bytes (for `input_audio`), these will be parsed as the + format specified in the session input audio type configuration. This defaults to + PCM 16-bit 24kHz mono if not specified. + """ + + detail: Literal["auto", "low", "high"] + """The detail level of the image (for `input_image`). + + `auto` will default to `high`. + """ + + image_url: str + """Base64-encoded image bytes (for `input_image`) as a data URI. + + For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported + formats are PNG and JPEG. + """ + + text: str + """The text content (for `input_text`).""" + + transcript: str + """Transcript of the audio (for `input_audio`). + + This is not sent to the model, but will be attached to the message item for + reference. + """ + + type: Literal["input_text", "input_audio", "input_image"] + """The content type (`input_text`, `input_audio`, or `input_image`).""" + + +class RealtimeConversationItemUserMessageParam(TypedDict, total=False): + """A user message item in a Realtime conversation.""" + + content: Required[Iterable[Content]] + """The content of the message.""" + + role: Required[Literal["user"]] + """The role of the message sender. Always `user`.""" + + type: Required[Literal["message"]] + """The type of the item. Always `message`.""" + + id: str + """The unique ID of the item. + + This may be provided by the client or generated by the server. + """ + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`. + + Optional when creating a new item. + """ + + status: Literal["completed", "incomplete", "in_progress"] + """The status of the item. 
Has no effect on the conversation.""" diff --git a/src/openai/types/realtime/realtime_error.py b/src/openai/types/realtime/realtime_error.py new file mode 100644 index 0000000000..2aa5bc9425 --- /dev/null +++ b/src/openai/types/realtime/realtime_error.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["RealtimeError"] + + +class RealtimeError(BaseModel): + """Details of the error.""" + + message: str + """A human-readable error message.""" + + type: str + """The type of error (e.g., "invalid_request_error", "server_error").""" + + code: Optional[str] = None + """Error code, if any.""" + + event_id: Optional[str] = None + """The event_id of the client event that caused the error, if applicable.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" diff --git a/src/openai/types/realtime/realtime_error_event.py b/src/openai/types/realtime/realtime_error_event.py new file mode 100644 index 0000000000..574464b29e --- /dev/null +++ b/src/openai/types/realtime/realtime_error_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_error import RealtimeError + +__all__ = ["RealtimeErrorEvent"] + + +class RealtimeErrorEvent(BaseModel): + """ + Returned when an error occurs, which could be a client problem or a server + problem. Most errors are recoverable and the session will stay open, we + recommend to implementors to monitor and log error messages by default. + """ + + error: RealtimeError + """Details of the error.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["error"] + """The event type, must be `error`.""" diff --git a/src/openai/types/realtime/realtime_function_tool.py b/src/openai/types/realtime/realtime_function_tool.py new file mode 100644 index 0000000000..48dbf9929d --- /dev/null +++ b/src/openai/types/realtime/realtime_function_tool.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeFunctionTool"] + + +class RealtimeFunctionTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" diff --git a/src/openai/types/realtime/realtime_function_tool_param.py b/src/openai/types/realtime/realtime_function_tool_param.py new file mode 100644 index 0000000000..f42e3e497c --- /dev/null +++ b/src/openai/types/realtime/realtime_function_tool_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
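# Usage sketch (not from this diff; values are illustrative): defining a
# function tool with the RealtimeFunctionTool model added above. Only the
# field names come from the generated model; the weather example is made up.
from openai.types.realtime.realtime_function_tool import RealtimeFunctionTool

get_weather = RealtimeFunctionTool(
    type="function",
    name="get_weather",
    description="Look up the current weather for a city.",
    parameters={
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
)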
+ +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["RealtimeFunctionToolParam"] + + +class RealtimeFunctionToolParam(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" diff --git a/src/openai/types/realtime/realtime_mcp_approval_request.py b/src/openai/types/realtime/realtime_mcp_approval_request.py new file mode 100644 index 0000000000..1744c90070 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_approval_request.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcpApprovalRequest"] + + +class RealtimeMcpApprovalRequest(BaseModel): + """A Realtime item requesting human approval of a tool invocation.""" + + id: str + """The unique ID of the approval request.""" + + arguments: str + """A JSON string of arguments for the tool.""" + + name: str + """The name of the tool to run.""" + + server_label: str + """The label of the MCP server making the request.""" + + type: Literal["mcp_approval_request"] + """The type of the item. Always `mcp_approval_request`.""" diff --git a/src/openai/types/realtime/realtime_mcp_approval_request_param.py b/src/openai/types/realtime/realtime_mcp_approval_request_param.py new file mode 100644 index 0000000000..f7cb68d67e --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_approval_request_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcpApprovalRequestParam"] + + +class RealtimeMcpApprovalRequestParam(TypedDict, total=False): + """A Realtime item requesting human approval of a tool invocation.""" + + id: Required[str] + """The unique ID of the approval request.""" + + arguments: Required[str] + """A JSON string of arguments for the tool.""" + + name: Required[str] + """The name of the tool to run.""" + + server_label: Required[str] + """The label of the MCP server making the request.""" + + type: Required[Literal["mcp_approval_request"]] + """The type of the item. Always `mcp_approval_request`.""" diff --git a/src/openai/types/realtime/realtime_mcp_approval_response.py b/src/openai/types/realtime/realtime_mcp_approval_response.py new file mode 100644 index 0000000000..f8525a12fc --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_approval_response.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcpApprovalResponse"] + + +class RealtimeMcpApprovalResponse(BaseModel): + """A Realtime item responding to an MCP approval request.""" + + id: str + """The unique ID of the approval response.""" + + approval_request_id: str + """The ID of the approval request being answered.""" + + approve: bool + """Whether the request was approved.""" + + type: Literal["mcp_approval_response"] + """The type of the item. 
Always `mcp_approval_response`.""" + + reason: Optional[str] = None + """Optional reason for the decision.""" diff --git a/src/openai/types/realtime/realtime_mcp_approval_response_param.py b/src/openai/types/realtime/realtime_mcp_approval_response_param.py new file mode 100644 index 0000000000..6a65f7ce38 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_approval_response_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcpApprovalResponseParam"] + + +class RealtimeMcpApprovalResponseParam(TypedDict, total=False): + """A Realtime item responding to an MCP approval request.""" + + id: Required[str] + """The unique ID of the approval response.""" + + approval_request_id: Required[str] + """The ID of the approval request being answered.""" + + approve: Required[bool] + """Whether the request was approved.""" + + type: Required[Literal["mcp_approval_response"]] + """The type of the item. Always `mcp_approval_response`.""" + + reason: Optional[str] + """Optional reason for the decision.""" diff --git a/src/openai/types/realtime/realtime_mcp_list_tools.py b/src/openai/types/realtime/realtime_mcp_list_tools.py new file mode 100644 index 0000000000..669d1fb43b --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_list_tools.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcpListTools", "Tool"] + + +class Tool(BaseModel): + """A tool available on an MCP server.""" + + input_schema: object + """The JSON schema describing the tool's input.""" + + name: str + """The name of the tool.""" + + annotations: Optional[object] = None + """Additional annotations about the tool.""" + + description: Optional[str] = None + """The description of the tool.""" + + +class RealtimeMcpListTools(BaseModel): + """A Realtime item listing tools available on an MCP server.""" + + server_label: str + """The label of the MCP server.""" + + tools: List[Tool] + """The tools available on the server.""" + + type: Literal["mcp_list_tools"] + """The type of the item. Always `mcp_list_tools`.""" + + id: Optional[str] = None + """The unique ID of the list.""" diff --git a/src/openai/types/realtime/realtime_mcp_list_tools_param.py b/src/openai/types/realtime/realtime_mcp_list_tools_param.py new file mode 100644 index 0000000000..614fa53347 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_list_tools_param.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
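# Usage sketch (hypothetical IDs): answering an MCP approval request by
# building a RealtimeMcpApprovalResponseParam payload as defined above. The
# `id` and `approval_request_id` values are placeholders, not real item IDs.
from openai.types.realtime.realtime_mcp_approval_response_param import (
    RealtimeMcpApprovalResponseParam,
)

approval: RealtimeMcpApprovalResponseParam = {
    "type": "mcp_approval_response",
    "id": "mcpr_123",
    "approval_request_id": "mcpar_456",
    "approve": True,
    "reason": "Read-only tool, safe to run.",
}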
+ +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcpListToolsParam", "Tool"] + + +class Tool(TypedDict, total=False): + """A tool available on an MCP server.""" + + input_schema: Required[object] + """The JSON schema describing the tool's input.""" + + name: Required[str] + """The name of the tool.""" + + annotations: Optional[object] + """Additional annotations about the tool.""" + + description: Optional[str] + """The description of the tool.""" + + +class RealtimeMcpListToolsParam(TypedDict, total=False): + """A Realtime item listing tools available on an MCP server.""" + + server_label: Required[str] + """The label of the MCP server.""" + + tools: Required[Iterable[Tool]] + """The tools available on the server.""" + + type: Required[Literal["mcp_list_tools"]] + """The type of the item. Always `mcp_list_tools`.""" + + id: str + """The unique ID of the list.""" diff --git a/src/openai/types/realtime/realtime_mcp_protocol_error.py b/src/openai/types/realtime/realtime_mcp_protocol_error.py new file mode 100644 index 0000000000..2e7cfdffa3 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_protocol_error.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcpProtocolError"] + + +class RealtimeMcpProtocolError(BaseModel): + code: int + + message: str + + type: Literal["protocol_error"] diff --git a/src/openai/types/realtime/realtime_mcp_protocol_error_param.py b/src/openai/types/realtime/realtime_mcp_protocol_error_param.py new file mode 100644 index 0000000000..bebe3d379e --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_protocol_error_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcpProtocolErrorParam"] + + +class RealtimeMcpProtocolErrorParam(TypedDict, total=False): + code: Required[int] + + message: Required[str] + + type: Required[Literal["protocol_error"]] diff --git a/src/openai/types/realtime/realtime_mcp_tool_call.py b/src/openai/types/realtime/realtime_mcp_tool_call.py new file mode 100644 index 0000000000..f53ad0eaa9 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_tool_call.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
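# Usage sketch (server label and tool are placeholders): an `mcp_list_tools`
# item expressed with the RealtimeMcpListToolsParam TypedDict defined above,
# plus a small read of the nested tool names.
from openai.types.realtime.realtime_mcp_list_tools_param import (
    RealtimeMcpListToolsParam,
)

listing: RealtimeMcpListToolsParam = {
    "type": "mcp_list_tools",
    "server_label": "docs",
    "tools": [{"name": "search", "input_schema": {"type": "object"}}],
}
tool_names = [tool["name"] for tool in listing["tools"]]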
+ +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .realtime_mcphttp_error import RealtimeMcphttpError +from .realtime_mcp_protocol_error import RealtimeMcpProtocolError +from .realtime_mcp_tool_execution_error import RealtimeMcpToolExecutionError + +__all__ = ["RealtimeMcpToolCall", "Error"] + +Error: TypeAlias = Annotated[ + Union[RealtimeMcpProtocolError, RealtimeMcpToolExecutionError, RealtimeMcphttpError, None], + PropertyInfo(discriminator="type"), +] + + +class RealtimeMcpToolCall(BaseModel): + """A Realtime item representing an invocation of a tool on an MCP server.""" + + id: str + """The unique ID of the tool call.""" + + arguments: str + """A JSON string of the arguments passed to the tool.""" + + name: str + """The name of the tool that was run.""" + + server_label: str + """The label of the MCP server running the tool.""" + + type: Literal["mcp_call"] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] = None + """The ID of an associated approval request, if any.""" + + error: Optional[Error] = None + """The error from the tool call, if any.""" + + output: Optional[str] = None + """The output from the tool call.""" diff --git a/src/openai/types/realtime/realtime_mcp_tool_call_param.py b/src/openai/types/realtime/realtime_mcp_tool_call_param.py new file mode 100644 index 0000000000..8ccb5efc8a --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_tool_call_param.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam +from .realtime_mcp_protocol_error_param import RealtimeMcpProtocolErrorParam +from .realtime_mcp_tool_execution_error_param import RealtimeMcpToolExecutionErrorParam + +__all__ = ["RealtimeMcpToolCallParam", "Error"] + +Error: TypeAlias = Union[RealtimeMcpProtocolErrorParam, RealtimeMcpToolExecutionErrorParam, RealtimeMcphttpErrorParam] + + +class RealtimeMcpToolCallParam(TypedDict, total=False): + """A Realtime item representing an invocation of a tool on an MCP server.""" + + id: Required[str] + """The unique ID of the tool call.""" + + arguments: Required[str] + """A JSON string of the arguments passed to the tool.""" + + name: Required[str] + """The name of the tool that was run.""" + + server_label: Required[str] + """The label of the MCP server running the tool.""" + + type: Required[Literal["mcp_call"]] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] + """The ID of an associated approval request, if any.""" + + error: Optional[Error] + """The error from the tool call, if any.""" + + output: Optional[str] + """The output from the tool call.""" diff --git a/src/openai/types/realtime/realtime_mcp_tool_execution_error.py b/src/openai/types/realtime/realtime_mcp_tool_execution_error.py new file mode 100644 index 0000000000..a2ed063129 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_tool_execution_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
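# Usage sketch: the `error` field on RealtimeMcpToolCall (defined above) is a
# discriminated union, so checking `error.type` distinguishes tool execution
# errors from protocol/HTTP errors. The helper itself is illustrative.
from openai.types.realtime.realtime_mcp_tool_call import RealtimeMcpToolCall


def describe_mcp_error(call: RealtimeMcpToolCall) -> str:
    if call.error is None:
        return "no error"
    if call.error.type == "tool_execution_error":
        return f"tool failed: {call.error.message}"
    # Remaining variants (protocol_error, http_error) both carry a code.
    return f"{call.error.type} (code {call.error.code}): {call.error.message}"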
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcpToolExecutionError"] + + +class RealtimeMcpToolExecutionError(BaseModel): + message: str + + type: Literal["tool_execution_error"] diff --git a/src/openai/types/realtime/realtime_mcp_tool_execution_error_param.py b/src/openai/types/realtime/realtime_mcp_tool_execution_error_param.py new file mode 100644 index 0000000000..619e11c305 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcp_tool_execution_error_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcpToolExecutionErrorParam"] + + +class RealtimeMcpToolExecutionErrorParam(TypedDict, total=False): + message: Required[str] + + type: Required[Literal["tool_execution_error"]] diff --git a/src/openai/types/realtime/realtime_mcphttp_error.py b/src/openai/types/realtime/realtime_mcphttp_error.py new file mode 100644 index 0000000000..53cff91e6e --- /dev/null +++ b/src/openai/types/realtime/realtime_mcphttp_error.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeMcphttpError"] + + +class RealtimeMcphttpError(BaseModel): + code: int + + message: str + + type: Literal["http_error"] diff --git a/src/openai/types/realtime/realtime_mcphttp_error_param.py b/src/openai/types/realtime/realtime_mcphttp_error_param.py new file mode 100644 index 0000000000..2b80a6f0a4 --- /dev/null +++ b/src/openai/types/realtime/realtime_mcphttp_error_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["RealtimeMcphttpErrorParam"] + + +class RealtimeMcphttpErrorParam(TypedDict, total=False): + code: Required[int] + + message: Required[str] + + type: Required[Literal["http_error"]] diff --git a/src/openai/types/realtime/realtime_response.py b/src/openai/types/realtime/realtime_response.py new file mode 100644 index 0000000000..a23edc48ab --- /dev/null +++ b/src/openai/types/realtime/realtime_response.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.metadata import Metadata +from .conversation_item import ConversationItem +from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_response_usage import RealtimeResponseUsage +from .realtime_response_status import RealtimeResponseStatus + +__all__ = ["RealtimeResponse", "Audio", "AudioOutput"] + + +class AudioOutput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The format of the output audio.""" + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. 
+ """ + + +class Audio(BaseModel): + """Configuration for audio output.""" + + output: Optional[AudioOutput] = None + + +class RealtimeResponse(BaseModel): + """The response resource.""" + + id: Optional[str] = None + """The unique ID of the response, will look like `resp_1234`.""" + + audio: Optional[Audio] = None + """Configuration for audio output.""" + + conversation_id: Optional[str] = None + """ + Which conversation the response is added to, determined by the `conversation` + field in the `response.create` event. If `auto`, the response will be added to + the default conversation and the value of `conversation_id` will be an id like + `conv_1234`. If `none`, the response will not be added to any conversation and + the value of `conversation_id` will be `null`. If responses are being triggered + automatically by VAD the response will be added to the default conversation + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls, that was used in this response. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + object: Optional[Literal["realtime.response"]] = None + """The object type, must be `realtime.response`.""" + + output: Optional[List[ConversationItem]] = None + """The list of output items generated by the response.""" + + output_modalities: Optional[List[Literal["text", "audio"]]] = None + """ + The set of modalities the model used to respond, currently the only possible + values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text + transcript. Setting the output to mode `text` will disable audio output from the + model. + """ + + status: Optional[Literal["completed", "cancelled", "failed", "incomplete", "in_progress"]] = None + """ + The final status of the response (`completed`, `cancelled`, `failed`, or + `incomplete`, `in_progress`). + """ + + status_details: Optional[RealtimeResponseStatus] = None + """Additional details about the status.""" + + usage: Optional[RealtimeResponseUsage] = None + """Usage statistics for the Response, this will correspond to billing. + + A Realtime API session will maintain a conversation context and append new Items + to the Conversation, thus output from previous turns (text and audio tokens) + will become the input for later turns. + """ diff --git a/src/openai/types/realtime/realtime_response_create_audio_output.py b/src/openai/types/realtime/realtime_response_create_audio_output.py new file mode 100644 index 0000000000..b8f4d284d5 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_audio_output.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
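# Usage sketch (illustrative handling): reading a few commonly used fields off
# the RealtimeResponse model defined above, e.g. the resource carried by a
# `response.done` server event. All fields are optional, hence the guards.
from openai.types.realtime.realtime_response import RealtimeResponse


def summarize_response(response: RealtimeResponse) -> str:
    items = response.output or []
    total = response.usage.total_tokens if response.usage else None
    return f"status={response.status} items={len(items)} total_tokens={total}"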
+ +from typing import Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_audio_formats import RealtimeAudioFormats + +__all__ = ["RealtimeResponseCreateAudioOutput", "Output"] + + +class Output(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The format of the output audio.""" + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. + """ + + +class RealtimeResponseCreateAudioOutput(BaseModel): + """Configuration for audio input and output.""" + + output: Optional[Output] = None diff --git a/src/openai/types/realtime/realtime_response_create_audio_output_param.py b/src/openai/types/realtime/realtime_response_create_audio_output_param.py new file mode 100644 index 0000000000..30a4633698 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_audio_output_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypedDict + +from .realtime_audio_formats_param import RealtimeAudioFormatsParam + +__all__ = ["RealtimeResponseCreateAudioOutputParam", "Output"] + + +class Output(TypedDict, total=False): + format: RealtimeAudioFormatsParam + """The format of the output audio.""" + + voice: Union[str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. + """ + + +class RealtimeResponseCreateAudioOutputParam(TypedDict, total=False): + """Configuration for audio input and output.""" + + output: Output diff --git a/src/openai/types/realtime/realtime_response_create_mcp_tool.py b/src/openai/types/realtime/realtime_response_create_mcp_tool.py new file mode 100644 index 0000000000..72189e10e6 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_mcp_tool.py @@ -0,0 +1,153 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel + +__all__ = [ + "RealtimeResponseCreateMcpTool", + "AllowedTools", + "AllowedToolsMcpToolFilter", + "RequireApproval", + "RequireApprovalMcpToolApprovalFilter", + "RequireApprovalMcpToolApprovalFilterAlways", + "RequireApprovalMcpToolApprovalFilterNever", +] + + +class AllowedToolsMcpToolFilter(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. 
+ """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +AllowedTools: TypeAlias = Union[List[str], AllowedToolsMcpToolFilter, None] + + +class RequireApprovalMcpToolApprovalFilterAlways(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class RequireApprovalMcpToolApprovalFilterNever(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class RequireApprovalMcpToolApprovalFilter(BaseModel): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: Optional[RequireApprovalMcpToolApprovalFilterAlways] = None + """A filter object to specify which tools are allowed.""" + + never: Optional[RequireApprovalMcpToolApprovalFilterNever] = None + """A filter object to specify which tools are allowed.""" + + +RequireApproval: TypeAlias = Union[RequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None] + + +class RealtimeResponseCreateMcpTool(BaseModel): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: str + """A label for this MCP server, used to identify it in tool calls.""" + + type: Literal["mcp"] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[AllowedTools] = None + """List of allowed tool names or a filter object.""" + + authorization: Optional[str] = None + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Optional[ + Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + ] = None + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] = None + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[RequireApproval] = None + """Specify which of the MCP server's tools require approval.""" + + server_description: Optional[str] = None + """Optional description of the MCP server, used to provide more context.""" + + server_url: Optional[str] = None + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ diff --git a/src/openai/types/realtime/realtime_response_create_mcp_tool_param.py b/src/openai/types/realtime/realtime_response_create_mcp_tool_param.py new file mode 100644 index 0000000000..68dd6bdb5c --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_mcp_tool_param.py @@ -0,0 +1,153 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr + +__all__ = [ + "RealtimeResponseCreateMcpToolParam", + "AllowedTools", + "AllowedToolsMcpToolFilter", + "RequireApproval", + "RequireApprovalMcpToolApprovalFilter", + "RequireApprovalMcpToolApprovalFilterAlways", + "RequireApprovalMcpToolApprovalFilterNever", +] + + +class AllowedToolsMcpToolFilter(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +AllowedTools: TypeAlias = Union[SequenceNotStr[str], AllowedToolsMcpToolFilter] + + +class RequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class RequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class RequireApprovalMcpToolApprovalFilter(TypedDict, total=False): + """Specify which of the MCP server's tools require approval. 
+ + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: RequireApprovalMcpToolApprovalFilterAlways + """A filter object to specify which tools are allowed.""" + + never: RequireApprovalMcpToolApprovalFilterNever + """A filter object to specify which tools are allowed.""" + + +RequireApproval: TypeAlias = Union[RequireApprovalMcpToolApprovalFilter, Literal["always", "never"]] + + +class RealtimeResponseCreateMcpToolParam(TypedDict, total=False): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: Required[str] + """A label for this MCP server, used to identify it in tool calls.""" + + type: Required[Literal["mcp"]] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[AllowedTools] + """List of allowed tool names or a filter object.""" + + authorization: str + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[RequireApproval] + """Specify which of the MCP server's tools require approval.""" + + server_description: str + """Optional description of the MCP server, used to provide more context.""" + + server_url: str + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ diff --git a/src/openai/types/realtime/realtime_response_create_params.py b/src/openai/types/realtime/realtime_response_create_params.py new file mode 100644 index 0000000000..deec8c9280 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_params.py @@ -0,0 +1,100 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
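# Usage sketch (the label and URL are placeholders): configuring a remote MCP
# server as a tool with the RealtimeResponseCreateMcpToolParam TypedDict
# defined above. `server_url` is supplied here since no `connector_id` is set.
from openai.types.realtime.realtime_response_create_mcp_tool_param import (
    RealtimeResponseCreateMcpToolParam,
)

mcp_tool: RealtimeResponseCreateMcpToolParam = {
    "type": "mcp",
    "server_label": "docs",
    "server_url": "https://example.com/mcp",
    "allowed_tools": ["search"],
    "require_approval": "never",
}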
+ +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..shared.metadata import Metadata +from .conversation_item import ConversationItem +from .realtime_function_tool import RealtimeFunctionTool +from ..responses.response_prompt import ResponsePrompt +from ..responses.tool_choice_mcp import ToolChoiceMcp +from ..responses.tool_choice_options import ToolChoiceOptions +from ..responses.tool_choice_function import ToolChoiceFunction +from .realtime_response_create_mcp_tool import RealtimeResponseCreateMcpTool +from .realtime_response_create_audio_output import RealtimeResponseCreateAudioOutput + +__all__ = ["RealtimeResponseCreateParams", "ToolChoice", "Tool"] + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp] + +Tool: TypeAlias = Union[RealtimeFunctionTool, RealtimeResponseCreateMcpTool] + + +class RealtimeResponseCreateParams(BaseModel): + """Create a new Realtime response with these parameters""" + + audio: Optional[RealtimeResponseCreateAudioOutput] = None + """Configuration for audio input and output.""" + + conversation: Union[str, Literal["auto", "none"], None] = None + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Optional[List[ConversationItem]] = None + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items that previously + appeared in the session using their id. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. Note that the server sets default instructions which will be used if + this field is not set and are visible in the `session.created` event at the + start of the session. + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
+ """ + + output_modalities: Optional[List[Literal["text", "audio"]]] = None + """ + The set of modalities the model used to respond, currently the only possible + values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text + transcript. Setting the output to mode `text` will disable audio output from the + model. + """ + + prompt: Optional[ResponsePrompt] = None + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: Optional[ToolChoice] = None + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. + """ + + tools: Optional[List[Tool]] = None + """Tools available to the model.""" diff --git a/src/openai/types/realtime/realtime_response_create_params_param.py b/src/openai/types/realtime/realtime_response_create_params_param.py new file mode 100644 index 0000000000..caad5bc900 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_create_params_param.py @@ -0,0 +1,101 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, TypeAlias, TypedDict + +from ..shared_params.metadata import Metadata +from .conversation_item_param import ConversationItemParam +from .realtime_function_tool_param import RealtimeFunctionToolParam +from ..responses.tool_choice_options import ToolChoiceOptions +from ..responses.response_prompt_param import ResponsePromptParam +from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam +from ..responses.tool_choice_function_param import ToolChoiceFunctionParam +from .realtime_response_create_mcp_tool_param import RealtimeResponseCreateMcpToolParam +from .realtime_response_create_audio_output_param import RealtimeResponseCreateAudioOutputParam + +__all__ = ["RealtimeResponseCreateParamsParam", "ToolChoice", "Tool"] + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam] + +Tool: TypeAlias = Union[RealtimeFunctionToolParam, RealtimeResponseCreateMcpToolParam] + + +class RealtimeResponseCreateParamsParam(TypedDict, total=False): + """Create a new Realtime response with these parameters""" + + audio: RealtimeResponseCreateAudioOutputParam + """Configuration for audio input and output.""" + + conversation: Union[str, Literal["auto", "none"]] + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Iterable[ConversationItemParam] + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items that previously + appeared in the session using their id. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. 
"be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. Note that the server sets default instructions which will be used if + this field is not set and are visible in the `session.created` event at the + start of the session. + """ + + max_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + output_modalities: List[Literal["text", "audio"]] + """ + The set of modalities the model used to respond, currently the only possible + values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text + transcript. Setting the output to mode `text` will disable audio output from the + model. + """ + + prompt: Optional[ResponsePromptParam] + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: ToolChoice + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. + """ + + tools: Iterable[Tool] + """Tools available to the model.""" diff --git a/src/openai/types/realtime/realtime_response_status.py b/src/openai/types/realtime/realtime_response_status.py new file mode 100644 index 0000000000..26b272ae5a --- /dev/null +++ b/src/openai/types/realtime/realtime_response_status.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeResponseStatus", "Error"] + + +class Error(BaseModel): + """ + A description of the error that caused the response to fail, + populated when the `status` is `failed`. + """ + + code: Optional[str] = None + """Error code, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class RealtimeResponseStatus(BaseModel): + """Additional details about the status.""" + + error: Optional[Error] = None + """ + A description of the error that caused the response to fail, populated when the + `status` is `failed`. + """ + + reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None + """The reason the Response did not complete. + + For a `cancelled` Response, one of `turn_detected` (the server VAD detected a + new start of speech) or `client_cancelled` (the client sent a cancel event). For + an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the + server-side safety filter activated and cut off the response). 
+ """ + + type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None + """ + The type of error that caused the response to fail, corresponding with the + `status` field (`completed`, `cancelled`, `incomplete`, `failed`). + """ diff --git a/src/openai/types/realtime/realtime_response_usage.py b/src/openai/types/realtime/realtime_response_usage.py new file mode 100644 index 0000000000..a5985d8a7b --- /dev/null +++ b/src/openai/types/realtime/realtime_response_usage.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .realtime_response_usage_input_token_details import RealtimeResponseUsageInputTokenDetails +from .realtime_response_usage_output_token_details import RealtimeResponseUsageOutputTokenDetails + +__all__ = ["RealtimeResponseUsage"] + + +class RealtimeResponseUsage(BaseModel): + """Usage statistics for the Response, this will correspond to billing. + + A + Realtime API session will maintain a conversation context and append new + Items to the Conversation, thus output from previous turns (text and + audio tokens) will become the input for later turns. + """ + + input_token_details: Optional[RealtimeResponseUsageInputTokenDetails] = None + """Details about the input tokens used in the Response. + + Cached tokens are tokens from previous turns in the conversation that are + included as context for the current response. Cached tokens here are counted as + a subset of input tokens, meaning input tokens will include cached and uncached + tokens. + """ + + input_tokens: Optional[int] = None + """ + The number of input tokens used in the Response, including text and audio + tokens. + """ + + output_token_details: Optional[RealtimeResponseUsageOutputTokenDetails] = None + """Details about the output tokens used in the Response.""" + + output_tokens: Optional[int] = None + """ + The number of output tokens sent in the Response, including text and audio + tokens. + """ + + total_tokens: Optional[int] = None + """ + The total number of tokens in the Response including input and output text and + audio tokens. + """ diff --git a/src/openai/types/realtime/realtime_response_usage_input_token_details.py b/src/openai/types/realtime/realtime_response_usage_input_token_details.py new file mode 100644 index 0000000000..0fc71749e9 --- /dev/null +++ b/src/openai/types/realtime/realtime_response_usage_input_token_details.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["RealtimeResponseUsageInputTokenDetails", "CachedTokensDetails"] + + +class CachedTokensDetails(BaseModel): + """Details about the cached tokens used as input for the Response.""" + + audio_tokens: Optional[int] = None + """The number of cached audio tokens used as input for the Response.""" + + image_tokens: Optional[int] = None + """The number of cached image tokens used as input for the Response.""" + + text_tokens: Optional[int] = None + """The number of cached text tokens used as input for the Response.""" + + +class RealtimeResponseUsageInputTokenDetails(BaseModel): + """Details about the input tokens used in the Response. + + Cached tokens are tokens from previous turns in the conversation that are included as context for the current response. 
Cached tokens here are counted as a subset of input tokens, meaning input tokens will include cached and uncached tokens. + """ + + audio_tokens: Optional[int] = None + """The number of audio tokens used as input for the Response.""" + + cached_tokens: Optional[int] = None + """The number of cached tokens used as input for the Response.""" + + cached_tokens_details: Optional[CachedTokensDetails] = None + """Details about the cached tokens used as input for the Response.""" + + image_tokens: Optional[int] = None + """The number of image tokens used as input for the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used as input for the Response.""" diff --git a/src/openai/types/realtime/realtime_response_usage_output_token_details.py b/src/openai/types/realtime/realtime_response_usage_output_token_details.py new file mode 100644 index 0000000000..2154c77d5d --- /dev/null +++ b/src/openai/types/realtime/realtime_response_usage_output_token_details.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["RealtimeResponseUsageOutputTokenDetails"] + + +class RealtimeResponseUsageOutputTokenDetails(BaseModel): + """Details about the output tokens used in the Response.""" + + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" diff --git a/src/openai/types/realtime/realtime_server_event.py b/src/openai/types/realtime/realtime_server_event.py new file mode 100644 index 0000000000..5de53d053e --- /dev/null +++ b/src/openai/types/realtime/realtime_server_event.py @@ -0,0 +1,185 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
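# --- Editor's note: hedged usage sketch, not part of the generated files above. ---
# A minimal example of reading the billing-related fields declared in
# realtime_response_usage.py and its *_token_details companions. It assumes only the
# attributes defined in this diff; the helper name `summarize_usage` is illustrative.
from typing import Optional

from openai.types.realtime.realtime_response_usage import RealtimeResponseUsage


def summarize_usage(usage: Optional[RealtimeResponseUsage]) -> str:
    """Render a one-line summary of a Response's token usage."""
    if usage is None:
        return "no usage reported"
    cached = usage.input_token_details.cached_tokens if usage.input_token_details else None
    return (
        f"input={usage.input_tokens} (cached={cached}) "
        f"output={usage.output_tokens} total={usage.total_tokens}"
    )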
+ +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .conversation_item import ConversationItem +from .response_done_event import ResponseDoneEvent +from .realtime_error_event import RealtimeErrorEvent +from .mcp_list_tools_failed import McpListToolsFailed +from .session_created_event import SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent +from .conversation_item_done import ConversationItemDone +from .response_created_event import ResponseCreatedEvent +from .conversation_item_added import ConversationItemAdded +from .mcp_list_tools_completed import McpListToolsCompleted +from .response_mcp_call_failed import ResponseMcpCallFailed +from .response_text_done_event import ResponseTextDoneEvent +from .rate_limits_updated_event import RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent +from .mcp_list_tools_in_progress import McpListToolsInProgress +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .response_mcp_call_completed import ResponseMcpCallCompleted +from .response_mcp_call_in_progress import ResponseMcpCallInProgress +from .conversation_item_created_event import ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_mcp_call_arguments_done import ResponseMcpCallArgumentsDone +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent +from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent +from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment +from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent +from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent, +) + +__all__ = [ + "RealtimeServerEvent", + "ConversationItemRetrieved", + "OutputAudioBufferStarted", + "OutputAudioBufferStopped", 
+ "OutputAudioBufferCleared", +] + + +class ConversationItemRetrieved(BaseModel): + """Returned when a conversation item is retrieved with `conversation.item.retrieve`. + + This is provided as a way to fetch the server's representation of an item, for example to get access to the post-processed audio data after noise cancellation and VAD. It includes the full content of the Item, including audio data. + """ + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + type: Literal["conversation.item.retrieved"] + """The event type, must be `conversation.item.retrieved`.""" + + +class OutputAudioBufferStarted(BaseModel): + """ + **WebRTC/SIP Only:** Emitted when the server begins streaming audio to the client. This event is + emitted after an audio content part has been added (`response.content_part.added`) + to the response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + """ + + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.started"] + """The event type, must be `output_audio_buffer.started`.""" + + +class OutputAudioBufferStopped(BaseModel): + """ + **WebRTC/SIP Only:** Emitted when the output audio buffer has been completely drained on the server, + and no more audio is forthcoming. This event is emitted after the full response + data has been sent to the client (`response.done`). + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + """ + + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.stopped"] + """The event type, must be `output_audio_buffer.stopped`.""" + + +class OutputAudioBufferCleared(BaseModel): + """**WebRTC/SIP Only:** Emitted when the output audio buffer is cleared. + + This happens either in VAD + mode when the user has interrupted (`input_audio_buffer.speech_started`), + or when the client has emitted the `output_audio_buffer.clear` event to manually + cut off the current audio response. + [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). 
+ """ + + event_id: str + """The unique ID of the server event.""" + + response_id: str + """The unique ID of the response that produced the audio.""" + + type: Literal["output_audio_buffer.cleared"] + """The event type, must be `output_audio_buffer.cleared`.""" + + +RealtimeServerEvent: TypeAlias = Annotated[ + Union[ + ConversationCreatedEvent, + ConversationItemCreatedEvent, + ConversationItemDeletedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionDeltaEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemRetrieved, + ConversationItemTruncatedEvent, + RealtimeErrorEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferDtmfEventReceivedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + RateLimitsUpdatedEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdatedEvent, + OutputAudioBufferStarted, + OutputAudioBufferStopped, + OutputAudioBufferCleared, + ConversationItemAdded, + ConversationItemDone, + InputAudioBufferTimeoutTriggered, + ConversationItemInputAudioTranscriptionSegment, + McpListToolsInProgress, + McpListToolsCompleted, + McpListToolsFailed, + ResponseMcpCallArgumentsDelta, + ResponseMcpCallArgumentsDone, + ResponseMcpCallInProgress, + ResponseMcpCallCompleted, + ResponseMcpCallFailed, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/realtime/realtime_session_client_secret.py b/src/openai/types/realtime/realtime_session_client_secret.py new file mode 100644 index 0000000000..13a12f5502 --- /dev/null +++ b/src/openai/types/realtime/realtime_session_client_secret.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["RealtimeSessionClientSecret"] + + +class RealtimeSessionClientSecret(BaseModel): + """Ephemeral key returned by the API.""" + + expires_at: int + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: str + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py new file mode 100644 index 0000000000..76738816a0 --- /dev/null +++ b/src/openai/types/realtime/realtime_session_create_request.py @@ -0,0 +1,129 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
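# --- Editor's note: hedged dispatch sketch, not part of the generated files above. ---
# The RealtimeServerEvent union defined in realtime_server_event.py is discriminated on
# the literal `type` field, so incoming events can be narrowed with isinstance checks or
# by matching `event.type`. Only classes introduced in this diff are referenced; the
# handler itself is illustrative.
from openai.types.realtime.realtime_error_event import RealtimeErrorEvent
from openai.types.realtime.realtime_server_event import RealtimeServerEvent
from openai.types.realtime.response_done_event import ResponseDoneEvent


def handle_server_event(event: RealtimeServerEvent) -> None:
    if isinstance(event, RealtimeErrorEvent):
        print("error event:", event)
    elif isinstance(event, ResponseDoneEvent):
        print("response finished:", event)
    elif event.type == "output_audio_buffer.stopped":
        # WebRTC/SIP only: the server has drained the output audio buffer.
        print("audio finished for response:", event.response_id)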
+ +from typing import List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_truncation import RealtimeTruncation +from .realtime_audio_config import RealtimeAudioConfig +from .realtime_tools_config import RealtimeToolsConfig +from .realtime_tracing_config import RealtimeTracingConfig +from ..responses.response_prompt import ResponsePrompt +from .realtime_tool_choice_config import RealtimeToolChoiceConfig + +__all__ = ["RealtimeSessionCreateRequest"] + + +class RealtimeSessionCreateRequest(BaseModel): + """Realtime session object configuration.""" + + type: Literal["realtime"] + """The type of session to create. Always `realtime` for the Realtime API.""" + + audio: Optional[RealtimeAudioConfig] = None + """Configuration for input and output audio.""" + + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + None, + ] = None + """The Realtime model used for this session.""" + + output_modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + It defaults to `["audio"]`, indicating that the model will respond with audio + plus a transcript. `["text"]` can be used to make the model respond with text + only. It is not possible to request both `text` and `audio` at the same time. + """ + + prompt: Optional[ResponsePrompt] = None + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: Optional[RealtimeToolChoiceConfig] = None + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. 
+ """ + + tools: Optional[RealtimeToolsConfig] = None + """Tools available to the model.""" + + tracing: Optional[RealtimeTracingConfig] = None + """ + Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + """ + + truncation: Optional[RealtimeTruncation] = None + """ + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. + """ diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py new file mode 100644 index 0000000000..cc5806fe11 --- /dev/null +++ b/src/openai/types/realtime/realtime_session_create_request_param.py @@ -0,0 +1,129 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Optional +from typing_extensions import Literal, Required, TypedDict + +from .realtime_truncation_param import RealtimeTruncationParam +from .realtime_audio_config_param import RealtimeAudioConfigParam +from .realtime_tools_config_param import RealtimeToolsConfigParam +from .realtime_tracing_config_param import RealtimeTracingConfigParam +from ..responses.response_prompt_param import ResponsePromptParam +from .realtime_tool_choice_config_param import RealtimeToolChoiceConfigParam + +__all__ = ["RealtimeSessionCreateRequestParam"] + + +class RealtimeSessionCreateRequestParam(TypedDict, total=False): + """Realtime session object configuration.""" + + type: Required[Literal["realtime"]] + """The type of session to create. Always `realtime` for the Realtime API.""" + + audio: RealtimeAudioConfigParam + """Configuration for input and output audio.""" + + include: List[Literal["item.input_audio_transcription.logprobs"]] + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). 
The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + ] + """The Realtime model used for this session.""" + + output_modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + It defaults to `["audio"]`, indicating that the model will respond with audio + plus a transcript. `["text"]` can be used to make the model respond with text + only. It is not possible to request both `text` and `audio` at the same time. + """ + + prompt: Optional[ResponsePromptParam] + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: RealtimeToolChoiceConfigParam + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. + """ + + tools: RealtimeToolsConfigParam + """Tools available to the model.""" + + tracing: Optional[RealtimeTracingConfigParam] + """ + Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + """ + + truncation: RealtimeTruncationParam + """ + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. 
+ """ diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py new file mode 100644 index 0000000000..46d32e8571 --- /dev/null +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -0,0 +1,532 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .audio_transcription import AudioTranscription +from .realtime_truncation import RealtimeTruncation +from .noise_reduction_type import NoiseReductionType +from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_function_tool import RealtimeFunctionTool +from ..responses.response_prompt import ResponsePrompt +from ..responses.tool_choice_mcp import ToolChoiceMcp +from ..responses.tool_choice_options import ToolChoiceOptions +from .realtime_session_client_secret import RealtimeSessionClientSecret +from ..responses.tool_choice_function import ToolChoiceFunction + +__all__ = [ + "RealtimeSessionCreateResponse", + "Audio", + "AudioInput", + "AudioInputNoiseReduction", + "AudioInputTurnDetection", + "AudioInputTurnDetectionServerVad", + "AudioInputTurnDetectionSemanticVad", + "AudioOutput", + "ToolChoice", + "Tool", + "ToolMcpTool", + "ToolMcpToolAllowedTools", + "ToolMcpToolAllowedToolsMcpToolFilter", + "ToolMcpToolRequireApproval", + "ToolMcpToolRequireApprovalMcpToolApprovalFilter", + "ToolMcpToolRequireApprovalMcpToolApprovalFilterAlways", + "ToolMcpToolRequireApprovalMcpToolApprovalFilterNever", + "Tracing", + "TracingTracingConfiguration", +] + + +class AudioInputNoiseReduction(BaseModel): + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model. + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and model performance by improving perception of the input audio. + """ + + type: Optional[NoiseReductionType] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class AudioInputTurnDetectionServerVad(BaseModel): + """ + Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence. + """ + + type: Literal["server_vad"] + """Type of turn detection, `server_vad` to turn on simple Server VAD.""" + + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + idle_timeout_ms: Optional[int] = None + """Optional timeout after which a model response will be triggered automatically. + + This is useful for situations in which a long pause from the user is unexpected, + such as a phone call. The model will effectively prompt the user to continue the + conversation based on the current context. 
+ + The timeout value will be applied after the last model response's audio has + finished playing, i.e. it's set to the `response.done` time plus audio playback + duration. + + An `input_audio_buffer.timeout_triggered` event (plus events associated with the + Response) will be emitted when the timeout is reached. Idle timeout is currently + only supported for `server_vad` mode. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: Optional[float] = None + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + +class AudioInputTurnDetectionSemanticVad(BaseModel): + """ + Server-side semantic turn detection which uses a model to determine when the user has finished speaking. + """ + + type: Literal["semantic_vad"] + """Type of turn detection, `semantic_vad` to turn on Semantic VAD.""" + + create_response: Optional[bool] = None + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, + 4s, and 2s respectively. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + +AudioInputTurnDetection: TypeAlias = Annotated[ + Union[AudioInputTurnDetectionServerVad, AudioInputTurnDetectionSemanticVad, None], + PropertyInfo(discriminator="type"), +] + + +class AudioInput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The format of the input audio.""" + + noise_reduction: Optional[AudioInputNoiseReduction] = None + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. 
+ """ + + transcription: Optional[AudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as guidance of input audio content rather than precisely + what the model heard. The client can optionally set the language and prompt for + transcription, these offer additional guidance to the transcription service. + """ + + turn_detection: Optional[AudioInputTurnDetection] = None + """Configuration for turn detection, ether Server VAD or Semantic VAD. + + This can be set to `null` to turn off, in which case the client must manually + trigger model response. + + Server VAD means that the model will detect the start and end of speech based on + audio volume and respond at the end of user speech. + + Semantic VAD is more advanced and uses a turn detection model (in conjunction + with VAD) to semantically estimate whether the user has finished speaking, then + dynamically sets a timeout based on this probability. For example, if user audio + trails off with "uhhm", the model will score a low probability of turn end and + wait longer for the user to continue speaking. This can be useful for more + natural conversations, but may have a higher latency. + """ + + +class AudioOutput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The format of the output audio.""" + + speed: Optional[float] = None + """ + The speed of the model's spoken response as a multiple of the original speed. + 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. + This value can only be changed in between model turns, not while a response is + in progress. + + This parameter is a post-processing adjustment to the audio after it is + generated, it's also possible to prompt the model to speak faster or slower. + """ + + voice: Union[ + str, Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"], None + ] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`. We recommend + `marin` and `cedar` for best quality. + """ + + +class Audio(BaseModel): + """Configuration for input and output audio.""" + + input: Optional[AudioInput] = None + + output: Optional[AudioOutput] = None + + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp] + + +class ToolMcpToolAllowedToolsMcpToolFilter(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. 
+ """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +ToolMcpToolAllowedTools: TypeAlias = Union[List[str], ToolMcpToolAllowedToolsMcpToolFilter, None] + + +class ToolMcpToolRequireApprovalMcpToolApprovalFilterAlways(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class ToolMcpToolRequireApprovalMcpToolApprovalFilterNever(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class ToolMcpToolRequireApprovalMcpToolApprovalFilter(BaseModel): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: Optional[ToolMcpToolRequireApprovalMcpToolApprovalFilterAlways] = None + """A filter object to specify which tools are allowed.""" + + never: Optional[ToolMcpToolRequireApprovalMcpToolApprovalFilterNever] = None + """A filter object to specify which tools are allowed.""" + + +ToolMcpToolRequireApproval: TypeAlias = Union[ + ToolMcpToolRequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None +] + + +class ToolMcpTool(BaseModel): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: str + """A label for this MCP server, used to identify it in tool calls.""" + + type: Literal["mcp"] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[ToolMcpToolAllowedTools] = None + """List of allowed tool names or a filter object.""" + + authorization: Optional[str] = None + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Optional[ + Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + ] = None + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] = None + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[ToolMcpToolRequireApproval] = None + """Specify which of the MCP server's tools require approval.""" + + server_description: Optional[str] = None + """Optional description of the MCP server, used to provide more context.""" + + server_url: Optional[str] = None + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +Tool: TypeAlias = Union[RealtimeFunctionTool, ToolMcpTool] + + +class TracingTracingConfiguration(BaseModel): + """Granular configuration for tracing.""" + + group_id: Optional[str] = None + """ + The group id to attach to this trace to enable filtering and grouping in the + Traces Dashboard. + """ + + metadata: Optional[object] = None + """ + The arbitrary metadata to attach to this trace to enable filtering in the Traces + Dashboard. + """ + + workflow_name: Optional[str] = None + """The name of the workflow to attach to this trace. + + This is used to name the trace in the Traces Dashboard. + """ + + +Tracing: TypeAlias = Union[Literal["auto"], TracingTracingConfiguration, None] + + +class RealtimeSessionCreateResponse(BaseModel): + """A new Realtime session configuration, with an ephemeral key. + + Default TTL + for keys is one minute. + """ + + client_secret: RealtimeSessionClientSecret + """Ephemeral key returned by the API.""" + + type: Literal["realtime"] + """The type of session to create. Always `realtime` for the Realtime API.""" + + audio: Optional[Audio] = None + """Configuration for input and output audio.""" + + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. 
+ """ + + model: Union[ + str, + Literal[ + "gpt-realtime", + "gpt-realtime-2025-08-28", + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-realtime-preview-2025-06-03", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + "gpt-realtime-mini", + "gpt-realtime-mini-2025-10-06", + "gpt-audio-mini", + "gpt-audio-mini-2025-10-06", + ], + None, + ] = None + """The Realtime model used for this session.""" + + output_modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + It defaults to `["audio"]`, indicating that the model will respond with audio + plus a transcript. `["text"]` can be used to make the model respond with text + only. It is not possible to request both `text` and `audio` at the same time. + """ + + prompt: Optional[ResponsePrompt] = None + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + tool_choice: Optional[ToolChoice] = None + """How the model chooses tools. + + Provide one of the string modes or force a specific function/MCP tool. + """ + + tools: Optional[List[Tool]] = None + """Tools available to the model.""" + + tracing: Optional[Tracing] = None + """ + Realtime API can write session traces to the + [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the workflow + name, group id, and metadata. + """ + + truncation: Optional[RealtimeTruncation] = None + """ + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. + """ diff --git a/src/openai/types/realtime/realtime_tool_choice_config.py b/src/openai/types/realtime/realtime_tool_choice_config.py new file mode 100644 index 0000000000..f93c490004 --- /dev/null +++ b/src/openai/types/realtime/realtime_tool_choice_config.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
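# --- Editor's note: hedged configuration sketch, not part of the generated files above. ---
# A minimal, hand-written session configuration built from the
# RealtimeSessionCreateRequestParam TypedDict defined earlier in this diff. The values are
# placeholders, and which client method ultimately accepts this dict is outside this PR.
from openai.types.realtime.realtime_session_create_request_param import (
    RealtimeSessionCreateRequestParam,
)

session_config: RealtimeSessionCreateRequestParam = {
    "type": "realtime",  # always "realtime" for the Realtime API
    "model": "gpt-realtime",  # any of the model literals listed above
    "output_modalities": ["audio"],  # audio responses always include a transcript
    "instructions": "Be extremely succinct and act friendly.",
    "max_output_tokens": 4096,  # an integer between 1 and 4096, or the literal "inf"
}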
+ +from typing import Union +from typing_extensions import TypeAlias + +from ..responses.tool_choice_mcp import ToolChoiceMcp +from ..responses.tool_choice_options import ToolChoiceOptions +from ..responses.tool_choice_function import ToolChoiceFunction + +__all__ = ["RealtimeToolChoiceConfig"] + +RealtimeToolChoiceConfig: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp] diff --git a/src/openai/types/realtime/realtime_tool_choice_config_param.py b/src/openai/types/realtime/realtime_tool_choice_config_param.py new file mode 100644 index 0000000000..af92f243b0 --- /dev/null +++ b/src/openai/types/realtime/realtime_tool_choice_config_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from ..responses.tool_choice_options import ToolChoiceOptions +from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam +from ..responses.tool_choice_function_param import ToolChoiceFunctionParam + +__all__ = ["RealtimeToolChoiceConfigParam"] + +RealtimeToolChoiceConfigParam: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam] diff --git a/src/openai/types/realtime/realtime_tools_config.py b/src/openai/types/realtime/realtime_tools_config.py new file mode 100644 index 0000000000..b97599ab42 --- /dev/null +++ b/src/openai/types/realtime/realtime_tools_config.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import TypeAlias + +from .realtime_tools_config_union import RealtimeToolsConfigUnion + +__all__ = ["RealtimeToolsConfig"] + +RealtimeToolsConfig: TypeAlias = List[RealtimeToolsConfigUnion] diff --git a/src/openai/types/realtime/realtime_tools_config_param.py b/src/openai/types/realtime/realtime_tools_config_param.py new file mode 100644 index 0000000000..3cc404feef --- /dev/null +++ b/src/openai/types/realtime/realtime_tools_config_param.py @@ -0,0 +1,161 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .realtime_function_tool_param import RealtimeFunctionToolParam + +__all__ = [ + "RealtimeToolsConfigParam", + "RealtimeToolsConfigUnionParam", + "Mcp", + "McpAllowedTools", + "McpAllowedToolsMcpToolFilter", + "McpRequireApproval", + "McpRequireApprovalMcpToolApprovalFilter", + "McpRequireApprovalMcpToolApprovalFilterAlways", + "McpRequireApprovalMcpToolApprovalFilterNever", +] + + +class McpAllowedToolsMcpToolFilter(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. 
+ """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +McpAllowedTools: TypeAlias = Union[SequenceNotStr[str], McpAllowedToolsMcpToolFilter] + + +class McpRequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: McpRequireApprovalMcpToolApprovalFilterAlways + """A filter object to specify which tools are allowed.""" + + never: McpRequireApprovalMcpToolApprovalFilterNever + """A filter object to specify which tools are allowed.""" + + +McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]] + + +class Mcp(TypedDict, total=False): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: Required[str] + """A label for this MCP server, used to identify it in tool calls.""" + + type: Required[Literal["mcp"]] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[McpAllowedTools] + """List of allowed tool names or a filter object.""" + + authorization: str + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[McpRequireApproval] + """Specify which of the MCP server's tools require approval.""" + + server_description: str + """Optional description of the MCP server, used to provide more context.""" + + server_url: str + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp] + +RealtimeToolsConfigParam: TypeAlias = List[RealtimeToolsConfigUnionParam] diff --git a/src/openai/types/realtime/realtime_tools_config_union.py b/src/openai/types/realtime/realtime_tools_config_union.py new file mode 100644 index 0000000000..92aaee7f26 --- /dev/null +++ b/src/openai/types/realtime/realtime_tools_config_union.py @@ -0,0 +1,159 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .realtime_function_tool import RealtimeFunctionTool + +__all__ = [ + "RealtimeToolsConfigUnion", + "Mcp", + "McpAllowedTools", + "McpAllowedToolsMcpToolFilter", + "McpRequireApproval", + "McpRequireApprovalMcpToolApprovalFilter", + "McpRequireApprovalMcpToolApprovalFilterAlways", + "McpRequireApprovalMcpToolApprovalFilterNever", +] + + +class McpAllowedToolsMcpToolFilter(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +McpAllowedTools: TypeAlias = Union[List[str], McpAllowedToolsMcpToolFilter, None] + + +class McpRequireApprovalMcpToolApprovalFilterAlways(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilterNever(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. 
+ """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilter(BaseModel): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: Optional[McpRequireApprovalMcpToolApprovalFilterAlways] = None + """A filter object to specify which tools are allowed.""" + + never: Optional[McpRequireApprovalMcpToolApprovalFilterNever] = None + """A filter object to specify which tools are allowed.""" + + +McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None] + + +class Mcp(BaseModel): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: str + """A label for this MCP server, used to identify it in tool calls.""" + + type: Literal["mcp"] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[McpAllowedTools] = None + """List of allowed tool names or a filter object.""" + + authorization: Optional[str] = None + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Optional[ + Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + ] = None + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] = None + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[McpRequireApproval] = None + """Specify which of the MCP server's tools require approval.""" + + server_description: Optional[str] = None + """Optional description of the MCP server, used to provide more context.""" + + server_url: Optional[str] = None + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[RealtimeFunctionTool, Mcp], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/realtime/realtime_tools_config_union_param.py b/src/openai/types/realtime/realtime_tools_config_union_param.py new file mode 100644 index 0000000000..6889b4c304 --- /dev/null +++ b/src/openai/types/realtime/realtime_tools_config_union_param.py @@ -0,0 +1,158 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
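# --- Editor's note: hedged tools-config sketch, not part of the generated files above. ---
# A minimal, hand-written tools list using the MCP TypedDict from
# realtime_tools_config_param.py. The server label and URL are placeholders; only keys
# declared in this diff are used, and exactly one of `server_url` or `connector_id` is set.
from openai.types.realtime.realtime_tools_config_param import RealtimeToolsConfigParam

tools: RealtimeToolsConfigParam = [
    {
        "type": "mcp",
        "server_label": "docs",  # identifies this server in tool calls
        "server_url": "https://example.com/mcp",  # alternatively, supply `connector_id`
        "allowed_tools": ["search", "fetch"],  # a list of names or a filter object
        "require_approval": "never",  # "always", "never", or a filter object
    }
]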
+ +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .realtime_function_tool_param import RealtimeFunctionToolParam + +__all__ = [ + "RealtimeToolsConfigUnionParam", + "Mcp", + "McpAllowedTools", + "McpAllowedToolsMcpToolFilter", + "McpRequireApproval", + "McpRequireApprovalMcpToolApprovalFilter", + "McpRequireApprovalMcpToolApprovalFilterAlways", + "McpRequireApprovalMcpToolApprovalFilterNever", +] + + +class McpAllowedToolsMcpToolFilter(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +McpAllowedTools: TypeAlias = Union[SequenceNotStr[str], McpAllowedToolsMcpToolFilter] + + +class McpRequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: McpRequireApprovalMcpToolApprovalFilterAlways + """A filter object to specify which tools are allowed.""" + + never: McpRequireApprovalMcpToolApprovalFilterNever + """A filter object to specify which tools are allowed.""" + + +McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]] + + +class Mcp(TypedDict, total=False): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: Required[str] + """A label for this MCP server, used to identify it in tool calls.""" + + type: Required[Literal["mcp"]] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[McpAllowedTools] + """List of allowed tool names or a filter object.""" + + authorization: str + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. 
+ """ + + connector_id: Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[McpRequireApproval] + """Specify which of the MCP server's tools require approval.""" + + server_description: str + """Optional description of the MCP server, used to provide more context.""" + + server_url: str + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp] diff --git a/src/openai/types/realtime/realtime_tracing_config.py b/src/openai/types/realtime/realtime_tracing_config.py new file mode 100644 index 0000000000..37e3ce8945 --- /dev/null +++ b/src/openai/types/realtime/realtime_tracing_config.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel + +__all__ = ["RealtimeTracingConfig", "TracingConfiguration"] + + +class TracingConfiguration(BaseModel): + """Granular configuration for tracing.""" + + group_id: Optional[str] = None + """ + The group id to attach to this trace to enable filtering and grouping in the + Traces Dashboard. + """ + + metadata: Optional[object] = None + """ + The arbitrary metadata to attach to this trace to enable filtering in the Traces + Dashboard. + """ + + workflow_name: Optional[str] = None + """The name of the workflow to attach to this trace. + + This is used to name the trace in the Traces Dashboard. + """ + + +RealtimeTracingConfig: TypeAlias = Union[Literal["auto"], TracingConfiguration, None] diff --git a/src/openai/types/realtime/realtime_tracing_config_param.py b/src/openai/types/realtime/realtime_tracing_config_param.py new file mode 100644 index 0000000000..742412897f --- /dev/null +++ b/src/openai/types/realtime/realtime_tracing_config_param.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias, TypedDict + +__all__ = ["RealtimeTracingConfigParam", "TracingConfiguration"] + + +class TracingConfiguration(TypedDict, total=False): + """Granular configuration for tracing.""" + + group_id: str + """ + The group id to attach to this trace to enable filtering and grouping in the + Traces Dashboard. 
+ """ + + metadata: object + """ + The arbitrary metadata to attach to this trace to enable filtering in the Traces + Dashboard. + """ + + workflow_name: str + """The name of the workflow to attach to this trace. + + This is used to name the trace in the Traces Dashboard. + """ + + +RealtimeTracingConfigParam: TypeAlias = Union[Literal["auto"], TracingConfiguration] diff --git a/src/openai/types/realtime/realtime_transcription_session_audio.py b/src/openai/types/realtime/realtime_transcription_session_audio.py new file mode 100644 index 0000000000..7ec29afb79 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_audio.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .realtime_transcription_session_audio_input import RealtimeTranscriptionSessionAudioInput + +__all__ = ["RealtimeTranscriptionSessionAudio"] + + +class RealtimeTranscriptionSessionAudio(BaseModel): + """Configuration for input and output audio.""" + + input: Optional[RealtimeTranscriptionSessionAudioInput] = None diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input.py b/src/openai/types/realtime/realtime_transcription_session_audio_input.py new file mode 100644 index 0000000000..80ff223590 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input.py @@ -0,0 +1,72 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .audio_transcription import AudioTranscription +from .noise_reduction_type import NoiseReductionType +from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_transcription_session_audio_input_turn_detection import ( + RealtimeTranscriptionSessionAudioInputTurnDetection, +) + +__all__ = ["RealtimeTranscriptionSessionAudioInput", "NoiseReduction"] + + +class NoiseReduction(BaseModel): + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model. + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and model performance by improving perception of the input audio. + """ + + type: Optional[NoiseReductionType] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class RealtimeTranscriptionSessionAudioInput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The PCM audio format. Only a 24kHz sample rate is supported.""" + + noise_reduction: Optional[NoiseReduction] = None + """Configuration for input audio noise reduction. + + This can be set to `null` to turn off. Noise reduction filters audio added to + the input audio buffer before it is sent to VAD and the model. Filtering the + audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + """ + + transcription: Optional[AudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. 
Transcription runs
+    asynchronously through
+    [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+    and should be treated as guidance of input audio content rather than precisely
+    what the model heard. The client can optionally set the language and prompt for
+    transcription, these offer additional guidance to the transcription service.
+    """
+
+    turn_detection: Optional[RealtimeTranscriptionSessionAudioInputTurnDetection] = None
+    """Configuration for turn detection, either Server VAD or Semantic VAD.
+
+    This can be set to `null` to turn off, in which case the client must manually
+    trigger model response.
+
+    Server VAD means that the model will detect the start and end of speech based on
+    audio volume and respond at the end of user speech.
+
+    Semantic VAD is more advanced and uses a turn detection model (in conjunction
+    with VAD) to semantically estimate whether the user has finished speaking, then
+    dynamically sets a timeout based on this probability. For example, if user audio
+    trails off with "uhhm", the model will score a low probability of turn end and
+    wait longer for the user to continue speaking. This can be useful for more
+    natural conversations, but may have a higher latency.
+    """
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py
new file mode 100644
index 0000000000..dd908c72f6
--- /dev/null
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_param.py
@@ -0,0 +1,74 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .noise_reduction_type import NoiseReductionType
+from .audio_transcription_param import AudioTranscriptionParam
+from .realtime_audio_formats_param import RealtimeAudioFormatsParam
+from .realtime_transcription_session_audio_input_turn_detection_param import (
+    RealtimeTranscriptionSessionAudioInputTurnDetectionParam,
+)
+
+__all__ = ["RealtimeTranscriptionSessionAudioInputParam", "NoiseReduction"]
+
+
+class NoiseReduction(TypedDict, total=False):
+    """Configuration for input audio noise reduction.
+
+    This can be set to `null` to turn off.
+    Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model.
+    Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and model performance by improving perception of the input audio.
+    """
+
+    type: NoiseReductionType
+    """Type of noise reduction.
+
+    `near_field` is for close-talking microphones such as headphones, `far_field` is
+    for far-field microphones such as laptop or conference room microphones.
+    """
+
+
+class RealtimeTranscriptionSessionAudioInputParam(TypedDict, total=False):
+    format: RealtimeAudioFormatsParam
+    """The PCM audio format. Only a 24kHz sample rate is supported."""
+
+    noise_reduction: NoiseReduction
+    """Configuration for input audio noise reduction.
+
+    This can be set to `null` to turn off. Noise reduction filters audio added to
+    the input audio buffer before it is sent to VAD and the model. Filtering the
+    audio can improve VAD and turn detection accuracy (reducing false positives) and
+    model performance by improving perception of the input audio.
+    """
+
+    transcription: AudioTranscriptionParam
+    """
+    Configuration for input audio transcription, defaults to off and can be set to
+    `null` to turn off once on. Input audio transcription is not native to the
+    model, since the model consumes audio directly. Transcription runs
+    asynchronously through
+    [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+    and should be treated as guidance of input audio content rather than precisely
+    what the model heard. The client can optionally set the language and prompt for
+    transcription, these offer additional guidance to the transcription service.
+    """
+
+    turn_detection: Optional[RealtimeTranscriptionSessionAudioInputTurnDetectionParam]
+    """Configuration for turn detection, either Server VAD or Semantic VAD.
+
+    This can be set to `null` to turn off, in which case the client must manually
+    trigger model response.
+
+    Server VAD means that the model will detect the start and end of speech based on
+    audio volume and respond at the end of user speech.
+
+    Semantic VAD is more advanced and uses a turn detection model (in conjunction
+    with VAD) to semantically estimate whether the user has finished speaking, then
+    dynamically sets a timeout based on this probability. For example, if user audio
+    trails off with "uhhm", the model will score a low probability of turn end and
+    wait longer for the user to continue speaking. This can be useful for more
+    natural conversations, but may have a higher latency.
+    """
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
new file mode 100644
index 0000000000..3d4ee779f4
--- /dev/null
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetection", "ServerVad", "SemanticVad"]
+
+
+class ServerVad(BaseModel):
+    """
+    Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence.
+    """
+
+    type: Literal["server_vad"]
+    """Type of turn detection, `server_vad` to turn on simple Server VAD."""
+
+    create_response: Optional[bool] = None
+    """Whether or not to automatically generate a response when a VAD stop event
+    occurs.
+
+    If `interrupt_response` is set to `false` this may fail to create a response if
+    the model is already responding.
+
+    If both `create_response` and `interrupt_response` are set to `false`, the model
+    will never respond automatically but VAD events will still be emitted.
+    """
+
+    idle_timeout_ms: Optional[int] = None
+    """Optional timeout after which a model response will be triggered automatically.
+
+    This is useful for situations in which a long pause from the user is unexpected,
+    such as a phone call. The model will effectively prompt the user to continue the
+    conversation based on the current context.
+
+    The timeout value will be applied after the last model response's audio has
+    finished playing, i.e. it's set to the `response.done` time plus audio playback
+    duration.
+ + An `input_audio_buffer.timeout_triggered` event (plus events associated with the + Response) will be emitted when the timeout is reached. Idle timeout is currently + only supported for `server_vad` mode. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + prefix_padding_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: Optional[float] = None + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + +class SemanticVad(BaseModel): + """ + Server-side semantic turn detection which uses a model to determine when the user has finished speaking. + """ + + type: Literal["semantic_vad"] + """Type of turn detection, `semantic_vad` to turn on Semantic VAD.""" + + create_response: Optional[bool] = None + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, + 4s, and 2s respectively. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. + """ + + +RealtimeTranscriptionSessionAudioInputTurnDetection: TypeAlias = Annotated[ + Union[ServerVad, SemanticVad, None], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py new file mode 100644 index 0000000000..0aca59ce11 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py @@ -0,0 +1,112 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = ["RealtimeTranscriptionSessionAudioInputTurnDetectionParam", "ServerVad", "SemanticVad"] + + +class ServerVad(TypedDict, total=False): + """ + Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence. 
+ """ + + type: Required[Literal["server_vad"]] + """Type of turn detection, `server_vad` to turn on simple Server VAD.""" + + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + idle_timeout_ms: Optional[int] + """Optional timeout after which a model response will be triggered automatically. + + This is useful for situations in which a long pause from the user is unexpected, + such as a phone call. The model will effectively prompt the user to continue the + conversation based on the current context. + + The timeout value will be applied after the last model response's audio has + finished playing, i.e. it's set to the `response.done` time plus audio playback + duration. + + An `input_audio_buffer.timeout_triggered` event (plus events associated with the + Response) will be emitted when the timeout is reached. Idle timeout is currently + only supported for `server_vad` mode. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. + """ + + prefix_padding_ms: int + """Used only for `server_vad` mode. + + Amount of audio to include before the VAD detected speech (in milliseconds). + Defaults to 300ms. + """ + + silence_duration_ms: int + """Used only for `server_vad` mode. + + Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. + With shorter values the model will respond more quickly, but may jump in on + short pauses from the user. + """ + + threshold: float + """Used only for `server_vad` mode. + + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher + threshold will require louder audio to activate the model, and thus might + perform better in noisy environments. + """ + + +class SemanticVad(TypedDict, total=False): + """ + Server-side semantic turn detection which uses a model to determine when the user has finished speaking. + """ + + type: Required[Literal["semantic_vad"]] + """Type of turn detection, `semantic_vad` to turn on Semantic VAD.""" + + create_response: bool + """ + Whether or not to automatically generate a response when a VAD stop event + occurs. + """ + + eagerness: Literal["low", "medium", "high", "auto"] + """Used only for `semantic_vad` mode. + + The eagerness of the model to respond. `low` will wait longer for the user to + continue speaking, `high` will respond more quickly. `auto` is the default and + is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, + 4s, and 2s respectively. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. 
+ """ + + +RealtimeTranscriptionSessionAudioInputTurnDetectionParam: TypeAlias = Union[ServerVad, SemanticVad] diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_param.py new file mode 100644 index 0000000000..6bf1117917 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_audio_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .realtime_transcription_session_audio_input_param import RealtimeTranscriptionSessionAudioInputParam + +__all__ = ["RealtimeTranscriptionSessionAudioParam"] + + +class RealtimeTranscriptionSessionAudioParam(TypedDict, total=False): + """Configuration for input and output audio.""" + + input: RealtimeTranscriptionSessionAudioInputParam diff --git a/src/openai/types/realtime/realtime_transcription_session_create_request.py b/src/openai/types/realtime/realtime_transcription_session_create_request.py new file mode 100644 index 0000000000..f72a4ad93f --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_create_request.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_transcription_session_audio import RealtimeTranscriptionSessionAudio + +__all__ = ["RealtimeTranscriptionSessionCreateRequest"] + + +class RealtimeTranscriptionSessionCreateRequest(BaseModel): + """Realtime transcription session object configuration.""" + + type: Literal["transcription"] + """The type of session to create. + + Always `transcription` for transcription sessions. + """ + + audio: Optional[RealtimeTranscriptionSessionAudio] = None + """Configuration for input and output audio.""" + + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ diff --git a/src/openai/types/realtime/realtime_transcription_session_create_request_param.py b/src/openai/types/realtime/realtime_transcription_session_create_request_param.py new file mode 100644 index 0000000000..9b4d8ead79 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_create_request_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, Required, TypedDict + +from .realtime_transcription_session_audio_param import RealtimeTranscriptionSessionAudioParam + +__all__ = ["RealtimeTranscriptionSessionCreateRequestParam"] + + +class RealtimeTranscriptionSessionCreateRequestParam(TypedDict, total=False): + """Realtime transcription session object configuration.""" + + type: Required[Literal["transcription"]] + """The type of session to create. + + Always `transcription` for transcription sessions. + """ + + audio: RealtimeTranscriptionSessionAudioParam + """Configuration for input and output audio.""" + + include: List[Literal["item.input_audio_transcription.logprobs"]] + """Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. 
+ """ diff --git a/src/openai/types/realtime/realtime_transcription_session_create_response.py b/src/openai/types/realtime/realtime_transcription_session_create_response.py new file mode 100644 index 0000000000..6ca6c3808b --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_create_response.py @@ -0,0 +1,74 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .audio_transcription import AudioTranscription +from .noise_reduction_type import NoiseReductionType +from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_transcription_session_turn_detection import RealtimeTranscriptionSessionTurnDetection + +__all__ = ["RealtimeTranscriptionSessionCreateResponse", "Audio", "AudioInput", "AudioInputNoiseReduction"] + + +class AudioInputNoiseReduction(BaseModel): + """Configuration for input audio noise reduction.""" + + type: Optional[NoiseReductionType] = None + """Type of noise reduction. + + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. + """ + + +class AudioInput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The PCM audio format. Only a 24kHz sample rate is supported.""" + + noise_reduction: Optional[AudioInputNoiseReduction] = None + """Configuration for input audio noise reduction.""" + + transcription: Optional[AudioTranscription] = None + """Configuration of the transcription model.""" + + turn_detection: Optional[RealtimeTranscriptionSessionTurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + +class Audio(BaseModel): + """Configuration for input audio for the session.""" + + input: Optional[AudioInput] = None + + +class RealtimeTranscriptionSessionCreateResponse(BaseModel): + """A Realtime transcription session configuration object.""" + + id: str + """Unique identifier for the session that looks like `sess_1234567890abcdef`.""" + + object: str + """The object type. Always `realtime.transcription_session`.""" + + type: Literal["transcription"] + """The type of session. Always `transcription` for transcription sessions.""" + + audio: Optional[Audio] = None + """Configuration for input audio for the session.""" + + expires_at: Optional[int] = None + """Expiration timestamp for the session, in seconds since epoch.""" + + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None + """Additional fields to include in server outputs. + + - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ diff --git a/src/openai/types/realtime/realtime_transcription_session_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_turn_detection.py new file mode 100644 index 0000000000..8dacd60a07 --- /dev/null +++ b/src/openai/types/realtime/realtime_transcription_session_turn_detection.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
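To illustrate how the transcription-session types above fit together, here is a rough sketch of a payload shaped like `RealtimeTranscriptionSessionCreateRequestParam`; the transcription model name and the numeric values are assumptions for illustration, not taken from this diff:

```python
# Illustrative transcription session config; field names follow the
# TypedDicts above, concrete values are placeholders.
transcription_session = {
    "type": "transcription",
    "audio": {
        "input": {
            "noise_reduction": {"type": "near_field"},
            "transcription": {"model": "gpt-4o-transcribe"},  # assumed model name
            "turn_detection": {"type": "server_vad", "silence_duration_ms": 500},
        }
    },
    "include": ["item.input_audio_transcription.logprobs"],
}
```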
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeTranscriptionSessionTurnDetection"]
+
+
+class RealtimeTranscriptionSessionTurnDetection(BaseModel):
+    """Configuration for turn detection.
+
+    Can be set to `null` to turn off. Server
+    VAD means that the model will detect the start and end of speech based on
+    audio volume and respond at the end of user speech.
+    """
+
+    prefix_padding_ms: Optional[int] = None
+    """Amount of audio to include before the VAD detected speech (in milliseconds).
+
+    Defaults to 300ms.
+    """
+
+    silence_duration_ms: Optional[int] = None
+    """Duration of silence to detect speech stop (in milliseconds).
+
+    Defaults to 500ms. With shorter values the model will respond more quickly, but
+    may jump in on short pauses from the user.
+    """
+
+    threshold: Optional[float] = None
+    """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
+
+    A higher threshold will require louder audio to activate the model, and thus
+    might perform better in noisy environments.
+    """
+
+    type: Optional[str] = None
+    """Type of turn detection, only `server_vad` is currently supported."""
diff --git a/src/openai/types/realtime/realtime_truncation.py b/src/openai/types/realtime/realtime_truncation.py
new file mode 100644
index 0000000000..515f869071
--- /dev/null
+++ b/src/openai/types/realtime/realtime_truncation.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from .realtime_truncation_retention_ratio import RealtimeTruncationRetentionRatio
+
+__all__ = ["RealtimeTruncation"]
+
+RealtimeTruncation: TypeAlias = Union[Literal["auto", "disabled"], RealtimeTruncationRetentionRatio]
diff --git a/src/openai/types/realtime/realtime_truncation_param.py b/src/openai/types/realtime/realtime_truncation_param.py
new file mode 100644
index 0000000000..5e42b27418
--- /dev/null
+++ b/src/openai/types/realtime/realtime_truncation_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from .realtime_truncation_retention_ratio_param import RealtimeTruncationRetentionRatioParam
+
+__all__ = ["RealtimeTruncationParam"]
+
+RealtimeTruncationParam: TypeAlias = Union[Literal["auto", "disabled"], RealtimeTruncationRetentionRatioParam]
diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio.py b/src/openai/types/realtime/realtime_truncation_retention_ratio.py
new file mode 100644
index 0000000000..72a93a5654
--- /dev/null
+++ b/src/openai/types/realtime/realtime_truncation_retention_ratio.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RealtimeTruncationRetentionRatio", "TokenLimits"]
+
+
+class TokenLimits(BaseModel):
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
+
+    post_instructions: Optional[int] = None
+    """
+    Maximum tokens allowed in the conversation after instructions (which includes
+    tool definitions). For example, setting this to 5,000 would mean that truncation
+    would occur when the conversation exceeds 5,000 tokens after instructions.
This
+    cannot be higher than the model's context window size minus the maximum output
+    tokens.
+    """
+
+
+class RealtimeTruncationRetentionRatio(BaseModel):
+    """
+    Retain a fraction of the conversation tokens when the conversation exceeds the input token limit. This allows you to amortize truncations across multiple turns, which can help improve cached token usage.
+    """
+
+    retention_ratio: float
+    """
+    Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+    the conversation exceeds the input token limit. Setting this to `0.8` means that
+    messages will be dropped until 80% of the maximum allowed tokens are used. This
+    helps reduce the frequency of truncations and improve cache rates.
+    """
+
+    type: Literal["retention_ratio"]
+    """Use retention ratio truncation."""
+
+    token_limits: Optional[TokenLimits] = None
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py
new file mode 100644
index 0000000000..4648fa66b0
--- /dev/null
+++ b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RealtimeTruncationRetentionRatioParam", "TokenLimits"]
+
+
+class TokenLimits(TypedDict, total=False):
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
+
+    post_instructions: int
+    """
+    Maximum tokens allowed in the conversation after instructions (which includes
+    tool definitions). For example, setting this to 5,000 would mean that truncation
+    would occur when the conversation exceeds 5,000 tokens after instructions. This
+    cannot be higher than the model's context window size minus the maximum output
+    tokens.
+    """
+
+
+class RealtimeTruncationRetentionRatioParam(TypedDict, total=False):
+    """
+    Retain a fraction of the conversation tokens when the conversation exceeds the input token limit. This allows you to amortize truncations across multiple turns, which can help improve cached token usage.
+    """
+
+    retention_ratio: Required[float]
+    """
+    Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+    the conversation exceeds the input token limit. Setting this to `0.8` means that
+    messages will be dropped until 80% of the maximum allowed tokens are used. This
+    helps reduce the frequency of truncations and improve cache rates.
+    """
+
+    type: Required[Literal["retention_ratio"]]
+    """Use retention ratio truncation."""
+
+    token_limits: TokenLimits
+    """Optional custom token limits for this truncation strategy.
+
+    If not provided, the model's default token limits will be used.
+    """
diff --git a/src/openai/types/realtime/response_audio_delta_event.py b/src/openai/types/realtime/response_audio_delta_event.py
new file mode 100644
index 0000000000..ae87014053
--- /dev/null
+++ b/src/openai/types/realtime/response_audio_delta_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
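As a usage sketch for the truncation types above, a retention-ratio value shaped like `RealtimeTruncationRetentionRatioParam` could look as follows; the numbers are illustrative, and `RealtimeTruncationParam` also accepts the literals `"auto"` and `"disabled"`:

```python
# Keep roughly 80% of post-instruction tokens once the input token limit is
# hit; the 5,000-token budget mirrors the docstring example above.
truncation = {
    "type": "retention_ratio",
    "retention_ratio": 0.8,
    "token_limits": {"post_instructions": 5000},
}
```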
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + """Returned when the model-generated audio is updated.""" + + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """Base64-encoded audio data delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.output_audio.delta"] + """The event type, must be `response.output_audio.delta`.""" diff --git a/src/openai/types/realtime/response_audio_done_event.py b/src/openai/types/realtime/response_audio_done_event.py new file mode 100644 index 0000000000..98715aba13 --- /dev/null +++ b/src/openai/types/realtime/response_audio_done_event.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + """Returned when the model-generated audio is done. + + Also emitted when a Response + is interrupted, incomplete, or cancelled. + """ + + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.output_audio.done"] + """The event type, must be `response.output_audio.done`.""" diff --git a/src/openai/types/realtime/response_audio_transcript_delta_event.py b/src/openai/types/realtime/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..4ec1a820ba --- /dev/null +++ b/src/openai/types/realtime/response_audio_transcript_delta_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + """Returned when the model-generated transcription of audio output is updated.""" + + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The transcript delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.output_audio_transcript.delta"] + """The event type, must be `response.output_audio_transcript.delta`.""" diff --git a/src/openai/types/realtime/response_audio_transcript_done_event.py b/src/openai/types/realtime/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..c2a2416355 --- /dev/null +++ b/src/openai/types/realtime/response_audio_transcript_done_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
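The audio and transcript delta events above are typically accumulated client-side per `item_id`. A minimal sketch, assuming `event` is a dict parsed from the server stream (how events are received depends on the transport and is not shown here):

```python
import base64
from collections import defaultdict

# Accumulate model audio bytes and transcript text keyed by item_id.
audio_by_item: dict[str, bytearray] = defaultdict(bytearray)
transcript_by_item: dict[str, str] = defaultdict(str)


def handle_audio_event(event: dict) -> None:
    if event["type"] == "response.output_audio.delta":
        audio_by_item[event["item_id"]] += base64.b64decode(event["delta"])
    elif event["type"] == "response.output_audio_transcript.delta":
        transcript_by_item[event["item_id"]] += event["delta"]
    elif event["type"] == "response.output_audio_transcript.done":
        print(f"item {event['item_id']} transcript: {event['transcript']}")
```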
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseAudioTranscriptDoneEvent"]
+
+
+class ResponseAudioTranscriptDoneEvent(BaseModel):
+    """
+    Returned when the model-generated transcription of audio output is done
+    streaming. Also emitted when a Response is interrupted, incomplete, or
+    cancelled.
+    """
+
+    content_index: int
+    """The index of the content part in the item's content array."""
+
+    event_id: str
+    """The unique ID of the server event."""
+
+    item_id: str
+    """The ID of the item."""
+
+    output_index: int
+    """The index of the output item in the response."""
+
+    response_id: str
+    """The ID of the response."""
+
+    transcript: str
+    """The final transcript of the audio."""
+
+    type: Literal["response.output_audio_transcript.done"]
+    """The event type, must be `response.output_audio_transcript.done`."""
diff --git a/src/openai/types/realtime/response_cancel_event.py b/src/openai/types/realtime/response_cancel_event.py
new file mode 100644
index 0000000000..9c6113998f
--- /dev/null
+++ b/src/openai/types/realtime/response_cancel_event.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCancelEvent"]
+
+
+class ResponseCancelEvent(BaseModel):
+    """Send this event to cancel an in-progress response.
+
+    The server will respond
+    with a `response.done` event with a status of `response.status=cancelled`. If
+    there is no response to cancel, the server will respond with an error. It's safe
+    to call `response.cancel` even if no response is in progress; an error will be
+    returned and the session will remain unaffected.
+    """
+
+    type: Literal["response.cancel"]
+    """The event type, must be `response.cancel`."""
+
+    event_id: Optional[str] = None
+    """Optional client-generated ID used to identify this event."""
+
+    response_id: Optional[str] = None
+    """
+    A specific response ID to cancel - if not provided, will cancel an in-progress
+    response in the default conversation.
+    """
diff --git a/src/openai/types/realtime/response_cancel_event_param.py b/src/openai/types/realtime/response_cancel_event_param.py
new file mode 100644
index 0000000000..b233b407f9
--- /dev/null
+++ b/src/openai/types/realtime/response_cancel_event_param.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCancelEventParam"]
+
+
+class ResponseCancelEventParam(TypedDict, total=False):
+    """Send this event to cancel an in-progress response.
+
+    The server will respond
+    with a `response.done` event with a status of `response.status=cancelled`. If
+    there is no response to cancel, the server will respond with an error. It's safe
+    to call `response.cancel` even if no response is in progress; an error will be
+    returned and the session will remain unaffected.
+    """
+
+    type: Required[Literal["response.cancel"]]
+    """The event type, must be `response.cancel`."""
+
+    event_id: str
+    """Optional client-generated ID used to identify this event."""
+
+    response_id: str
+    """
+    A specific response ID to cancel - if not provided, will cancel an in-progress
+    response in the default conversation.
+ """ diff --git a/src/openai/types/realtime/response_content_part_added_event.py b/src/openai/types/realtime/response_content_part_added_event.py new file mode 100644 index 0000000000..e47c84af20 --- /dev/null +++ b/src/openai/types/realtime/response_content_part_added_event.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + + +class Part(BaseModel): + """The content part that was added.""" + + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartAddedEvent(BaseModel): + """ + Returned when a new content part is added to an assistant message item during + response generation. + """ + + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item to which the content part was added.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that was added.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.added"] + """The event type, must be `response.content_part.added`.""" diff --git a/src/openai/types/realtime/response_content_part_done_event.py b/src/openai/types/realtime/response_content_part_done_event.py new file mode 100644 index 0000000000..a6cb8559b9 --- /dev/null +++ b/src/openai/types/realtime/response_content_part_done_event.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + + +class Part(BaseModel): + """The content part that is done.""" + + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartDoneEvent(BaseModel): + """ + Returned when a content part is done streaming in an assistant message item. + Also emitted when a Response is interrupted, incomplete, or cancelled. 
+ """ + + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that is done.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.done"] + """The event type, must be `response.content_part.done`.""" diff --git a/src/openai/types/realtime/response_create_event.py b/src/openai/types/realtime/response_create_event.py new file mode 100644 index 0000000000..3e98a8d858 --- /dev/null +++ b/src/openai/types/realtime/response_create_event.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_response_create_params import RealtimeResponseCreateParams + +__all__ = ["ResponseCreateEvent"] + + +class ResponseCreateEvent(BaseModel): + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history by default. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions` and `tools`. If these are set, they will override the Session's + configuration for this Response only. + + Responses can be created out-of-band of the default Conversation, meaning that they can + have arbitrary input, and it's possible to disable writing the output to the Conversation. + Only one Response can write to the default Conversation at a time, but otherwise multiple + Responses can be created in parallel. The `metadata` field is a good way to disambiguate + multiple simultaneous Responses. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + """ + + type: Literal["response.create"] + """The event type, must be `response.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response: Optional[RealtimeResponseCreateParams] = None + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/realtime/response_create_event_param.py b/src/openai/types/realtime/response_create_event_param.py new file mode 100644 index 0000000000..9da89e14ee --- /dev/null +++ b/src/openai/types/realtime/response_create_event_param.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .realtime_response_create_params_param import RealtimeResponseCreateParamsParam + +__all__ = ["ResponseCreateEventParam"] + + +class ResponseCreateEventParam(TypedDict, total=False): + """ + This event instructs the server to create a Response, which means triggering + model inference. 
When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history by default. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions` and `tools`. If these are set, they will override the Session's + configuration for this Response only. + + Responses can be created out-of-band of the default Conversation, meaning that they can + have arbitrary input, and it's possible to disable writing the output to the Conversation. + Only one Response can write to the default Conversation at a time, but otherwise multiple + Responses can be created in parallel. The `metadata` field is a good way to disambiguate + multiple simultaneous Responses. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + """ + + type: Required[Literal["response.create"]] + """The event type, must be `response.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response: RealtimeResponseCreateParamsParam + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/realtime/response_created_event.py b/src/openai/types/realtime/response_created_event.py new file mode 100644 index 0000000000..dc5941262d --- /dev/null +++ b/src/openai/types/realtime/response_created_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + """Returned when a new Response is created. + + The first event of response creation, + where the response is in an initial state of `in_progress`. + """ + + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.created"] + """The event type, must be `response.created`.""" diff --git a/src/openai/types/realtime/response_done_event.py b/src/openai/types/realtime/response_done_event.py new file mode 100644 index 0000000000..9c31a2aa03 --- /dev/null +++ b/src/openai/types/realtime/response_done_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseDoneEvent"] + + +class ResponseDoneEvent(BaseModel): + """Returned when a Response is done streaming. + + Always emitted, no matter the + final state. The Response object included in the `response.done` event will + include all output Items in the Response but will omit the raw audio data. + + Clients should check the `status` field of the Response to determine if it was successful + (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`. 
+ + A response will contain all output items that were generated during the response, excluding + any audio content. + """ + + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.done"] + """The event type, must be `response.done`.""" diff --git a/src/openai/types/realtime/response_function_call_arguments_delta_event.py b/src/openai/types/realtime/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..a426c3f211 --- /dev/null +++ b/src/openai/types/realtime/response_function_call_arguments_delta_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + """Returned when the model-generated function call arguments are updated.""" + + call_id: str + """The ID of the function call.""" + + delta: str + """The arguments delta as a JSON string.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.delta"] + """The event type, must be `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/realtime/response_function_call_arguments_done_event.py b/src/openai/types/realtime/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..504f91d558 --- /dev/null +++ b/src/openai/types/realtime/response_function_call_arguments_done_event.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + """ + Returned when the model-generated function call arguments are done streaming. + Also emitted when a Response is interrupted, incomplete, or cancelled. + """ + + arguments: str + """The final arguments as a JSON string.""" + + call_id: str + """The ID of the function call.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.done"] + """The event type, must be `response.function_call_arguments.done`.""" diff --git a/src/openai/types/realtime/response_mcp_call_arguments_delta.py b/src/openai/types/realtime/response_mcp_call_arguments_delta.py new file mode 100644 index 0000000000..d890de0575 --- /dev/null +++ b/src/openai/types/realtime/response_mcp_call_arguments_delta.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
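The function-call argument events above stream a JSON string incrementally; a small sketch of collecting the deltas and decoding the final `arguments` once the `done` event arrives (the surrounding event loop is assumed):

```python
import json

# Partial argument strings keyed by call_id.
pending_arguments: dict[str, str] = {}


def handle_function_call_event(event: dict) -> None:
    if event["type"] == "response.function_call_arguments.delta":
        call_id = event["call_id"]
        pending_arguments[call_id] = pending_arguments.get(call_id, "") + event["delta"]
    elif event["type"] == "response.function_call_arguments.done":
        # `arguments` carries the complete JSON string, so the accumulated
        # deltas can simply be discarded in its favor.
        pending_arguments.pop(event["call_id"], None)
        arguments = json.loads(event["arguments"])
        print(f"call {event['call_id']} arguments: {arguments}")
```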
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallArgumentsDelta"] + + +class ResponseMcpCallArgumentsDelta(BaseModel): + """Returned when MCP tool call arguments are updated during response generation.""" + + delta: str + """The JSON-encoded arguments delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP tool call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.mcp_call_arguments.delta"] + """The event type, must be `response.mcp_call_arguments.delta`.""" + + obfuscation: Optional[str] = None + """If present, indicates the delta text was obfuscated.""" diff --git a/src/openai/types/realtime/response_mcp_call_arguments_done.py b/src/openai/types/realtime/response_mcp_call_arguments_done.py new file mode 100644 index 0000000000..a7cb2d1958 --- /dev/null +++ b/src/openai/types/realtime/response_mcp_call_arguments_done.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallArgumentsDone"] + + +class ResponseMcpCallArgumentsDone(BaseModel): + """Returned when MCP tool call arguments are finalized during response generation.""" + + arguments: str + """The final JSON-encoded arguments string.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP tool call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.mcp_call_arguments.done"] + """The event type, must be `response.mcp_call_arguments.done`.""" diff --git a/src/openai/types/realtime/response_mcp_call_completed.py b/src/openai/types/realtime/response_mcp_call_completed.py new file mode 100644 index 0000000000..130260539a --- /dev/null +++ b/src/openai/types/realtime/response_mcp_call_completed.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallCompleted"] + + +class ResponseMcpCallCompleted(BaseModel): + """Returned when an MCP tool call has completed successfully.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP tool call item.""" + + output_index: int + """The index of the output item in the response.""" + + type: Literal["response.mcp_call.completed"] + """The event type, must be `response.mcp_call.completed`.""" diff --git a/src/openai/types/realtime/response_mcp_call_failed.py b/src/openai/types/realtime/response_mcp_call_failed.py new file mode 100644 index 0000000000..1c08d1d4b7 --- /dev/null +++ b/src/openai/types/realtime/response_mcp_call_failed.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
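Taken together with the `failed` and `in_progress` events defined in the next files, the MCP tool-call lifecycle events above can be handled with a small dispatcher; a sketch with placeholder logging only:

```python
def handle_mcp_call_event(event: dict) -> None:
    # item_id identifies the MCP tool call item across its lifecycle events.
    item_id = event["item_id"]
    if event["type"] == "response.mcp_call.in_progress":
        print(f"MCP call {item_id} started")
    elif event["type"] == "response.mcp_call_arguments.done":
        print(f"MCP call {item_id} arguments: {event['arguments']}")
    elif event["type"] == "response.mcp_call.completed":
        print(f"MCP call {item_id} completed")
    elif event["type"] == "response.mcp_call.failed":
        print(f"MCP call {item_id} failed")
```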
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallFailed"] + + +class ResponseMcpCallFailed(BaseModel): + """Returned when an MCP tool call has failed.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP tool call item.""" + + output_index: int + """The index of the output item in the response.""" + + type: Literal["response.mcp_call.failed"] + """The event type, must be `response.mcp_call.failed`.""" diff --git a/src/openai/types/realtime/response_mcp_call_in_progress.py b/src/openai/types/realtime/response_mcp_call_in_progress.py new file mode 100644 index 0000000000..4c0ad149e5 --- /dev/null +++ b/src/openai/types/realtime/response_mcp_call_in_progress.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallInProgress"] + + +class ResponseMcpCallInProgress(BaseModel): + """Returned when an MCP tool call has started and is in progress.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the MCP tool call item.""" + + output_index: int + """The index of the output item in the response.""" + + type: Literal["response.mcp_call.in_progress"] + """The event type, must be `response.mcp_call.in_progress`.""" diff --git a/src/openai/types/realtime/response_output_item_added_event.py b/src/openai/types/realtime/response_output_item_added_event.py new file mode 100644 index 0000000000..abec0d18f1 --- /dev/null +++ b/src/openai/types/realtime/response_output_item_added_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + """Returned when a new Item is created during Response generation.""" + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.added"] + """The event type, must be `response.output_item.added`.""" diff --git a/src/openai/types/realtime/response_output_item_done_event.py b/src/openai/types/realtime/response_output_item_done_event.py new file mode 100644 index 0000000000..63936b97d5 --- /dev/null +++ b/src/openai/types/realtime/response_output_item_done_event.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + """Returned when an Item is done streaming. + + Also emitted when a Response is + interrupted, incomplete, or cancelled. 
+ """ + + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """A single item within a Realtime conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.done"] + """The event type, must be `response.output_item.done`.""" diff --git a/src/openai/types/realtime/response_text_delta_event.py b/src/openai/types/realtime/response_text_delta_event.py new file mode 100644 index 0000000000..b251b7639c --- /dev/null +++ b/src/openai/types/realtime/response_text_delta_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + """Returned when the text value of an "output_text" content part is updated.""" + + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The text delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.output_text.delta"] + """The event type, must be `response.output_text.delta`.""" diff --git a/src/openai/types/realtime/response_text_done_event.py b/src/openai/types/realtime/response_text_done_event.py new file mode 100644 index 0000000000..046e520222 --- /dev/null +++ b/src/openai/types/realtime/response_text_done_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + """Returned when the text value of an "output_text" content part is done streaming. + + Also + emitted when a Response is interrupted, incomplete, or cancelled. + """ + + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + text: str + """The final text content.""" + + type: Literal["response.output_text.done"] + """The event type, must be `response.output_text.done`.""" diff --git a/src/openai/types/realtime/session_created_event.py b/src/openai/types/realtime/session_created_event.py new file mode 100644 index 0000000000..1b8d4a4d81 --- /dev/null +++ b/src/openai/types/realtime/session_created_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .realtime_session_create_request import RealtimeSessionCreateRequest +from .realtime_transcription_session_create_request import RealtimeTranscriptionSessionCreateRequest + +__all__ = ["SessionCreatedEvent", "Session"] + +Session: TypeAlias = Union[RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest] + + +class SessionCreatedEvent(BaseModel): + """Returned when a Session is created. 
+ + Emitted automatically when a new + connection is established as the first server event. This event will contain + the default Session configuration. + """ + + event_id: str + """The unique ID of the server event.""" + + session: Session + """The session configuration.""" + + type: Literal["session.created"] + """The event type, must be `session.created`.""" diff --git a/src/openai/types/realtime/session_update_event.py b/src/openai/types/realtime/session_update_event.py new file mode 100644 index 0000000000..a8422e4e89 --- /dev/null +++ b/src/openai/types/realtime/session_update_event.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .realtime_session_create_request import RealtimeSessionCreateRequest +from .realtime_transcription_session_create_request import RealtimeTranscriptionSessionCreateRequest + +__all__ = ["SessionUpdateEvent", "Session"] + +Session: TypeAlias = Union[RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest] + + +class SessionUpdateEvent(BaseModel): + """ + Send this event to update the session’s configuration. + The client may send this event at any time to update any field + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present in the `session.update` are updated. To clear a field like + `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + To clear a field like `turn_detection`, pass `null`. + """ + + session: Session + """Update the Realtime session. + + Choose either a realtime session or a transcription session. + """ + + type: Literal["session.update"] + """The event type, must be `session.update`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event. + + This is an arbitrary string that a client may assign. It will be passed back if + there is an error with the event, but the corresponding `session.updated` event + will not include it. + """ diff --git a/src/openai/types/realtime/session_update_event_param.py b/src/openai/types/realtime/session_update_event_param.py new file mode 100644 index 0000000000..910e89ca34 --- /dev/null +++ b/src/openai/types/realtime/session_update_event_param.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .realtime_session_create_request_param import RealtimeSessionCreateRequestParam +from .realtime_transcription_session_create_request_param import RealtimeTranscriptionSessionCreateRequestParam + +__all__ = ["SessionUpdateEventParam", "Session"] + +Session: TypeAlias = Union[RealtimeSessionCreateRequestParam, RealtimeTranscriptionSessionCreateRequestParam] + + +class SessionUpdateEventParam(TypedDict, total=False): + """ + Send this event to update the session’s configuration. + The client may send this event at any time to update any field + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. 
+ + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present in the `session.update` are updated. To clear a field like + `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + To clear a field like `turn_detection`, pass `null`. + """ + + session: Required[Session] + """Update the Realtime session. + + Choose either a realtime session or a transcription session. + """ + + type: Required[Literal["session.update"]] + """The event type, must be `session.update`.""" + + event_id: str + """Optional client-generated ID used to identify this event. + + This is an arbitrary string that a client may assign. It will be passed back if + there is an error with the event, but the corresponding `session.updated` event + will not include it. + """ diff --git a/src/openai/types/realtime/session_updated_event.py b/src/openai/types/realtime/session_updated_event.py new file mode 100644 index 0000000000..e68a08d6cc --- /dev/null +++ b/src/openai/types/realtime/session_updated_event.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .realtime_session_create_request import RealtimeSessionCreateRequest +from .realtime_transcription_session_create_request import RealtimeTranscriptionSessionCreateRequest + +__all__ = ["SessionUpdatedEvent", "Session"] + +Session: TypeAlias = Union[RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest] + + +class SessionUpdatedEvent(BaseModel): + """ + Returned when a session is updated with a `session.update` event, unless + there is an error. + """ + + event_id: str + """The unique ID of the server event.""" + + session: Session + """The session configuration.""" + + type: Literal["session.updated"] + """The event type, must be `session.updated`.""" diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py new file mode 100644 index 0000000000..dea80b25f7 --- /dev/null +++ b/src/openai/types/responses/__init__.py @@ -0,0 +1,267 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
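As the `SessionUpdateEvent` docstrings above note, only fields present in a `session.update` are applied. A minimal sketch of such a client event shaped like `SessionUpdateEventParam` (the nested `session` keys are illustrative, taken from the clearing rules in the docstring; see `RealtimeSessionCreateRequestParam` for the full schema):

session_update_event = {
    "type": "session.update",
    "event_id": "evt_client_123",  # optional; passed back only if the event errors
    "session": {
        "instructions": "",  # empty string clears previously set instructions
        "tools": [],         # empty array clears previously configured tools
    },
}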
+ +from __future__ import annotations + +from .tool import Tool as Tool +from .response import Response as Response +from .tool_param import ToolParam as ToolParam +from .custom_tool import CustomTool as CustomTool +from .computer_tool import ComputerTool as ComputerTool +from .function_tool import FunctionTool as FunctionTool +from .response_item import ResponseItem as ResponseItem +from .response_error import ResponseError as ResponseError +from .response_usage import ResponseUsage as ResponseUsage +from .response_prompt import ResponsePrompt as ResponsePrompt +from .response_status import ResponseStatus as ResponseStatus +from .tool_choice_mcp import ToolChoiceMcp as ToolChoiceMcp +from .web_search_tool import WebSearchTool as WebSearchTool +from .apply_patch_tool import ApplyPatchTool as ApplyPatchTool +from .file_search_tool import FileSearchTool as FileSearchTool +from .custom_tool_param import CustomToolParam as CustomToolParam +from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell +from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes +from .compacted_response import CompactedResponse as CompactedResponse +from .easy_input_message import EasyInputMessage as EasyInputMessage +from .response_item_list import ResponseItemList as ResponseItemList +from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom +from .computer_tool_param import ComputerToolParam as ComputerToolParam +from .function_shell_tool import FunctionShellTool as FunctionShellTool +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .response_includable import ResponseIncludable as ResponseIncludable +from .response_input_file import ResponseInputFile as ResponseInputFile +from .response_input_item import ResponseInputItem as ResponseInputItem +from .response_input_text import ResponseInputText as ResponseInputText +from .tool_choice_allowed import ToolChoiceAllowed as ToolChoiceAllowed +from .tool_choice_options import ToolChoiceOptions as ToolChoiceOptions +from .response_error_event import ResponseErrorEvent as ResponseErrorEvent +from .response_input_audio import ResponseInputAudio as ResponseInputAudio +from .response_input_image import ResponseInputImage as ResponseInputImage +from .response_input_param import ResponseInputParam as ResponseInputParam +from .response_output_item import ResponseOutputItem as ResponseOutputItem +from .response_output_text import ResponseOutputText as ResponseOutputText +from .response_text_config import ResponseTextConfig as ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction as ToolChoiceFunction +from .response_failed_event import ResponseFailedEvent as ResponseFailedEvent +from .response_prompt_param import ResponsePromptParam as ResponsePromptParam +from .response_queued_event import ResponseQueuedEvent as ResponseQueuedEvent +from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent +from .tool_choice_mcp_param import ToolChoiceMcpParam as ToolChoiceMcpParam +from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam +from .apply_patch_tool_param import ApplyPatchToolParam as ApplyPatchToolParam +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam +from .input_item_list_params import InputItemListParams as InputItemListParams +from .response_create_params import ResponseCreateParams as ResponseCreateParams +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .response_input_content import 
ResponseInputContent as ResponseInputContent +from .response_compact_params import ResponseCompactParams as ResponseCompactParams +from .response_output_message import ResponseOutputMessage as ResponseOutputMessage +from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal +from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem +from .tool_choice_apply_patch import ToolChoiceApplyPatch as ToolChoiceApplyPatch +from .tool_choice_shell_param import ToolChoiceShellParam as ToolChoiceShellParam +from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesParam +from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool +from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam +from .input_token_count_params import InputTokenCountParams as InputTokenCountParams +from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem +from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent +from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .tool_choice_custom_param import ToolChoiceCustomParam as ToolChoiceCustomParam +from .function_shell_tool_param import FunctionShellToolParam as FunctionShellToolParam +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_custom_tool_call import ResponseCustomToolCall as ResponseCustomToolCall +from .response_incomplete_event import ResponseIncompleteEvent as ResponseIncompleteEvent +from .response_input_file_param import ResponseInputFileParam as ResponseInputFileParam +from .response_input_item_param import ResponseInputItemParam as ResponseInputItemParam +from .response_input_text_param import ResponseInputTextParam as ResponseInputTextParam +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .tool_choice_allowed_param import ToolChoiceAllowedParam as ToolChoiceAllowedParam +from .input_token_count_response import InputTokenCountResponse as InputTokenCountResponse +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent as ResponseInProgressEvent +from .response_input_audio_param import ResponseInputAudioParam as ResponseInputAudioParam +from .response_input_image_param import ResponseInputImageParam as ResponseInputImageParam +from .response_output_text_param import ResponseOutputTextParam as ResponseOutputTextParam +from .response_text_config_param import ResponseTextConfigParam as ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam as ToolChoiceFunctionParam +from .response_computer_tool_call import ResponseComputerToolCall as ResponseComputerToolCall +from .response_conversation_param import ResponseConversationParam as ResponseConversationParam +from .response_format_text_config import ResponseFormatTextConfig as ResponseFormatTextConfig +from .response_function_tool_call import ResponseFunctionToolCall as ResponseFunctionToolCall +from .response_input_file_content import ResponseInputFileContent as ResponseInputFileContent +from .response_input_message_item import ResponseInputMessageItem as ResponseInputMessageItem +from .response_input_text_content import ResponseInputTextContent as ResponseInputTextContent +from 
.response_refusal_done_event import ResponseRefusalDoneEvent as ResponseRefusalDoneEvent +from .response_function_web_search import ResponseFunctionWebSearch as ResponseFunctionWebSearch +from .response_input_content_param import ResponseInputContentParam as ResponseInputContentParam +from .response_input_image_content import ResponseInputImageContent as ResponseInputImageContent +from .response_refusal_delta_event import ResponseRefusalDeltaEvent as ResponseRefusalDeltaEvent +from .response_output_message_param import ResponseOutputMessageParam as ResponseOutputMessageParam +from .response_output_refusal_param import ResponseOutputRefusalParam as ResponseOutputRefusalParam +from .response_reasoning_item_param import ResponseReasoningItemParam as ResponseReasoningItemParam +from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam +from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam +from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall +from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam +from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall +from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent +from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_custom_tool_call_output import ResponseCustomToolCallOutput as ResponseCustomToolCallOutput +from .response_function_tool_call_item import ResponseFunctionToolCallItem as ResponseFunctionToolCallItem +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .response_computer_tool_call_param import ResponseComputerToolCallParam as ResponseComputerToolCallParam +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .response_format_text_config_param import ResponseFormatTextConfigParam as ResponseFormatTextConfigParam +from .response_function_shell_tool_call import ResponseFunctionShellToolCall as ResponseFunctionShellToolCall +from .response_function_tool_call_param import ResponseFunctionToolCallParam as ResponseFunctionToolCallParam +from .response_input_file_content_param import ResponseInputFileContentParam as ResponseInputFileContentParam +from .response_input_text_content_param import ResponseInputTextContentParam as ResponseInputTextContentParam +from .response_mcp_call_completed_event import ResponseMcpCallCompletedEvent as ResponseMcpCallCompletedEvent +from .response_function_call_output_item import ResponseFunctionCallOutputItem as ResponseFunctionCallOutputItem +from .response_function_web_search_param import ResponseFunctionWebSearchParam as ResponseFunctionWebSearchParam +from .response_input_image_content_param import ResponseInputImageContentParam as ResponseInputImageContentParam +from .response_reasoning_text_done_event import ResponseReasoningTextDoneEvent as ResponseReasoningTextDoneEvent +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall as ResponseCodeInterpreterToolCall +from .response_input_message_content_list import 
ResponseInputMessageContentList as ResponseInputMessageContentList +from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent +from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam +from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent +from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .response_custom_tool_call_output_param import ( + ResponseCustomToolCallOutputParam as ResponseCustomToolCallOutputParam, +) +from .response_mcp_call_arguments_done_event import ( + ResponseMcpCallArgumentsDoneEvent as ResponseMcpCallArgumentsDoneEvent, +) +from .response_computer_tool_call_output_item import ( + ResponseComputerToolCallOutputItem as ResponseComputerToolCallOutputItem, +) +from .response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig as ResponseFormatTextJSONSchemaConfig, +) +from .response_function_call_output_item_list import ( + ResponseFunctionCallOutputItemList as ResponseFunctionCallOutputItemList, +) +from .response_function_tool_call_output_item import ( + ResponseFunctionToolCallOutputItem as ResponseFunctionToolCallOutputItem, +) +from .response_image_gen_call_completed_event import ( + ResponseImageGenCallCompletedEvent as ResponseImageGenCallCompletedEvent, +) +from .response_mcp_call_arguments_delta_event import ( + ResponseMcpCallArgumentsDeltaEvent as ResponseMcpCallArgumentsDeltaEvent, +) +from .response_mcp_list_tools_completed_event import ( + ResponseMcpListToolsCompletedEvent as ResponseMcpListToolsCompletedEvent, +) +from .response_function_call_output_item_param import ( + ResponseFunctionCallOutputItemParam as ResponseFunctionCallOutputItemParam, +) +from .response_function_shell_tool_call_output import ( + ResponseFunctionShellToolCallOutput as ResponseFunctionShellToolCallOutput, +) +from .response_image_gen_call_generating_event import ( + ResponseImageGenCallGeneratingEvent as ResponseImageGenCallGeneratingEvent, +) +from .response_web_search_call_completed_event import ( + ResponseWebSearchCallCompletedEvent as ResponseWebSearchCallCompletedEvent, +) +from .response_web_search_call_searching_event import ( + ResponseWebSearchCallSearchingEvent as ResponseWebSearchCallSearchingEvent, +) +from .response_code_interpreter_tool_call_param import ( + ResponseCodeInterpreterToolCallParam as ResponseCodeInterpreterToolCallParam, +) +from .response_file_search_call_completed_event import ( + ResponseFileSearchCallCompletedEvent as ResponseFileSearchCallCompletedEvent, +) +from .response_file_search_call_searching_event import ( + ResponseFileSearchCallSearchingEvent as ResponseFileSearchCallSearchingEvent, +) +from .response_image_gen_call_in_progress_event import ( + ResponseImageGenCallInProgressEvent as ResponseImageGenCallInProgressEvent, +) +from .response_input_message_content_list_param import ( + 
ResponseInputMessageContentListParam as ResponseInputMessageContentListParam, +) +from .response_mcp_list_tools_in_progress_event import ( + ResponseMcpListToolsInProgressEvent as ResponseMcpListToolsInProgressEvent, +) +from .response_custom_tool_call_input_done_event import ( + ResponseCustomToolCallInputDoneEvent as ResponseCustomToolCallInputDoneEvent, +) +from .response_reasoning_summary_part_done_event import ( + ResponseReasoningSummaryPartDoneEvent as ResponseReasoningSummaryPartDoneEvent, +) +from .response_reasoning_summary_text_done_event import ( + ResponseReasoningSummaryTextDoneEvent as ResponseReasoningSummaryTextDoneEvent, +) +from .response_web_search_call_in_progress_event import ( + ResponseWebSearchCallInProgressEvent as ResponseWebSearchCallInProgressEvent, +) +from .response_custom_tool_call_input_delta_event import ( + ResponseCustomToolCallInputDeltaEvent as ResponseCustomToolCallInputDeltaEvent, +) +from .response_file_search_call_in_progress_event import ( + ResponseFileSearchCallInProgressEvent as ResponseFileSearchCallInProgressEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .response_function_shell_call_output_content import ( + ResponseFunctionShellCallOutputContent as ResponseFunctionShellCallOutputContent, +) +from .response_image_gen_call_partial_image_event import ( + ResponseImageGenCallPartialImageEvent as ResponseImageGenCallPartialImageEvent, +) +from .response_output_text_annotation_added_event import ( + ResponseOutputTextAnnotationAddedEvent as ResponseOutputTextAnnotationAddedEvent, +) +from .response_reasoning_summary_part_added_event import ( + ResponseReasoningSummaryPartAddedEvent as ResponseReasoningSummaryPartAddedEvent, +) +from .response_reasoning_summary_text_delta_event import ( + ResponseReasoningSummaryTextDeltaEvent as ResponseReasoningSummaryTextDeltaEvent, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .response_computer_tool_call_output_screenshot import ( + ResponseComputerToolCallOutputScreenshot as ResponseComputerToolCallOutputScreenshot, +) +from .response_format_text_json_schema_config_param import ( + ResponseFormatTextJSONSchemaConfigParam as ResponseFormatTextJSONSchemaConfigParam, +) +from .response_function_call_output_item_list_param import ( + ResponseFunctionCallOutputItemListParam as ResponseFunctionCallOutputItemListParam, +) +from .response_code_interpreter_call_code_done_event import ( + ResponseCodeInterpreterCallCodeDoneEvent as ResponseCodeInterpreterCallCodeDoneEvent, +) +from .response_code_interpreter_call_completed_event import ( + ResponseCodeInterpreterCallCompletedEvent as ResponseCodeInterpreterCallCompletedEvent, +) +from .response_code_interpreter_call_code_delta_event import ( + ResponseCodeInterpreterCallCodeDeltaEvent as ResponseCodeInterpreterCallCodeDeltaEvent, +) +from .response_code_interpreter_call_in_progress_event import ( + ResponseCodeInterpreterCallInProgressEvent as ResponseCodeInterpreterCallInProgressEvent, +) +from .response_code_interpreter_call_interpreting_event import ( + ResponseCodeInterpreterCallInterpretingEvent as ResponseCodeInterpreterCallInterpretingEvent, +) +from .response_function_shell_call_output_content_param import ( + ResponseFunctionShellCallOutputContentParam as ResponseFunctionShellCallOutputContentParam, +) +from 
.response_computer_tool_call_output_screenshot_param import ( + ResponseComputerToolCallOutputScreenshotParam as ResponseComputerToolCallOutputScreenshotParam, +) diff --git a/src/openai/types/responses/apply_patch_tool.py b/src/openai/types/responses/apply_patch_tool.py new file mode 100644 index 0000000000..f2ed245d10 --- /dev/null +++ b/src/openai/types/responses/apply_patch_tool.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ApplyPatchTool"] + + +class ApplyPatchTool(BaseModel): + """Allows the assistant to create, delete, or update files using unified diffs.""" + + type: Literal["apply_patch"] + """The type of the tool. Always `apply_patch`.""" diff --git a/src/openai/types/responses/apply_patch_tool_param.py b/src/openai/types/responses/apply_patch_tool_param.py new file mode 100644 index 0000000000..2e0a809099 --- /dev/null +++ b/src/openai/types/responses/apply_patch_tool_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ApplyPatchToolParam"] + + +class ApplyPatchToolParam(TypedDict, total=False): + """Allows the assistant to create, delete, or update files using unified diffs.""" + + type: Required[Literal["apply_patch"]] + """The type of the tool. Always `apply_patch`.""" diff --git a/src/openai/types/responses/compacted_response.py b/src/openai/types/responses/compacted_response.py new file mode 100644 index 0000000000..5b333b83c0 --- /dev/null +++ b/src/openai/types/responses/compacted_response.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_usage import ResponseUsage +from .response_output_item import ResponseOutputItem + +__all__ = ["CompactedResponse"] + + +class CompactedResponse(BaseModel): + id: str + """The unique identifier for the compacted response.""" + + created_at: int + """Unix timestamp (in seconds) when the compacted conversation was created.""" + + object: Literal["response.compaction"] + """The object type. Always `response.compaction`.""" + + output: List[ResponseOutputItem] + """The compacted list of output items. + + This is a list of all user messages, followed by a single compaction item. + """ + + usage: ResponseUsage + """ + Token accounting for the compaction pass, including cached, reasoning, and total + tokens. + """ diff --git a/src/openai/types/responses/computer_tool.py b/src/openai/types/responses/computer_tool.py new file mode 100644 index 0000000000..22871c841c --- /dev/null +++ b/src/openai/types/responses/computer_tool.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComputerTool"] + + +class ComputerTool(BaseModel): + """A tool that controls a virtual computer. + + Learn more about the [computer tool](https://platform.openai.com/docs/guides/tools-computer-use). 
+ """ + + display_height: int + """The height of the computer display.""" + + display_width: int + """The width of the computer display.""" + + environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] + """The type of computer environment to control.""" + + type: Literal["computer_use_preview"] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/computer_tool_param.py b/src/openai/types/responses/computer_tool_param.py new file mode 100644 index 0000000000..cdf75a43f2 --- /dev/null +++ b/src/openai/types/responses/computer_tool_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ComputerToolParam"] + + +class ComputerToolParam(TypedDict, total=False): + """A tool that controls a virtual computer. + + Learn more about the [computer tool](https://platform.openai.com/docs/guides/tools-computer-use). + """ + + display_height: Required[int] + """The height of the computer display.""" + + display_width: Required[int] + """The width of the computer display.""" + + environment: Required[Literal["windows", "mac", "linux", "ubuntu", "browser"]] + """The type of computer environment to control.""" + + type: Required[Literal["computer_use_preview"]] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/custom_tool.py b/src/openai/types/responses/custom_tool.py new file mode 100644 index 0000000000..1ca401a486 --- /dev/null +++ b/src/openai/types/responses/custom_tool.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.custom_tool_input_format import CustomToolInputFormat + +__all__ = ["CustomTool"] + + +class CustomTool(BaseModel): + """A custom tool that processes input using a specified format. + + Learn more about [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + """ + + name: str + """The name of the custom tool, used to identify it in tool calls.""" + + type: Literal["custom"] + """The type of the custom tool. Always `custom`.""" + + description: Optional[str] = None + """Optional description of the custom tool, used to provide more context.""" + + format: Optional[CustomToolInputFormat] = None + """The input format for the custom tool. Default is unconstrained text.""" diff --git a/src/openai/types/responses/custom_tool_param.py b/src/openai/types/responses/custom_tool_param.py new file mode 100644 index 0000000000..4ce43cdfdb --- /dev/null +++ b/src/openai/types/responses/custom_tool_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ..shared_params.custom_tool_input_format import CustomToolInputFormat + +__all__ = ["CustomToolParam"] + + +class CustomToolParam(TypedDict, total=False): + """A custom tool that processes input using a specified format. 
+ + Learn more about [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + """ + + name: Required[str] + """The name of the custom tool, used to identify it in tool calls.""" + + type: Required[Literal["custom"]] + """The type of the custom tool. Always `custom`.""" + + description: str + """Optional description of the custom tool, used to provide more context.""" + + format: CustomToolInputFormat + """The input format for the custom tool. Default is unconstrained text.""" diff --git a/src/openai/types/responses/easy_input_message.py b/src/openai/types/responses/easy_input_message.py new file mode 100644 index 0000000000..9a36a6b084 --- /dev/null +++ b/src/openai/types/responses/easy_input_message.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_input_message_content_list import ResponseInputMessageContentList + +__all__ = ["EasyInputMessage"] + + +class EasyInputMessage(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Union[str, ResponseInputMessageContentList] + """ + Text, image, or audio input to the model, used to generate a response. Can also + contain previous assistant responses. + """ + + role: Literal["user", "assistant", "system", "developer"] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always `message`.""" diff --git a/src/openai/types/responses/easy_input_message_param.py b/src/openai/types/responses/easy_input_message_param.py new file mode 100644 index 0000000000..0a382bddee --- /dev/null +++ b/src/openai/types/responses/easy_input_message_param.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +from .response_input_message_content_list_param import ResponseInputMessageContentListParam + +__all__ = ["EasyInputMessageParam"] + + +class EasyInputMessageParam(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + """ + + content: Required[Union[str, ResponseInputMessageContentListParam]] + """ + Text, image, or audio input to the model, used to generate a response. Can also + contain previous assistant responses. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. 
Always `message`.""" diff --git a/src/openai/types/responses/file_search_tool.py b/src/openai/types/responses/file_search_tool.py new file mode 100644 index 0000000000..09c12876ca --- /dev/null +++ b/src/openai/types/responses/file_search_tool.py @@ -0,0 +1,69 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..shared.compound_filter import CompoundFilter +from ..shared.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchTool", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None] + + +class RankingOptionsHybridSearch(BaseModel): + """ + Weights that control how reciprocal rank fusion balances semantic embedding matches versus sparse keyword matches when hybrid search is enabled. + """ + + embedding_weight: float + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: float + """The weight of the text in the reciprocal ranking fusion.""" + + +class RankingOptions(BaseModel): + """Ranking options for search.""" + + hybrid_search: Optional[RankingOptionsHybridSearch] = None + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. + """ + + ranker: Optional[Literal["auto", "default-2024-11-15"]] = None + """The ranker to use for the file search.""" + + score_threshold: Optional[float] = None + """The score threshold for the file search, a number between 0 and 1. + + Numbers closer to 1 will attempt to return only the most relevant results, but + may return fewer results. + """ + + +class FileSearchTool(BaseModel): + """A tool that searches for relevant content from uploaded files. + + Learn more about the [file search tool](https://platform.openai.com/docs/guides/tools-file-search). + """ + + type: Literal["file_search"] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: List[str] + """The IDs of the vector stores to search.""" + + filters: Optional[Filters] = None + """A filter to apply.""" + + max_num_results: Optional[int] = None + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: Optional[RankingOptions] = None + """Ranking options for search.""" diff --git a/src/openai/types/responses/file_search_tool_param.py b/src/openai/types/responses/file_search_tool_param.py new file mode 100644 index 0000000000..82831d0dc0 --- /dev/null +++ b/src/openai/types/responses/file_search_tool_param.py @@ -0,0 +1,71 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from ..shared_params.compound_filter import CompoundFilter +from ..shared_params.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchToolParam", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptionsHybridSearch(TypedDict, total=False): + """ + Weights that control how reciprocal rank fusion balances semantic embedding matches versus sparse keyword matches when hybrid search is enabled. 
+ """ + + embedding_weight: Required[float] + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: Required[float] + """The weight of the text in the reciprocal ranking fusion.""" + + +class RankingOptions(TypedDict, total=False): + """Ranking options for search.""" + + hybrid_search: RankingOptionsHybridSearch + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. + """ + + ranker: Literal["auto", "default-2024-11-15"] + """The ranker to use for the file search.""" + + score_threshold: float + """The score threshold for the file search, a number between 0 and 1. + + Numbers closer to 1 will attempt to return only the most relevant results, but + may return fewer results. + """ + + +class FileSearchToolParam(TypedDict, total=False): + """A tool that searches for relevant content from uploaded files. + + Learn more about the [file search tool](https://platform.openai.com/docs/guides/tools-file-search). + """ + + type: Required[Literal["file_search"]] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: Required[SequenceNotStr[str]] + """The IDs of the vector stores to search.""" + + filters: Optional[Filters] + """A filter to apply.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: RankingOptions + """Ranking options for search.""" diff --git a/src/openai/types/responses/function_shell_tool.py b/src/openai/types/responses/function_shell_tool.py new file mode 100644 index 0000000000..5b237aa705 --- /dev/null +++ b/src/openai/types/responses/function_shell_tool.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FunctionShellTool"] + + +class FunctionShellTool(BaseModel): + """A tool that allows the model to execute shell commands.""" + + type: Literal["shell"] + """The type of the shell tool. Always `shell`.""" diff --git a/src/openai/types/responses/function_shell_tool_param.py b/src/openai/types/responses/function_shell_tool_param.py new file mode 100644 index 0000000000..c640ddab99 --- /dev/null +++ b/src/openai/types/responses/function_shell_tool_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FunctionShellToolParam"] + + +class FunctionShellToolParam(TypedDict, total=False): + """A tool that allows the model to execute shell commands.""" + + type: Required[Literal["shell"]] + """The type of the shell tool. Always `shell`.""" diff --git a/src/openai/types/responses/function_tool.py b/src/openai/types/responses/function_tool.py new file mode 100644 index 0000000000..b0827a9fa7 --- /dev/null +++ b/src/openai/types/responses/function_tool.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + """Defines a function in your own code the model can choose to call. 
+ + Learn more about [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + name: str + """The name of the function to call.""" + + parameters: Optional[Dict[str, object]] = None + """A JSON schema object describing the parameters of the function.""" + + strict: Optional[bool] = None + """Whether to enforce strict parameter validation. Default `true`.""" + + type: Literal["function"] + """The type of the function tool. Always `function`.""" + + description: Optional[str] = None + """A description of the function. + + Used by the model to determine whether or not to call the function. + """ diff --git a/src/openai/types/responses/function_tool_param.py b/src/openai/types/responses/function_tool_param.py new file mode 100644 index 0000000000..ba0a3168c4 --- /dev/null +++ b/src/openai/types/responses/function_tool_param.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + """Defines a function in your own code the model can choose to call. + + Learn more about [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + name: Required[str] + """The name of the function to call.""" + + parameters: Required[Optional[Dict[str, object]]] + """A JSON schema object describing the parameters of the function.""" + + strict: Required[Optional[bool]] + """Whether to enforce strict parameter validation. Default `true`.""" + + type: Required[Literal["function"]] + """The type of the function tool. Always `function`.""" + + description: Optional[str] + """A description of the function. + + Used by the model to determine whether or not to call the function. + """ diff --git a/src/openai/types/responses/input_item_list_params.py b/src/openai/types/responses/input_item_list_params.py new file mode 100644 index 0000000000..44a8dc5de3 --- /dev/null +++ b/src/openai/types/responses/input_item_list_params.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Literal, TypedDict + +from .response_includable import ResponseIncludable + +__all__ = ["InputItemListParams"] + + +class InputItemListParams(TypedDict, total=False): + after: str + """An item ID to list items after, used in pagination.""" + + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for Response creation above for more information. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """The order to return the input items in. Default is `desc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + """ diff --git a/src/openai/types/responses/input_token_count_params.py b/src/openai/types/responses/input_token_count_params.py new file mode 100644 index 0000000000..50cc950e41 --- /dev/null +++ b/src/openai/types/responses/input_token_count_params.py @@ -0,0 +1,150 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
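For reference, a function definition matching the `FunctionToolParam` shape above might look like the following sketch (the tool name and JSON schema are illustrative):

get_weather_tool = {
    "type": "function",
    "name": "get_weather",
    "description": "Look up the current weather for a city.",
    "strict": True,  # enforce strict validation of the model's arguments
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
        "additionalProperties": False,
    },
}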
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, TypeAlias, TypedDict + +from .tool_param import ToolParam +from .tool_choice_options import ToolChoiceOptions +from .tool_choice_mcp_param import ToolChoiceMcpParam +from .tool_choice_shell_param import ToolChoiceShellParam +from .tool_choice_types_param import ToolChoiceTypesParam +from ..shared_params.reasoning import Reasoning +from .tool_choice_custom_param import ToolChoiceCustomParam +from .response_input_item_param import ResponseInputItemParam +from .tool_choice_allowed_param import ToolChoiceAllowedParam +from .tool_choice_function_param import ToolChoiceFunctionParam +from .response_conversation_param import ResponseConversationParam +from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam +from .response_format_text_config_param import ResponseFormatTextConfigParam + +__all__ = ["InputTokenCountParams", "Conversation", "Text", "ToolChoice"] + + +class InputTokenCountParams(TypedDict, total=False): + conversation: Optional[Conversation] + """The conversation that this response belongs to. + + Items from this conversation are prepended to `input_items` for this response + request. Input items and output items from this response are automatically added + to this conversation after this response completes. + """ + + input: Union[str, Iterable[ResponseInputItemParam], None] + """Text, image, or file inputs to the model, used to generate a response""" + + instructions: Optional[str] + """ + A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + """ + + model: Optional[str] + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + parallel_tool_calls: Optional[bool] + """Whether to allow the model to run tool calls in parallel.""" + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + """ + + reasoning: Optional[Reasoning] + """ + **gpt-5 and o-series models only** Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + text: Optional[Text] + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tool_choice: Optional[ToolChoice] + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: Optional[Iterable[ToolParam]] + """An array of tools the model may call while generating a response. 
+ + You can specify which tool to use by setting the `tool_choice` parameter. + """ + + truncation: Literal["auto", "disabled"] + """The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. - `disabled` (default): If the + input size will exceed the context window size for a model, the request will + fail with a 400 error. + """ + + +Conversation: TypeAlias = Union[str, ResponseConversationParam] + + +class Text(TypedDict, total=False): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: ResponseFormatTextConfigParam + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + verbosity: Optional[Literal["low", "medium", "high"]] + """Constrains the verbosity of the model's response. + + Lower values will result in more concise responses, while higher values will + result in more verbose responses. Currently supported values are `low`, + `medium`, and `high`. + """ + + +ToolChoice: TypeAlias = Union[ + ToolChoiceOptions, + ToolChoiceAllowedParam, + ToolChoiceTypesParam, + ToolChoiceFunctionParam, + ToolChoiceMcpParam, + ToolChoiceCustomParam, + ToolChoiceApplyPatchParam, + ToolChoiceShellParam, +] diff --git a/src/openai/types/responses/input_token_count_response.py b/src/openai/types/responses/input_token_count_response.py new file mode 100644 index 0000000000..30ddfc1217 --- /dev/null +++ b/src/openai/types/responses/input_token_count_response.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputTokenCountResponse"] + + +class InputTokenCountResponse(BaseModel): + input_tokens: int + + object: Literal["response.input_tokens"] diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py new file mode 100644 index 0000000000..deb84a7a6b --- /dev/null +++ b/src/openai/types/responses/response.py @@ -0,0 +1,299 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
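Putting the `InputTokenCountParams` fields together, a token-counting request body could be sketched as below (the model name and message content are illustrative; the `input` items follow the `EasyInputMessageParam` shape defined earlier):

token_count_params = {
    "model": "gpt-4o",
    "instructions": "Answer as briefly as possible.",
    "input": [
        {"role": "user", "content": "How many tokens does this request use?"},
    ],
    "parallel_tool_calls": False,
    "truncation": "auto",
}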
+ +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from .tool import Tool +from ..._models import BaseModel +from .response_error import ResponseError +from .response_usage import ResponseUsage +from .response_prompt import ResponsePrompt +from .response_status import ResponseStatus +from .tool_choice_mcp import ToolChoiceMcp +from ..shared.metadata import Metadata +from ..shared.reasoning import Reasoning +from .tool_choice_shell import ToolChoiceShell +from .tool_choice_types import ToolChoiceTypes +from .tool_choice_custom import ToolChoiceCustom +from .response_input_item import ResponseInputItem +from .tool_choice_allowed import ToolChoiceAllowed +from .tool_choice_options import ToolChoiceOptions +from .response_output_item import ResponseOutputItem +from .response_text_config import ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction +from ..shared.responses_model import ResponsesModel +from .tool_choice_apply_patch import ToolChoiceApplyPatch + +__all__ = ["Response", "IncompleteDetails", "ToolChoice", "Conversation"] + + +class IncompleteDetails(BaseModel): + """Details about why the response is incomplete.""" + + reason: Optional[Literal["max_output_tokens", "content_filter"]] = None + """The reason why the response is incomplete.""" + + +ToolChoice: TypeAlias = Union[ + ToolChoiceOptions, + ToolChoiceAllowed, + ToolChoiceTypes, + ToolChoiceFunction, + ToolChoiceMcp, + ToolChoiceCustom, + ToolChoiceApplyPatch, + ToolChoiceShell, +] + + +class Conversation(BaseModel): + """The conversation that this response belongs to. + + Input items and output items from this response are automatically added to this conversation. + """ + + id: str + """The unique ID of the conversation.""" + + +class Response(BaseModel): + id: str + """Unique identifier for this Response.""" + + created_at: float + """Unix timestamp (in seconds) of when this Response was created.""" + + error: Optional[ResponseError] = None + """An error object returned when the model fails to generate a Response.""" + + incomplete_details: Optional[IncompleteDetails] = None + """Details about why the response is incomplete.""" + + instructions: Union[str, List[ResponseInputItem], None] = None + """A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: ResponsesModel + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + object: Literal["response"] + """The object type of this resource - always set to `response`.""" + + output: List[ResponseOutputItem] + """An array of content items generated by the model. 
+ + - The length and order of items in the `output` array is dependent on the + model's response. + - Rather than accessing the first item in the `output` array and assuming it's + an `assistant` message with the content generated by the model, you might + consider using the `output_text` property where supported in SDKs. + """ + + parallel_tool_calls: bool + """Whether to allow the model to run tool calls in parallel.""" + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: List[Tool] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + background: Optional[bool] = None + """ + Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + """ + + conversation: Optional[Conversation] = None + """The conversation that this response belongs to. + + Input items and output items from this response are automatically added to this + conversation. + """ + + max_output_tokens: Optional[int] = None + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + max_tool_calls: Optional[int] = None + """ + The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. 
+ """ + + previous_response_id: Optional[str] = None + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + """ + + prompt: Optional[ResponsePrompt] = None + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + prompt_cache_key: Optional[str] = None + """ + Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + """ + + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None + """The retention policy for the prompt cache. + + Set to `24h` to enable extended prompt caching, which keeps cached prefixes + active for longer, up to a maximum of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + """ + + reasoning: Optional[Reasoning] = None + """**gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + safety_identifier: Optional[str] = None + """ + A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + """ + + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None + """Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + """ + + status: Optional[ResponseStatus] = None + """The status of the response generation. + + One of `completed`, `failed`, `in_progress`, `cancelled`, `queued`, or + `incomplete`. + """ + + text: Optional[ResponseTextConfig] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. 
Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + top_logprobs: Optional[int] = None + """ + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + """ + + truncation: Optional[Literal["auto", "disabled"]] = None + """The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + usage: Optional[ResponseUsage] = None + """ + Represents token usage details including input tokens, output tokens, a + breakdown of output tokens, and the total tokens used. + """ + + user: Optional[str] = None + """This field is being replaced by `safety_identifier` and `prompt_cache_key`. + + Use `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + """ diff --git a/src/openai/types/responses/response_apply_patch_tool_call.py b/src/openai/types/responses/response_apply_patch_tool_call.py new file mode 100644 index 0000000000..7af1300265 --- /dev/null +++ b/src/openai/types/responses/response_apply_patch_tool_call.py @@ -0,0 +1,84 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseApplyPatchToolCall", + "Operation", + "OperationCreateFile", + "OperationDeleteFile", + "OperationUpdateFile", +] + + +class OperationCreateFile(BaseModel): + """Instruction describing how to create a file via the apply_patch tool.""" + + diff: str + """Diff to apply.""" + + path: str + """Path of the file to create.""" + + type: Literal["create_file"] + """Create a new file with the provided diff.""" + + +class OperationDeleteFile(BaseModel): + """Instruction describing how to delete a file via the apply_patch tool.""" + + path: str + """Path of the file to delete.""" + + type: Literal["delete_file"] + """Delete the specified file.""" + + +class OperationUpdateFile(BaseModel): + """Instruction describing how to update a file via the apply_patch tool.""" + + diff: str + """Diff to apply.""" + + path: str + """Path of the file to update.""" + + type: Literal["update_file"] + """Update an existing file with the provided diff.""" + + +Operation: TypeAlias = Annotated[ + Union[OperationCreateFile, OperationDeleteFile, OperationUpdateFile], PropertyInfo(discriminator="type") +] + + +class ResponseApplyPatchToolCall(BaseModel): + """A tool call that applies file diffs by creating, deleting, or updating files.""" + + id: str + """The unique ID of the apply patch tool call. + + Populated when this item is returned via API. 
+ """ + + call_id: str + """The unique ID of the apply patch tool call generated by the model.""" + + operation: Operation + """ + One of the create_file, delete_file, or update_file operations applied via + apply_patch. + """ + + status: Literal["in_progress", "completed"] + """The status of the apply patch tool call. One of `in_progress` or `completed`.""" + + type: Literal["apply_patch_call"] + """The type of the item. Always `apply_patch_call`.""" + + created_by: Optional[str] = None + """The ID of the entity that created this tool call.""" diff --git a/src/openai/types/responses/response_apply_patch_tool_call_output.py b/src/openai/types/responses/response_apply_patch_tool_call_output.py new file mode 100644 index 0000000000..de63c6e2ee --- /dev/null +++ b/src/openai/types/responses/response_apply_patch_tool_call_output.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseApplyPatchToolCallOutput"] + + +class ResponseApplyPatchToolCallOutput(BaseModel): + """The output emitted by an apply patch tool call.""" + + id: str + """The unique ID of the apply patch tool call output. + + Populated when this item is returned via API. + """ + + call_id: str + """The unique ID of the apply patch tool call generated by the model.""" + + status: Literal["completed", "failed"] + """The status of the apply patch tool call output. One of `completed` or `failed`.""" + + type: Literal["apply_patch_call_output"] + """The type of the item. Always `apply_patch_call_output`.""" + + created_by: Optional[str] = None + """The ID of the entity that created this tool call output.""" + + output: Optional[str] = None + """Optional textual output returned by the apply patch tool.""" diff --git a/src/openai/types/responses/response_audio_delta_event.py b/src/openai/types/responses/response_audio_delta_event.py new file mode 100644 index 0000000000..e577d65d04 --- /dev/null +++ b/src/openai/types/responses/response_audio_delta_event.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + """Emitted when there is a partial audio response.""" + + delta: str + """A chunk of Base64 encoded response audio bytes.""" + + sequence_number: int + """A sequence number for this chunk of the stream response.""" + + type: Literal["response.audio.delta"] + """The type of the event. Always `response.audio.delta`.""" diff --git a/src/openai/types/responses/response_audio_done_event.py b/src/openai/types/responses/response_audio_done_event.py new file mode 100644 index 0000000000..f5f0401c86 --- /dev/null +++ b/src/openai/types/responses/response_audio_done_event.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + """Emitted when the audio response is complete.""" + + sequence_number: int + """The sequence number of the delta.""" + + type: Literal["response.audio.done"] + """The type of the event. 
Always `response.audio.done`.""" diff --git a/src/openai/types/responses/response_audio_transcript_delta_event.py b/src/openai/types/responses/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..03be59a29f --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_delta_event.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + """Emitted when there is a partial transcript of audio.""" + + delta: str + """The partial transcript of the audio response.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.audio.transcript.delta"] + """The type of the event. Always `response.audio.transcript.delta`.""" diff --git a/src/openai/types/responses/response_audio_transcript_done_event.py b/src/openai/types/responses/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..87219e4844 --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_done_event.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + """Emitted when the full audio transcript is completed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.audio.transcript.done"] + """The type of the event. Always `response.audio.transcript.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py new file mode 100644 index 0000000000..c6bc8b73ea --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDeltaEvent"] + + +class ResponseCodeInterpreterCallCodeDeltaEvent(BaseModel): + """Emitted when a partial code snippet is streamed by the code interpreter.""" + + delta: str + """The partial code snippet being streamed by the code interpreter.""" + + item_id: str + """The unique identifier of the code interpreter tool call item.""" + + output_index: int + """ + The index of the output item in the response for which the code is being + streamed. + """ + + sequence_number: int + """The sequence number of this event, used to order streaming events.""" + + type: Literal["response.code_interpreter_call_code.delta"] + """The type of the event. Always `response.code_interpreter_call_code.delta`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_done_event.py b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py new file mode 100644 index 0000000000..186c03711a --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
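The audio and transcript streaming events above all carry a `type` discriminator plus a `sequence_number`, so a stream consumer can dispatch on the concrete event class. A minimal sketch, assuming events have already been parsed into the models added in this diff:

from openai.types.responses.response_audio_delta_event import ResponseAudioDeltaEvent
from openai.types.responses.response_audio_transcript_delta_event import (
    ResponseAudioTranscriptDeltaEvent,
)


def handle_audio_event(event: object) -> None:
    # Branch on the concrete event model rather than inspecting raw dicts.
    if isinstance(event, ResponseAudioDeltaEvent):
        print(f"audio chunk #{event.sequence_number}: {len(event.delta)} base64 chars")
    elif isinstance(event, ResponseAudioTranscriptDeltaEvent):
        print(f"transcript delta #{event.sequence_number}: {event.delta!r}")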
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDoneEvent"] + + +class ResponseCodeInterpreterCallCodeDoneEvent(BaseModel): + """Emitted when the code snippet is finalized by the code interpreter.""" + + code: str + """The final code snippet output by the code interpreter.""" + + item_id: str + """The unique identifier of the code interpreter tool call item.""" + + output_index: int + """The index of the output item in the response for which the code is finalized.""" + + sequence_number: int + """The sequence number of this event, used to order streaming events.""" + + type: Literal["response.code_interpreter_call_code.done"] + """The type of the event. Always `response.code_interpreter_call_code.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_completed_event.py b/src/openai/types/responses/response_code_interpreter_call_completed_event.py new file mode 100644 index 0000000000..197e39e7e9 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_completed_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCompletedEvent"] + + +class ResponseCodeInterpreterCallCompletedEvent(BaseModel): + """Emitted when the code interpreter call is completed.""" + + item_id: str + """The unique identifier of the code interpreter tool call item.""" + + output_index: int + """ + The index of the output item in the response for which the code interpreter call + is completed. + """ + + sequence_number: int + """The sequence number of this event, used to order streaming events.""" + + type: Literal["response.code_interpreter_call.completed"] + """The type of the event. Always `response.code_interpreter_call.completed`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py new file mode 100644 index 0000000000..c775f1b864 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallInProgressEvent"] + + +class ResponseCodeInterpreterCallInProgressEvent(BaseModel): + """Emitted when a code interpreter call is in progress.""" + + item_id: str + """The unique identifier of the code interpreter tool call item.""" + + output_index: int + """ + The index of the output item in the response for which the code interpreter call + is in progress. + """ + + sequence_number: int + """The sequence number of this event, used to order streaming events.""" + + type: Literal["response.code_interpreter_call.in_progress"] + """The type of the event. Always `response.code_interpreter_call.in_progress`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py new file mode 100644 index 0000000000..85e9c87f08 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
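These code interpreter lifecycle events are plain data models, so a raw streaming payload can be validated into them directly. A minimal sketch, assuming the SDK's `BaseModel` follows Pydantic v2 conventions (i.e. `model_validate` is available); otherwise keyword construction works the same way.

from openai.types.responses.response_code_interpreter_call_completed_event import (
    ResponseCodeInterpreterCallCompletedEvent,
)

# Validate a raw payload into the completed-call event defined above.
payload = {
    "item_id": "ci_123",
    "output_index": 0,
    "sequence_number": 7,
    "type": "response.code_interpreter_call.completed",
}
event = ResponseCodeInterpreterCallCompletedEvent.model_validate(payload)
assert event.type == "response.code_interpreter_call.completed"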
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallInterpretingEvent"] + + +class ResponseCodeInterpreterCallInterpretingEvent(BaseModel): + """Emitted when the code interpreter is actively interpreting the code snippet.""" + + item_id: str + """The unique identifier of the code interpreter tool call item.""" + + output_index: int + """ + The index of the output item in the response for which the code interpreter is + interpreting code. + """ + + sequence_number: int + """The sequence number of this event, used to order streaming events.""" + + type: Literal["response.code_interpreter_call.interpreting"] + """The type of the event. Always `response.code_interpreter_call.interpreting`.""" diff --git a/src/openai/types/responses/response_code_interpreter_tool_call.py b/src/openai/types/responses/response_code_interpreter_tool_call.py new file mode 100644 index 0000000000..d7e30f4920 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_tool_call.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterToolCall", "Output", "OutputLogs", "OutputImage"] + + +class OutputLogs(BaseModel): + """The logs output from the code interpreter.""" + + logs: str + """The logs output from the code interpreter.""" + + type: Literal["logs"] + """The type of the output. Always `logs`.""" + + +class OutputImage(BaseModel): + """The image output from the code interpreter.""" + + type: Literal["image"] + """The type of the output. Always `image`.""" + + url: str + """The URL of the image output from the code interpreter.""" + + +Output: TypeAlias = Annotated[Union[OutputLogs, OutputImage], PropertyInfo(discriminator="type")] + + +class ResponseCodeInterpreterToolCall(BaseModel): + """A tool call to run code.""" + + id: str + """The unique ID of the code interpreter tool call.""" + + code: Optional[str] = None + """The code to run, or null if not available.""" + + container_id: str + """The ID of the container used to run the code.""" + + outputs: Optional[List[Output]] = None + """ + The outputs generated by the code interpreter, such as logs or images. Can be + null if no outputs are available. + """ + + status: Literal["in_progress", "completed", "incomplete", "interpreting", "failed"] + """The status of the code interpreter tool call. + + Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and + `failed`. + """ + + type: Literal["code_interpreter_call"] + """The type of the code interpreter tool call. Always `code_interpreter_call`.""" diff --git a/src/openai/types/responses/response_code_interpreter_tool_call_param.py b/src/openai/types/responses/response_code_interpreter_tool_call_param.py new file mode 100644 index 0000000000..fc03a3fe48 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_tool_call_param.py @@ -0,0 +1,60 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
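The `outputs` field on `ResponseCodeInterpreterToolCall` above is a discriminated union of log and image entries, so callers typically branch on `type` before reading the payload. A minimal sketch, assuming `call` is an already-parsed tool call:

from typing import List

from openai.types.responses.response_code_interpreter_tool_call import (
    ResponseCodeInterpreterToolCall,
)


def summarize_outputs(call: ResponseCodeInterpreterToolCall) -> List[str]:
    # `outputs` can be None, and each entry is either logs or an image URL.
    lines: List[str] = []
    for output in call.outputs or []:
        if output.type == "logs":
            lines.append(f"logs: {output.logs[:80]}")
        else:  # "image"
            lines.append(f"image: {output.url}")
    return lines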
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = ["ResponseCodeInterpreterToolCallParam", "Output", "OutputLogs", "OutputImage"] + + +class OutputLogs(TypedDict, total=False): + """The logs output from the code interpreter.""" + + logs: Required[str] + """The logs output from the code interpreter.""" + + type: Required[Literal["logs"]] + """The type of the output. Always `logs`.""" + + +class OutputImage(TypedDict, total=False): + """The image output from the code interpreter.""" + + type: Required[Literal["image"]] + """The type of the output. Always `image`.""" + + url: Required[str] + """The URL of the image output from the code interpreter.""" + + +Output: TypeAlias = Union[OutputLogs, OutputImage] + + +class ResponseCodeInterpreterToolCallParam(TypedDict, total=False): + """A tool call to run code.""" + + id: Required[str] + """The unique ID of the code interpreter tool call.""" + + code: Required[Optional[str]] + """The code to run, or null if not available.""" + + container_id: Required[str] + """The ID of the container used to run the code.""" + + outputs: Required[Optional[Iterable[Output]]] + """ + The outputs generated by the code interpreter, such as logs or images. Can be + null if no outputs are available. + """ + + status: Required[Literal["in_progress", "completed", "incomplete", "interpreting", "failed"]] + """The status of the code interpreter tool call. + + Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and + `failed`. + """ + + type: Required[Literal["code_interpreter_call"]] + """The type of the code interpreter tool call. Always `code_interpreter_call`.""" diff --git a/src/openai/types/responses/response_compact_params.py b/src/openai/types/responses/response_compact_params.py new file mode 100644 index 0000000000..657c6a0764 --- /dev/null +++ b/src/openai/types/responses/response_compact_params.py @@ -0,0 +1,133 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
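The `*Param` variant above is the request-side TypedDict mirror of the same tool call, with every field marked `Required` (including the nullable `code` and `outputs`). A minimal sketch of a literal that satisfies it:

from openai.types.responses.response_code_interpreter_tool_call_param import (
    ResponseCodeInterpreterToolCallParam,
)

# A request payload matching the TypedDict defined above.
call_param: ResponseCodeInterpreterToolCallParam = {
    "id": "ci_123",
    "code": "print('hello')",
    "container_id": "cntr_456",
    "outputs": [{"type": "logs", "logs": "hello\n"}],
    "status": "completed",
    "type": "code_interpreter_call",
}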
+ +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .response_input_item_param import ResponseInputItemParam + +__all__ = ["ResponseCompactParams"] + + +class ResponseCompactParams(TypedDict, total=False): + model: Required[ + Union[ + Literal[ + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ] + ] + """Model ID used to generate the response, like `gpt-5` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + input: Union[str, Iterable[ResponseInputItemParam], None] + """Text, image, or file inputs to the model, used to generate a response""" + + instructions: Optional[str] + """ + A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + """ + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. 
+ """ diff --git a/src/openai/types/responses/response_compaction_item.py b/src/openai/types/responses/response_compaction_item.py new file mode 100644 index 0000000000..f5f8b97f4e --- /dev/null +++ b/src/openai/types/responses/response_compaction_item.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCompactionItem"] + + +class ResponseCompactionItem(BaseModel): + """ + A compaction item generated by the [`v1/responses/compact` API](https://platform.openai.com/docs/api-reference/responses/compact). + """ + + id: str + """The unique ID of the compaction item.""" + + encrypted_content: str + + type: Literal["compaction"] + """The type of the item. Always `compaction`.""" + + created_by: Optional[str] = None diff --git a/src/openai/types/responses/response_compaction_item_param.py b/src/openai/types/responses/response_compaction_item_param.py new file mode 100644 index 0000000000..5dcc921d67 --- /dev/null +++ b/src/openai/types/responses/response_compaction_item_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCompactionItemParam"] + + +class ResponseCompactionItemParam(BaseModel): + """ + A compaction item generated by the [`v1/responses/compact` API](https://platform.openai.com/docs/api-reference/responses/compact). + """ + + encrypted_content: str + + type: Literal["compaction"] + """The type of the item. Always `compaction`.""" + + id: Optional[str] = None + """The ID of the compaction item.""" diff --git a/src/openai/types/responses/response_compaction_item_param_param.py b/src/openai/types/responses/response_compaction_item_param_param.py new file mode 100644 index 0000000000..b9b5ab031c --- /dev/null +++ b/src/openai/types/responses/response_compaction_item_param_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseCompactionItemParamParam"] + + +class ResponseCompactionItemParamParam(TypedDict, total=False): + """ + A compaction item generated by the [`v1/responses/compact` API](https://platform.openai.com/docs/api-reference/responses/compact). + """ + + encrypted_content: Required[str] + + type: Required[Literal["compaction"]] + """The type of the item. Always `compaction`.""" + + id: Optional[str] + """The ID of the compaction item.""" diff --git a/src/openai/types/responses/response_completed_event.py b/src/openai/types/responses/response_completed_event.py new file mode 100644 index 0000000000..6dc958101c --- /dev/null +++ b/src/openai/types/responses/response_completed_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
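The compaction params above accept the familiar `input` / `instructions` / `previous_response_id` trio alongside an explicit (nullable) `model`. A minimal sketch of a payload shaped by the `ResponseCompactParams` TypedDict from this diff; the model name is only one example from the allowed literals.

from openai.types.responses.response_compact_params import ResponseCompactParams

# Only `model` is required; the remaining keys mirror a normal response request.
params: ResponseCompactParams = {
    "model": "gpt-5.1",
    "input": "Summarize the conversation so far.",
    "previous_response_id": "resp_abc123",
}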
+ +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCompletedEvent"] + + +class ResponseCompletedEvent(BaseModel): + """Emitted when the model response is complete.""" + + response: Response + """Properties of the completed response.""" + + sequence_number: int + """The sequence number for this event.""" + + type: Literal["response.completed"] + """The type of the event. Always `response.completed`.""" diff --git a/src/openai/types/responses/response_computer_tool_call.py b/src/openai/types/responses/response_computer_tool_call.py new file mode 100644 index 0000000000..4e1b3cf7fd --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call.py @@ -0,0 +1,237 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseComputerToolCall", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(BaseModel): + """A click action.""" + + button: Literal["left", "right", "wheel", "back", "forward"] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Literal["click"] + """Specifies the event type. For a click action, this property is always `click`.""" + + x: int + """The x-coordinate where the click occurred.""" + + y: int + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(BaseModel): + """A double click action.""" + + type: Literal["double_click"] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. + """ + + x: int + """The x-coordinate where the double click occurred.""" + + y: int + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(BaseModel): + """An x/y coordinate pair, e.g. `{ x: 100, y: 200 }`.""" + + x: int + """The x-coordinate.""" + + y: int + """The y-coordinate.""" + + +class ActionDrag(BaseModel): + """A drag action.""" + + path: List[ActionDragPath] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Literal["drag"] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(BaseModel): + """A collection of keypresses the model would like to perform.""" + + keys: List[str] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Literal["keypress"] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(BaseModel): + """A mouse move action.""" + + type: Literal["move"] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: int + """The x-coordinate to move to.""" + + y: int + """The y-coordinate to move to.""" + + +class ActionScreenshot(BaseModel): + """A screenshot action.""" + + type: Literal["screenshot"] + """Specifies the event type. 
+ + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(BaseModel): + """A scroll action.""" + + scroll_x: int + """The horizontal scroll distance.""" + + scroll_y: int + """The vertical scroll distance.""" + + type: Literal["scroll"] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. + """ + + x: int + """The x-coordinate where the scroll occurred.""" + + y: int + """The y-coordinate where the scroll occurred.""" + + +class ActionType(BaseModel): + """An action to type in text.""" + + text: str + """The text to type.""" + + type: Literal["type"] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(BaseModel): + """A wait action.""" + + type: Literal["wait"] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. + """ + + +Action: TypeAlias = Annotated[ + Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, + ], + PropertyInfo(discriminator="type"), +] + + +class PendingSafetyCheck(BaseModel): + """A pending safety check for the computer call.""" + + id: str + """The ID of the pending safety check.""" + + code: Optional[str] = None + """The type of the pending safety check.""" + + message: Optional[str] = None + """Details about the pending safety check.""" + + +class ResponseComputerToolCall(BaseModel): + """A tool call to a computer use tool. + + See the + [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + """ + + id: str + """The unique ID of the computer call.""" + + action: Action + """A click action.""" + + call_id: str + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: List[PendingSafetyCheck] + """The pending safety checks for the computer call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["computer_call"] + """The type of the computer call. Always `computer_call`.""" diff --git a/src/openai/types/responses/response_computer_tool_call_output_item.py b/src/openai/types/responses/response_computer_tool_call_output_item.py new file mode 100644 index 0000000000..90e935c3bd --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_item.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
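`ResponseComputerToolCall.action` above is a nine-way discriminated union, so an executor usually switches on `action.type` before touching coordinate or text fields. A minimal sketch, assuming `call` is an already-parsed computer tool call:

from openai.types.responses.response_computer_tool_call import ResponseComputerToolCall


def describe_action(call: ResponseComputerToolCall) -> str:
    # Narrow the Action union via its `type` discriminator.
    action = call.action
    if action.type == "click":
        return f"{action.button} click at ({action.x}, {action.y})"
    if action.type == "type":
        return f"type {action.text!r}"
    if action.type == "screenshot":
        return "take a screenshot"
    return action.type  # drag, keypress, move, scroll, or wait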
+ +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_computer_tool_call_output_screenshot import ResponseComputerToolCallOutputScreenshot + +__all__ = ["ResponseComputerToolCallOutputItem", "AcknowledgedSafetyCheck"] + + +class AcknowledgedSafetyCheck(BaseModel): + """A pending safety check for the computer call.""" + + id: str + """The ID of the pending safety check.""" + + code: Optional[str] = None + """The type of the pending safety check.""" + + message: Optional[str] = None + """Details about the pending safety check.""" + + +class ResponseComputerToolCallOutputItem(BaseModel): + id: str + """The unique ID of the computer call tool output.""" + + call_id: str + """The ID of the computer tool call that produced the output.""" + + output: ResponseComputerToolCallOutputScreenshot + """A computer screenshot image used with the computer use tool.""" + + type: Literal["computer_call_output"] + """The type of the computer tool call output. Always `computer_call_output`.""" + + acknowledged_safety_checks: Optional[List[AcknowledgedSafetyCheck]] = None + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ diff --git a/src/openai/types/responses/response_computer_tool_call_output_screenshot.py b/src/openai/types/responses/response_computer_tool_call_output_screenshot.py new file mode 100644 index 0000000000..2c16f215eb --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_screenshot.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseComputerToolCallOutputScreenshot"] + + +class ResponseComputerToolCallOutputScreenshot(BaseModel): + """A computer screenshot image used with the computer use tool.""" + + type: Literal["computer_screenshot"] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. + """ + + file_id: Optional[str] = None + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: Optional[str] = None + """The URL of the screenshot image.""" diff --git a/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py b/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py new file mode 100644 index 0000000000..857ccf9fb9 --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_output_screenshot_param.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseComputerToolCallOutputScreenshotParam"] + + +class ResponseComputerToolCallOutputScreenshotParam(TypedDict, total=False): + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_screenshot"]] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. 
+ """ + + file_id: str + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: str + """The URL of the screenshot image.""" diff --git a/src/openai/types/responses/response_computer_tool_call_param.py b/src/openai/types/responses/response_computer_tool_call_param.py new file mode 100644 index 0000000000..550ba599cd --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_param.py @@ -0,0 +1,235 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr + +__all__ = [ + "ResponseComputerToolCallParam", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(TypedDict, total=False): + """A click action.""" + + button: Required[Literal["left", "right", "wheel", "back", "forward"]] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Required[Literal["click"]] + """Specifies the event type. For a click action, this property is always `click`.""" + + x: Required[int] + """The x-coordinate where the click occurred.""" + + y: Required[int] + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(TypedDict, total=False): + """A double click action.""" + + type: Required[Literal["double_click"]] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. + """ + + x: Required[int] + """The x-coordinate where the double click occurred.""" + + y: Required[int] + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(TypedDict, total=False): + """An x/y coordinate pair, e.g. `{ x: 100, y: 200 }`.""" + + x: Required[int] + """The x-coordinate.""" + + y: Required[int] + """The y-coordinate.""" + + +class ActionDrag(TypedDict, total=False): + """A drag action.""" + + path: Required[Iterable[ActionDragPath]] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Required[Literal["drag"]] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(TypedDict, total=False): + """A collection of keypresses the model would like to perform.""" + + keys: Required[SequenceNotStr[str]] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Required[Literal["keypress"]] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(TypedDict, total=False): + """A mouse move action.""" + + type: Required[Literal["move"]] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: Required[int] + """The x-coordinate to move to.""" + + y: Required[int] + """The y-coordinate to move to.""" + + +class ActionScreenshot(TypedDict, total=False): + """A screenshot action.""" + + type: Required[Literal["screenshot"]] + """Specifies the event type. 
+ + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(TypedDict, total=False): + """A scroll action.""" + + scroll_x: Required[int] + """The horizontal scroll distance.""" + + scroll_y: Required[int] + """The vertical scroll distance.""" + + type: Required[Literal["scroll"]] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. + """ + + x: Required[int] + """The x-coordinate where the scroll occurred.""" + + y: Required[int] + """The y-coordinate where the scroll occurred.""" + + +class ActionType(TypedDict, total=False): + """An action to type in text.""" + + text: Required[str] + """The text to type.""" + + type: Required[Literal["type"]] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(TypedDict, total=False): + """A wait action.""" + + type: Required[Literal["wait"]] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. + """ + + +Action: TypeAlias = Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, +] + + +class PendingSafetyCheck(TypedDict, total=False): + """A pending safety check for the computer call.""" + + id: Required[str] + """The ID of the pending safety check.""" + + code: Optional[str] + """The type of the pending safety check.""" + + message: Optional[str] + """Details about the pending safety check.""" + + +class ResponseComputerToolCallParam(TypedDict, total=False): + """A tool call to a computer use tool. + + See the + [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + """ + + id: Required[str] + """The unique ID of the computer call.""" + + action: Required[Action] + """A click action.""" + + call_id: Required[str] + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: Required[Iterable[PendingSafetyCheck]] + """The pending safety checks for the computer call.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Required[Literal["computer_call"]] + """The type of the computer call. Always `computer_call`.""" diff --git a/src/openai/types/responses/response_content_part_added_event.py b/src/openai/types/responses/response_content_part_added_event.py new file mode 100644 index 0000000000..ec9893159d --- /dev/null +++ b/src/openai/types/responses/response_content_part_added_event.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartAddedEvent", "Part", "PartReasoningText"] + + +class PartReasoningText(BaseModel): + """Reasoning text from the model.""" + + text: str + """The reasoning text from the model.""" + + type: Literal["reasoning_text"] + """The type of the reasoning text. 
Always `reasoning_text`.""" + + +Part: TypeAlias = Annotated[ + Union[ResponseOutputText, ResponseOutputRefusal, PartReasoningText], PropertyInfo(discriminator="type") +] + + +class ResponseContentPartAddedEvent(BaseModel): + """Emitted when a new content part is added.""" + + content_index: int + """The index of the content part that was added.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that was added.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.content_part.added"] + """The type of the event. Always `response.content_part.added`.""" diff --git a/src/openai/types/responses/response_content_part_done_event.py b/src/openai/types/responses/response_content_part_done_event.py new file mode 100644 index 0000000000..f896ad8743 --- /dev/null +++ b/src/openai/types/responses/response_content_part_done_event.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartDoneEvent", "Part", "PartReasoningText"] + + +class PartReasoningText(BaseModel): + """Reasoning text from the model.""" + + text: str + """The reasoning text from the model.""" + + type: Literal["reasoning_text"] + """The type of the reasoning text. Always `reasoning_text`.""" + + +Part: TypeAlias = Annotated[ + Union[ResponseOutputText, ResponseOutputRefusal, PartReasoningText], PropertyInfo(discriminator="type") +] + + +class ResponseContentPartDoneEvent(BaseModel): + """Emitted when a content part is done.""" + + content_index: int + """The index of the content part that is done.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that is done.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.content_part.done"] + """The type of the event. Always `response.content_part.done`.""" diff --git a/src/openai/types/responses/response_conversation_param.py b/src/openai/types/responses/response_conversation_param.py new file mode 100644 index 0000000000..d1587fe68a --- /dev/null +++ b/src/openai/types/responses/response_conversation_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["ResponseConversationParam"] + + +class ResponseConversationParam(TypedDict, total=False): + """The conversation that this response belongs to.""" + + id: Required[str] + """The unique ID of the conversation.""" diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py new file mode 100644 index 0000000000..15844c6597 --- /dev/null +++ b/src/openai/types/responses/response_create_params.py @@ -0,0 +1,336 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
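Both content-part events above share the same three-way `Part` union (output text, refusal, reasoning text), so one helper can extract displayable text from either. A minimal sketch, assuming `ResponseOutputText` exposes `text` and `ResponseOutputRefusal` exposes `refusal`, as elsewhere in the SDK:

from openai.types.responses.response_content_part_added_event import (
    ResponseContentPartAddedEvent,
)


def part_text(event: ResponseContentPartAddedEvent) -> str:
    # Narrow the Part union via its `type` discriminator.
    part = event.part
    if part.type == "output_text":
        return part.text
    if part.type == "refusal":
        return part.refusal
    return part.text  # reasoning_text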
+ +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .tool_param import ToolParam +from .response_includable import ResponseIncludable +from .tool_choice_options import ToolChoiceOptions +from .response_input_param import ResponseInputParam +from .response_prompt_param import ResponsePromptParam +from .tool_choice_mcp_param import ToolChoiceMcpParam +from ..shared_params.metadata import Metadata +from .tool_choice_shell_param import ToolChoiceShellParam +from .tool_choice_types_param import ToolChoiceTypesParam +from ..shared_params.reasoning import Reasoning +from .tool_choice_custom_param import ToolChoiceCustomParam +from .tool_choice_allowed_param import ToolChoiceAllowedParam +from .response_text_config_param import ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam +from .response_conversation_param import ResponseConversationParam +from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam +from ..shared_params.responses_model import ResponsesModel + +__all__ = [ + "ResponseCreateParamsBase", + "Conversation", + "StreamOptions", + "ToolChoice", + "ResponseCreateParamsNonStreaming", + "ResponseCreateParamsStreaming", +] + + +class ResponseCreateParamsBase(TypedDict, total=False): + background: Optional[bool] + """ + Whether to run the model response in the background. + [Learn more](https://platform.openai.com/docs/guides/background). + """ + + conversation: Optional[Conversation] + """The conversation that this response belongs to. + + Items from this conversation are prepended to `input_items` for this response + request. Input items and output items from this response are automatically added + to this conversation after this response completes. + """ + + include: Optional[List[ResponseIncludable]] + """Specify additional output data to include in the model response. + + Currently supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool + call. + - `code_interpreter_call.outputs`: Includes the outputs of python code execution + in code interpreter tool call items. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `message.output_text.logprobs`: Include logprobs with assistant messages. + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning + tokens in reasoning item outputs. This enables reasoning items to be used in + multi-turn conversations when using the Responses API statelessly (like when + the `store` parameter is set to `false`, or when an organization is enrolled + in the zero data retention program). + """ + + input: Union[str, ResponseInputParam] + """Text, image, or file inputs to the model, used to generate a response. 
+ + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + """ + + instructions: Optional[str] + """A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple to + swap out system (or developer) messages in new responses. + """ + + max_output_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + max_tool_calls: Optional[int] + """ + The maximum number of total calls to built-in tools that can be processed in a + response. This maximum number applies across all built-in tool calls, not per + individual tool. Any further attempts to call a tool by the model will be + ignored. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: ResponsesModel + """Model ID used to generate the response, like `gpt-4o` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + parallel_tool_calls: Optional[bool] + """Whether to allow the model to run tool calls in parallel.""" + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + """ + + prompt: Optional[ResponsePromptParam] + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + prompt_cache_key: str + """ + Used by OpenAI to cache responses for similar requests to optimize your cache + hit rates. Replaces the `user` field. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + """ + + prompt_cache_retention: Optional[Literal["in-memory", "24h"]] + """The retention policy for the prompt cache. + + Set to `24h` to enable extended prompt caching, which keeps cached prefixes + active for longer, up to a maximum of 24 hours. + [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). 
+ """ + + reasoning: Optional[Reasoning] + """**gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + safety_identifier: str + """ + A stable identifier used to help detect users of your application that may be + violating OpenAI's usage policies. The IDs should be a string that uniquely + identifies each user. We recommend hashing their username or email address, in + order to avoid sending us any identifying information. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + """ + + service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] + """Specifies the processing type used for serving the request. + + - If set to 'auto', then the request will be processed with the service tier + configured in the Project settings. Unless otherwise configured, the Project + will use 'default'. + - If set to 'default', then the request will be processed with the standard + pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or + '[priority](https://openai.com/api-priority-processing/)', then the request + will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the + `service_tier` value based on the processing mode actually used to serve the + request. This response value may be different from the value set in the + parameter. + """ + + store: Optional[bool] + """Whether to store the generated model response for later retrieval via API.""" + + stream_options: Optional[StreamOptions] + """Options for streaming responses. Only set this when you set `stream: true`.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + text: ResponseTextConfigParam + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: Iterable[ToolParam] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + We support the following categories of tools: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). 
+ - **MCP Tools**: Integrations with third-party systems via custom MCP servers or + predefined connectors such as Google Drive and SharePoint. Learn more about + [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code with strongly typed arguments and outputs. + Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + You can also use custom tools to call your own code. + """ + + top_logprobs: Optional[int] + """ + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + truncation: Optional[Literal["auto", "disabled"]] + """The truncation strategy to use for the model response. + + - `auto`: If the input to this Response exceeds the model's context window size, + the model will truncate the response to fit the context window by dropping + items from the beginning of the conversation. + - `disabled` (default): If the input size will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + user: str + """This field is being replaced by `safety_identifier` and `prompt_cache_key`. + + Use `prompt_cache_key` instead to maintain caching optimizations. A stable + identifier for your end-users. Used to boost cache hit rates by better bucketing + similar requests and to help OpenAI detect and prevent abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + """ + + +Conversation: TypeAlias = Union[str, ResponseConversationParam] + + +class StreamOptions(TypedDict, total=False): + """Options for streaming responses. Only set this when you set `stream: true`.""" + + include_obfuscation: bool + """When true, stream obfuscation will be enabled. + + Stream obfuscation adds random characters to an `obfuscation` field on streaming + delta events to normalize payload sizes as a mitigation to certain side-channel + attacks. These obfuscation fields are included by default, but add a small + amount of overhead to the data stream. You can set `include_obfuscation` to + false to optimize for bandwidth if you trust the network links between your + application and the OpenAI API. + """ + + +ToolChoice: TypeAlias = Union[ + ToolChoiceOptions, + ToolChoiceAllowedParam, + ToolChoiceTypesParam, + ToolChoiceFunctionParam, + ToolChoiceMcpParam, + ToolChoiceCustomParam, + ToolChoiceApplyPatchParam, + ToolChoiceShellParam, +] + + +class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
+ See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +class ResponseCreateParamsStreaming(ResponseCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/openai/types/responses/response_created_event.py b/src/openai/types/responses/response_created_event.py new file mode 100644 index 0000000000..308b2f4916 --- /dev/null +++ b/src/openai/types/responses/response_created_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + """An event that is emitted when a response is created.""" + + response: Response + """The response that was created.""" + + sequence_number: int + """The sequence number for this event.""" + + type: Literal["response.created"] + """The type of the event. Always `response.created`.""" diff --git a/src/openai/types/responses/response_custom_tool_call.py b/src/openai/types/responses/response_custom_tool_call.py new file mode 100644 index 0000000000..f05743966e --- /dev/null +++ b/src/openai/types/responses/response_custom_tool_call.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCustomToolCall"] + + +class ResponseCustomToolCall(BaseModel): + """A call to a custom tool created by the model.""" + + call_id: str + """An identifier used to map this custom tool call to a tool call output.""" + + input: str + """The input for the custom tool call generated by the model.""" + + name: str + """The name of the custom tool being called.""" + + type: Literal["custom_tool_call"] + """The type of the custom tool call. Always `custom_tool_call`.""" + + id: Optional[str] = None + """The unique ID of the custom tool call in the OpenAI platform.""" diff --git a/src/openai/types/responses/response_custom_tool_call_input_delta_event.py b/src/openai/types/responses/response_custom_tool_call_input_delta_event.py new file mode 100644 index 0000000000..7473d33d9a --- /dev/null +++ b/src/openai/types/responses/response_custom_tool_call_input_delta_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
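As a rough usage sketch for the request parameters defined in response_create_params.py above, the non-streaming and streaming variants might be called as follows; the model name and parameter values are illustrative assumptions.

from openai import OpenAI

client = OpenAI()

# Non-streaming request (ResponseCreateParamsNonStreaming): stream omitted or False.
response = client.responses.create(
    model="gpt-4o",  # assumed model name
    input="Summarize the plot of Hamlet in one sentence.",
    instructions="Answer concisely.",
    max_output_tokens=200,
    store=False,
)
print(response.output_text)

# Streaming request (ResponseCreateParamsStreaming): stream=True is required and
# the call yields server-sent events such as `response.created`.
for event in client.responses.create(
    model="gpt-4o",
    input="Summarize the plot of Hamlet in one sentence.",
    stream=True,
):
    if event.type == "response.created":
        print("response id:", event.response.id)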
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCustomToolCallInputDeltaEvent"]
+
+
+class ResponseCustomToolCallInputDeltaEvent(BaseModel):
+    """Event representing a delta (partial update) to the input of a custom tool call."""
+
+    delta: str
+    """The incremental input data (delta) for the custom tool call."""
+
+    item_id: str
+    """Unique identifier for the API item associated with this event."""
+
+    output_index: int
+    """The index of the output this delta applies to."""
+
+    sequence_number: int
+    """The sequence number of this event."""
+
+    type: Literal["response.custom_tool_call_input.delta"]
+    """The event type identifier."""
diff --git a/src/openai/types/responses/response_custom_tool_call_input_done_event.py b/src/openai/types/responses/response_custom_tool_call_input_done_event.py
new file mode 100644
index 0000000000..be47ae8e96
--- /dev/null
+++ b/src/openai/types/responses/response_custom_tool_call_input_done_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCustomToolCallInputDoneEvent"]
+
+
+class ResponseCustomToolCallInputDoneEvent(BaseModel):
+    """Event indicating that input for a custom tool call is complete."""
+
+    input: str
+    """The complete input data for the custom tool call."""
+
+    item_id: str
+    """Unique identifier for the API item associated with this event."""
+
+    output_index: int
+    """The index of the output this event applies to."""
+
+    sequence_number: int
+    """The sequence number of this event."""
+
+    type: Literal["response.custom_tool_call_input.done"]
+    """The event type identifier."""
diff --git a/src/openai/types/responses/response_custom_tool_call_output.py b/src/openai/types/responses/response_custom_tool_call_output.py
new file mode 100644
index 0000000000..833956493b
--- /dev/null
+++ b/src/openai/types/responses/response_custom_tool_call_output.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .response_input_file import ResponseInputFile
+from .response_input_text import ResponseInputText
+from .response_input_image import ResponseInputImage
+
+__all__ = ["ResponseCustomToolCallOutput", "OutputOutputContentList"]
+
+OutputOutputContentList: TypeAlias = Annotated[
+    Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type")
+]
+
+
+class ResponseCustomToolCallOutput(BaseModel):
+    """The output of a custom tool call from your code, being sent back to the model."""
+
+    call_id: str
+    """The call ID, used to map this custom tool call output to a custom tool call."""
+
+    output: Union[str, List[OutputOutputContentList]]
+    """
+    The output from the custom tool call generated by your code. Can be a string or
+    a list of output content.
+    """
+
+    type: Literal["custom_tool_call_output"]
+    """The type of the custom tool call output. Always `custom_tool_call_output`."""
+
+    id: Optional[str] = None
+    """The unique ID of the custom tool call output in the OpenAI platform."""
diff --git a/src/openai/types/responses/response_custom_tool_call_output_param.py b/src/openai/types/responses/response_custom_tool_call_output_param.py
new file mode 100644
index 0000000000..db0034216a
--- /dev/null
+++ b/src/openai/types/responses/response_custom_tool_call_output_param.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .response_input_file_param import ResponseInputFileParam
+from .response_input_text_param import ResponseInputTextParam
+from .response_input_image_param import ResponseInputImageParam
+
+__all__ = ["ResponseCustomToolCallOutputParam", "OutputOutputContentList"]
+
+OutputOutputContentList: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam]
+
+
+class ResponseCustomToolCallOutputParam(TypedDict, total=False):
+    """The output of a custom tool call from your code, being sent back to the model."""
+
+    call_id: Required[str]
+    """The call ID, used to map this custom tool call output to a custom tool call."""
+
+    output: Required[Union[str, Iterable[OutputOutputContentList]]]
+    """
+    The output from the custom tool call generated by your code. Can be a string or
+    a list of output content.
+    """
+
+    type: Required[Literal["custom_tool_call_output"]]
+    """The type of the custom tool call output. Always `custom_tool_call_output`."""
+
+    id: str
+    """The unique ID of the custom tool call output in the OpenAI platform."""
diff --git a/src/openai/types/responses/response_custom_tool_call_param.py b/src/openai/types/responses/response_custom_tool_call_param.py
new file mode 100644
index 0000000000..5d4ce3376c
--- /dev/null
+++ b/src/openai/types/responses/response_custom_tool_call_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCustomToolCallParam"]
+
+
+class ResponseCustomToolCallParam(TypedDict, total=False):
+    """A call to a custom tool created by the model."""
+
+    call_id: Required[str]
+    """An identifier used to map this custom tool call to a tool call output."""
+
+    input: Required[str]
+    """The input for the custom tool call generated by the model."""
+
+    name: Required[str]
+    """The name of the custom tool being called."""
+
+    type: Required[Literal["custom_tool_call"]]
+    """The type of the custom tool call. Always `custom_tool_call`."""
+
+    id: str
+    """The unique ID of the custom tool call in the OpenAI platform."""
diff --git a/src/openai/types/responses/response_error.py b/src/openai/types/responses/response_error.py
new file mode 100644
index 0000000000..90958d1c13
--- /dev/null
+++ b/src/openai/types/responses/response_error.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
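A minimal sketch of the round trip implied by ResponseCustomToolCall and ResponseCustomToolCallOutputParam above; the `{"type": "custom"}` tool definition, the model name, and the stand-in result are assumptions for illustration.

from openai import OpenAI

client = OpenAI()

# Ask the model to use a hypothetical freeform "sql_runner" custom tool.
first = client.responses.create(
    model="gpt-4o",
    input="How many users signed up yesterday?",
    tools=[{"type": "custom", "name": "sql_runner", "description": "Runs read-only SQL."}],
)

# Map each custom_tool_call item to a custom_tool_call_output input item.
follow_up_input = []
for item in first.output:
    if item.type == "custom_tool_call":
        result = "42"  # stand-in for actually executing item.input
        follow_up_input.append(
            {
                "type": "custom_tool_call_output",
                "call_id": item.call_id,
                "output": result,
            }
        )

# The outputs are then sent back as `input` on a follow-up request that
# references the first response via previous_response_id.
second = client.responses.create(
    model="gpt-4o",
    previous_response_id=first.id,
    input=follow_up_input,
)
print(second.output_text)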
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseError"] + + +class ResponseError(BaseModel): + """An error object returned when the model fails to generate a Response.""" + + code: Literal[ + "server_error", + "rate_limit_exceeded", + "invalid_prompt", + "vector_store_timeout", + "invalid_image", + "invalid_image_format", + "invalid_base64_image", + "invalid_image_url", + "image_too_large", + "image_too_small", + "image_parse_error", + "image_content_policy_violation", + "invalid_image_mode", + "image_file_too_large", + "unsupported_image_media_type", + "empty_image_file", + "failed_to_download_image", + "image_file_not_found", + ] + """The error code for the response.""" + + message: str + """A human-readable description of the error.""" diff --git a/src/openai/types/responses/response_error_event.py b/src/openai/types/responses/response_error_event.py new file mode 100644 index 0000000000..1789f731b4 --- /dev/null +++ b/src/openai/types/responses/response_error_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseErrorEvent"] + + +class ResponseErrorEvent(BaseModel): + """Emitted when an error occurs.""" + + code: Optional[str] = None + """The error code.""" + + message: str + """The error message.""" + + param: Optional[str] = None + """The error parameter.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["error"] + """The type of the event. Always `error`.""" diff --git a/src/openai/types/responses/response_failed_event.py b/src/openai/types/responses/response_failed_event.py new file mode 100644 index 0000000000..2232c9678d --- /dev/null +++ b/src/openai/types/responses/response_failed_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseFailedEvent"] + + +class ResponseFailedEvent(BaseModel): + """An event that is emitted when a response fails.""" + + response: Response + """The response that failed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.failed"] + """The type of the event. Always `response.failed`.""" diff --git a/src/openai/types/responses/response_file_search_call_completed_event.py b/src/openai/types/responses/response_file_search_call_completed_event.py new file mode 100644 index 0000000000..88ffa5ac56 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_completed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallCompletedEvent"] + + +class ResponseFileSearchCallCompletedEvent(BaseModel): + """Emitted when a file search call is completed (results found).""" + + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.file_search_call.completed"] + """The type of the event. 
Always `response.file_search_call.completed`.""" diff --git a/src/openai/types/responses/response_file_search_call_in_progress_event.py b/src/openai/types/responses/response_file_search_call_in_progress_event.py new file mode 100644 index 0000000000..4f3504fda4 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_in_progress_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallInProgressEvent"] + + +class ResponseFileSearchCallInProgressEvent(BaseModel): + """Emitted when a file search call is initiated.""" + + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.file_search_call.in_progress"] + """The type of the event. Always `response.file_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_file_search_call_searching_event.py b/src/openai/types/responses/response_file_search_call_searching_event.py new file mode 100644 index 0000000000..5bf1a076dd --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_searching_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallSearchingEvent"] + + +class ResponseFileSearchCallSearchingEvent(BaseModel): + """Emitted when a file search is currently searching.""" + + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is searching.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.file_search_call.searching"] + """The type of the event. Always `response.file_search_call.searching`.""" diff --git a/src/openai/types/responses/response_file_search_tool_call.py b/src/openai/types/responses/response_file_search_tool_call.py new file mode 100644 index 0000000000..fa45631345 --- /dev/null +++ b/src/openai/types/responses/response_file_search_tool_call.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchToolCall", "Result"] + + +class Result(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. 
+    """
+
+    file_id: Optional[str] = None
+    """The unique ID of the file."""
+
+    filename: Optional[str] = None
+    """The name of the file."""
+
+    score: Optional[float] = None
+    """The relevance score of the file - a value between 0 and 1."""
+
+    text: Optional[str] = None
+    """The text that was retrieved from the file."""
+
+
+class ResponseFileSearchToolCall(BaseModel):
+    """The results of a file search tool call.
+
+    See the
+    [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information.
+    """
+
+    id: str
+    """The unique ID of the file search tool call."""
+
+    queries: List[str]
+    """The queries used to search for files."""
+
+    status: Literal["in_progress", "searching", "completed", "incomplete", "failed"]
+    """The status of the file search tool call.
+
+    One of `in_progress`, `searching`, `completed`, `incomplete`, or `failed`.
+    """
+
+    type: Literal["file_search_call"]
+    """The type of the file search tool call. Always `file_search_call`."""
+
+    results: Optional[List[Result]] = None
+    """The results of the file search tool call."""
diff --git a/src/openai/types/responses/response_file_search_tool_call_param.py b/src/openai/types/responses/response_file_search_tool_call_param.py
new file mode 100644
index 0000000000..45a5bbb486
--- /dev/null
+++ b/src/openai/types/responses/response_file_search_tool_call_param.py
@@ -0,0 +1,59 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = ["ResponseFileSearchToolCallParam", "Result"]
+
+
+class Result(TypedDict, total=False):
+    attributes: Optional[Dict[str, Union[str, float, bool]]]
+    """Set of 16 key-value pairs that can be attached to an object.
+
+    This can be useful for storing additional information about the object in a
+    structured format, and querying for objects via API or the dashboard. Keys are
+    strings with a maximum length of 64 characters. Values are strings with a
+    maximum length of 512 characters, booleans, or numbers.
+    """
+
+    file_id: str
+    """The unique ID of the file."""
+
+    filename: str
+    """The name of the file."""
+
+    score: float
+    """The relevance score of the file - a value between 0 and 1."""
+
+    text: str
+    """The text that was retrieved from the file."""
+
+
+class ResponseFileSearchToolCallParam(TypedDict, total=False):
+    """The results of a file search tool call.
+
+    See the
+    [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information.
+    """
+
+    id: Required[str]
+    """The unique ID of the file search tool call."""
+
+    queries: Required[SequenceNotStr[str]]
+    """The queries used to search for files."""
+
+    status: Required[Literal["in_progress", "searching", "completed", "incomplete", "failed"]]
+    """The status of the file search tool call.
+
+    One of `in_progress`, `searching`, `completed`, `incomplete`, or `failed`.
+    """
+
+    type: Required[Literal["file_search_call"]]
+    """The type of the file search tool call.
Always `file_search_call`.""" + + results: Optional[Iterable[Result]] + """The results of the file search tool call.""" diff --git a/src/openai/types/responses/response_format_text_config.py b/src/openai/types/responses/response_format_text_config.py new file mode 100644 index 0000000000..a4896bf9fe --- /dev/null +++ b/src/openai/types/responses/response_format_text_config.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config import ResponseFormatTextJSONSchemaConfig + +__all__ = ["ResponseFormatTextConfig"] + +ResponseFormatTextConfig: TypeAlias = Annotated[ + Union[ResponseFormatText, ResponseFormatTextJSONSchemaConfig, ResponseFormatJSONObject], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_format_text_config_param.py b/src/openai/types/responses/response_format_text_config_param.py new file mode 100644 index 0000000000..fcaf8f3fb6 --- /dev/null +++ b/src/openai/types/responses/response_format_text_config_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config_param import ResponseFormatTextJSONSchemaConfigParam + +__all__ = ["ResponseFormatTextConfigParam"] + +ResponseFormatTextConfigParam: TypeAlias = Union[ + ResponseFormatText, ResponseFormatTextJSONSchemaConfigParam, ResponseFormatJSONObject +] diff --git a/src/openai/types/responses/response_format_text_json_schema_config.py b/src/openai/types/responses/response_format_text_json_schema_config.py new file mode 100644 index 0000000000..b953112621 --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatTextJSONSchemaConfig"] + + +class ResponseFormatTextJSONSchemaConfig(BaseModel): + """JSON Schema response format. + + Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + """ + + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. 
+ """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_format_text_json_schema_config_param.py b/src/openai/types/responses/response_format_text_json_schema_config_param.py new file mode 100644 index 0000000000..6f5c633106 --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config_param.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatTextJSONSchemaConfigParam"] + + +class ResponseFormatTextJSONSchemaConfigParam(TypedDict, total=False): + """JSON Schema response format. + + Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + """ + + name: Required[str] + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + schema: Required[Dict[str, object]] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Required[Literal["json_schema"]] + """The type of response format being defined. Always `json_schema`.""" + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_function_call_arguments_delta_event.py b/src/openai/types/responses/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..0798c2e123 --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_delta_event.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + """Emitted when there is a partial function-call arguments delta.""" + + delta: str + """The function-call arguments delta that is added.""" + + item_id: str + """The ID of the output item that the function-call arguments delta is added to.""" + + output_index: int + """ + The index of the output item that the function-call arguments delta is added to. + """ + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.function_call_arguments.delta"] + """The type of the event. 
Always `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/responses/response_function_call_arguments_done_event.py b/src/openai/types/responses/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..543cd073a2 --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_done_event.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + """Emitted when function-call arguments are finalized.""" + + arguments: str + """The function-call arguments.""" + + item_id: str + """The ID of the item.""" + + name: str + """The name of the function that was called.""" + + output_index: int + """The index of the output item.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.function_call_arguments.done"] diff --git a/src/openai/types/responses/response_function_call_output_item.py b/src/openai/types/responses/response_function_call_output_item.py new file mode 100644 index 0000000000..41898f9eda --- /dev/null +++ b/src/openai/types/responses/response_function_call_output_item.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_input_file_content import ResponseInputFileContent +from .response_input_text_content import ResponseInputTextContent +from .response_input_image_content import ResponseInputImageContent + +__all__ = ["ResponseFunctionCallOutputItem"] + +ResponseFunctionCallOutputItem: TypeAlias = Annotated[ + Union[ResponseInputTextContent, ResponseInputImageContent, ResponseInputFileContent], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_function_call_output_item_list.py b/src/openai/types/responses/response_function_call_output_item_list.py new file mode 100644 index 0000000000..13db577160 --- /dev/null +++ b/src/openai/types/responses/response_function_call_output_item_list.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import TypeAlias + +from .response_function_call_output_item import ResponseFunctionCallOutputItem + +__all__ = ["ResponseFunctionCallOutputItemList"] + +ResponseFunctionCallOutputItemList: TypeAlias = List[ResponseFunctionCallOutputItem] diff --git a/src/openai/types/responses/response_function_call_output_item_list_param.py b/src/openai/types/responses/response_function_call_output_item_list_param.py new file mode 100644 index 0000000000..8c286d3cf0 --- /dev/null +++ b/src/openai/types/responses/response_function_call_output_item_list_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
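The argument delta/done events above lend themselves to per-item accumulation while streaming; in the sketch below the weather tool schema and model name are assumptions, and the event fields follow the models defined above.

import json

from openai import OpenAI

client = OpenAI()

buffers = {}  # item_id -> accumulated argument text

stream = client.responses.create(
    model="gpt-4o",
    input="What's the weather in Paris?",
    tools=[
        {
            "type": "function",
            "name": "get_weather",
            "description": "Look up current weather for a city.",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
        }
    ],
    stream=True,
)

for event in stream:
    if event.type == "response.function_call_arguments.delta":
        # Concatenate deltas per output item until the done event arrives.
        buffers[event.item_id] = buffers.get(event.item_id, "") + event.delta
    elif event.type == "response.function_call_arguments.done":
        args = json.loads(event.arguments)  # same text as the accumulated deltas
        print(f"{event.name} called with {args}")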
+ +from __future__ import annotations + +from typing import List, Union +from typing_extensions import TypeAlias + +from .response_input_file_content_param import ResponseInputFileContentParam +from .response_input_text_content_param import ResponseInputTextContentParam +from .response_input_image_content_param import ResponseInputImageContentParam + +__all__ = ["ResponseFunctionCallOutputItemListParam", "ResponseFunctionCallOutputItemParam"] + +ResponseFunctionCallOutputItemParam: TypeAlias = Union[ + ResponseInputTextContentParam, ResponseInputImageContentParam, ResponseInputFileContentParam +] + +ResponseFunctionCallOutputItemListParam: TypeAlias = List[ResponseFunctionCallOutputItemParam] diff --git a/src/openai/types/responses/response_function_call_output_item_param.py b/src/openai/types/responses/response_function_call_output_item_param.py new file mode 100644 index 0000000000..2a703cac1e --- /dev/null +++ b/src/openai/types/responses/response_function_call_output_item_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .response_input_file_content_param import ResponseInputFileContentParam +from .response_input_text_content_param import ResponseInputTextContentParam +from .response_input_image_content_param import ResponseInputImageContentParam + +__all__ = ["ResponseFunctionCallOutputItemParam"] + +ResponseFunctionCallOutputItemParam: TypeAlias = Union[ + ResponseInputTextContentParam, ResponseInputImageContentParam, ResponseInputFileContentParam +] diff --git a/src/openai/types/responses/response_function_shell_call_output_content.py b/src/openai/types/responses/response_function_shell_call_output_content.py new file mode 100644 index 0000000000..dae48f14da --- /dev/null +++ b/src/openai/types/responses/response_function_shell_call_output_content.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseFunctionShellCallOutputContent", "Outcome", "OutcomeTimeout", "OutcomeExit"] + + +class OutcomeTimeout(BaseModel): + """Indicates that the shell call exceeded its configured time limit.""" + + type: Literal["timeout"] + """The outcome type. Always `timeout`.""" + + +class OutcomeExit(BaseModel): + """Indicates that the shell commands finished and returned an exit code.""" + + exit_code: int + """The exit code returned by the shell process.""" + + type: Literal["exit"] + """The outcome type. 
Always `exit`.""" + + +Outcome: TypeAlias = Annotated[Union[OutcomeTimeout, OutcomeExit], PropertyInfo(discriminator="type")] + + +class ResponseFunctionShellCallOutputContent(BaseModel): + """Captured stdout and stderr for a portion of a shell tool call output.""" + + outcome: Outcome + """The exit or timeout outcome associated with this shell call.""" + + stderr: str + """Captured stderr output for the shell call.""" + + stdout: str + """Captured stdout output for the shell call.""" diff --git a/src/openai/types/responses/response_function_shell_call_output_content_param.py b/src/openai/types/responses/response_function_shell_call_output_content_param.py new file mode 100644 index 0000000000..4d8ea70d08 --- /dev/null +++ b/src/openai/types/responses/response_function_shell_call_output_content_param.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = ["ResponseFunctionShellCallOutputContentParam", "Outcome", "OutcomeTimeout", "OutcomeExit"] + + +class OutcomeTimeout(TypedDict, total=False): + """Indicates that the shell call exceeded its configured time limit.""" + + type: Required[Literal["timeout"]] + """The outcome type. Always `timeout`.""" + + +class OutcomeExit(TypedDict, total=False): + """Indicates that the shell commands finished and returned an exit code.""" + + exit_code: Required[int] + """The exit code returned by the shell process.""" + + type: Required[Literal["exit"]] + """The outcome type. Always `exit`.""" + + +Outcome: TypeAlias = Union[OutcomeTimeout, OutcomeExit] + + +class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False): + """Captured stdout and stderr for a portion of a shell tool call output.""" + + outcome: Required[Outcome] + """The exit or timeout outcome associated with this shell call.""" + + stderr: Required[str] + """Captured stderr output for the shell call.""" + + stdout: Required[str] + """Captured stdout output for the shell call.""" diff --git a/src/openai/types/responses/response_function_shell_tool_call.py b/src/openai/types/responses/response_function_shell_tool_call.py new file mode 100644 index 0000000000..7c6a184ed4 --- /dev/null +++ b/src/openai/types/responses/response_function_shell_tool_call.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionShellToolCall", "Action"] + + +class Action(BaseModel): + """The shell commands and limits that describe how to run the tool call.""" + + commands: List[str] + + max_output_length: Optional[int] = None + """Optional maximum number of characters to return from each command.""" + + timeout_ms: Optional[int] = None + """Optional timeout in milliseconds for the commands.""" + + +class ResponseFunctionShellToolCall(BaseModel): + """A tool call that executes one or more shell commands in a managed environment.""" + + id: str + """The unique ID of the shell tool call. + + Populated when this item is returned via API. + """ + + action: Action + """The shell commands and limits that describe how to run the tool call.""" + + call_id: str + """The unique ID of the shell tool call generated by the model.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the shell call. 
+ + One of `in_progress`, `completed`, or `incomplete`. + """ + + type: Literal["shell_call"] + """The type of the item. Always `shell_call`.""" + + created_by: Optional[str] = None + """The ID of the entity that created this tool call.""" diff --git a/src/openai/types/responses/response_function_shell_tool_call_output.py b/src/openai/types/responses/response_function_shell_tool_call_output.py new file mode 100644 index 0000000000..7885ee2f83 --- /dev/null +++ b/src/openai/types/responses/response_function_shell_tool_call_output.py @@ -0,0 +1,78 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseFunctionShellToolCallOutput", + "Output", + "OutputOutcome", + "OutputOutcomeTimeout", + "OutputOutcomeExit", +] + + +class OutputOutcomeTimeout(BaseModel): + """Indicates that the shell call exceeded its configured time limit.""" + + type: Literal["timeout"] + """The outcome type. Always `timeout`.""" + + +class OutputOutcomeExit(BaseModel): + """Indicates that the shell commands finished and returned an exit code.""" + + exit_code: int + """Exit code from the shell process.""" + + type: Literal["exit"] + """The outcome type. Always `exit`.""" + + +OutputOutcome: TypeAlias = Annotated[Union[OutputOutcomeTimeout, OutputOutcomeExit], PropertyInfo(discriminator="type")] + + +class Output(BaseModel): + """The content of a shell call output.""" + + outcome: OutputOutcome + """ + Represents either an exit outcome (with an exit code) or a timeout outcome for a + shell call output chunk. + """ + + stderr: str + + stdout: str + + created_by: Optional[str] = None + + +class ResponseFunctionShellToolCallOutput(BaseModel): + """The output of a shell tool call.""" + + id: str + """The unique ID of the shell call output. + + Populated when this item is returned via API. + """ + + call_id: str + """The unique ID of the shell tool call generated by the model.""" + + max_output_length: Optional[int] = None + """The maximum length of the shell command output. + + This is generated by the model and should be passed back with the raw output. + """ + + output: List[Output] + """An array of shell call output contents""" + + type: Literal["shell_call_output"] + """The type of the shell call output. Always `shell_call_output`.""" + + created_by: Optional[str] = None diff --git a/src/openai/types/responses/response_function_tool_call.py b/src/openai/types/responses/response_function_tool_call.py new file mode 100644 index 0000000000..194e3f7d6a --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionToolCall"] + + +class ResponseFunctionToolCall(BaseModel): + """A tool call to run a function. + + See the + [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information. 
+    """
+
+    arguments: str
+    """A JSON string of the arguments to pass to the function."""
+
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    name: str
+    """The name of the function to run."""
+
+    type: Literal["function_call"]
+    """The type of the function tool call. Always `function_call`."""
+
+    id: Optional[str] = None
+    """The unique ID of the function tool call."""
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of the item.
+
+    One of `in_progress`, `completed`, or `incomplete`. Populated when items are
+    returned via API.
+    """
diff --git a/src/openai/types/responses/response_function_tool_call_item.py b/src/openai/types/responses/response_function_tool_call_item.py
new file mode 100644
index 0000000000..3df299e512
--- /dev/null
+++ b/src/openai/types/responses/response_function_tool_call_item.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .response_function_tool_call import ResponseFunctionToolCall
+
+__all__ = ["ResponseFunctionToolCallItem"]
+
+
+class ResponseFunctionToolCallItem(ResponseFunctionToolCall):
+    """A tool call to run a function.
+
+    See the
+    [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information.
+    """
+
+    id: str  # type: ignore
+    """The unique ID of the function tool call."""
diff --git a/src/openai/types/responses/response_function_tool_call_output_item.py b/src/openai/types/responses/response_function_tool_call_output_item.py
new file mode 100644
index 0000000000..1a2c848cb3
--- /dev/null
+++ b/src/openai/types/responses/response_function_tool_call_output_item.py
@@ -0,0 +1,40 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .response_input_file import ResponseInputFile
+from .response_input_text import ResponseInputText
+from .response_input_image import ResponseInputImage
+
+__all__ = ["ResponseFunctionToolCallOutputItem", "OutputOutputContentList"]
+
+OutputOutputContentList: TypeAlias = Annotated[
+    Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type")
+]
+
+
+class ResponseFunctionToolCallOutputItem(BaseModel):
+    id: str
+    """The unique ID of the function call tool output."""
+
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    output: Union[str, List[OutputOutputContentList]]
+    """
+    The output from the function call generated by your code. Can be a string or a
+    list of output content.
+    """
+
+    type: Literal["function_call_output"]
+    """The type of the function tool call output. Always `function_call_output`."""
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of the item.
+
+    One of `in_progress`, `completed`, or `incomplete`. Populated when items are
+    returned via API.
+    """
diff --git a/src/openai/types/responses/response_function_tool_call_param.py b/src/openai/types/responses/response_function_tool_call_param.py
new file mode 100644
index 0000000000..4e8dd3d629
--- /dev/null
+++ b/src/openai/types/responses/response_function_tool_call_param.py
@@ -0,0 +1,37 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
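A sketch of the function-calling round trip built on ResponseFunctionToolCall and the corresponding `function_call_output` input item; the get_weather helper, the tool schema, and the model name are hypothetical.

import json

from openai import OpenAI

client = OpenAI()


def get_weather(city: str) -> str:
    # Hypothetical local implementation of the tool.
    return f"Sunny in {city}"


tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Look up current weather for a city.",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    }
]

response = client.responses.create(model="gpt-4o", input="Weather in Paris?", tools=tools)

# Execute each function_call item and map its result back via call_id.
input_items = []
for item in response.output:
    if item.type == "function_call":
        args = json.loads(item.arguments)
        input_items.append(
            {
                "type": "function_call_output",
                "call_id": item.call_id,
                "output": get_weather(**args),
            }
        )

final = client.responses.create(
    model="gpt-4o",
    previous_response_id=response.id,
    input=input_items,
    tools=tools,
)
print(final.output_text)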
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFunctionToolCallParam"] + + +class ResponseFunctionToolCallParam(TypedDict, total=False): + """A tool call to run a function. + + See the + [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information. + """ + + arguments: Required[str] + """A JSON string of the arguments to pass to the function.""" + + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + name: Required[str] + """The name of the function to run.""" + + type: Required[Literal["function_call"]] + """The type of the function tool call. Always `function_call`.""" + + id: str + """The unique ID of the function tool call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_function_web_search.py b/src/openai/types/responses/response_function_web_search.py new file mode 100644 index 0000000000..1450fba4d1 --- /dev/null +++ b/src/openai/types/responses/response_function_web_search.py @@ -0,0 +1,81 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseFunctionWebSearch", "Action", "ActionSearch", "ActionSearchSource", "ActionOpenPage", "ActionFind"] + + +class ActionSearchSource(BaseModel): + """A source used in the search.""" + + type: Literal["url"] + """The type of source. Always `url`.""" + + url: str + """The URL of the source.""" + + +class ActionSearch(BaseModel): + """Action type "search" - Performs a web search query.""" + + query: str + """The search query.""" + + type: Literal["search"] + """The action type.""" + + sources: Optional[List[ActionSearchSource]] = None + """The sources used in the search.""" + + +class ActionOpenPage(BaseModel): + """Action type "open_page" - Opens a specific URL from search results.""" + + type: Literal["open_page"] + """The action type.""" + + url: str + """The URL opened by the model.""" + + +class ActionFind(BaseModel): + """Action type "find": Searches for a pattern within a loaded page.""" + + pattern: str + """The pattern or text to search for within the page.""" + + type: Literal["find"] + """The action type.""" + + url: str + """The URL of the page searched for the pattern.""" + + +Action: TypeAlias = Annotated[Union[ActionSearch, ActionOpenPage, ActionFind], PropertyInfo(discriminator="type")] + + +class ResponseFunctionWebSearch(BaseModel): + """The results of a web search tool call. + + See the + [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. + """ + + id: str + """The unique ID of the web search tool call.""" + + action: Action + """ + An object describing the specific action taken in this web search call. Includes + details on how the model used the web (search, open_page, find). + """ + + status: Literal["in_progress", "searching", "completed", "failed"] + """The status of the web search tool call.""" + + type: Literal["web_search_call"] + """The type of the web search tool call. 
Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_function_web_search_param.py b/src/openai/types/responses/response_function_web_search_param.py new file mode 100644 index 0000000000..8d0b60334d --- /dev/null +++ b/src/openai/types/responses/response_function_web_search_param.py @@ -0,0 +1,87 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseFunctionWebSearchParam", + "Action", + "ActionSearch", + "ActionSearchSource", + "ActionOpenPage", + "ActionFind", +] + + +class ActionSearchSource(TypedDict, total=False): + """A source used in the search.""" + + type: Required[Literal["url"]] + """The type of source. Always `url`.""" + + url: Required[str] + """The URL of the source.""" + + +class ActionSearch(TypedDict, total=False): + """Action type "search" - Performs a web search query.""" + + query: Required[str] + """The search query.""" + + type: Required[Literal["search"]] + """The action type.""" + + sources: Iterable[ActionSearchSource] + """The sources used in the search.""" + + +class ActionOpenPage(TypedDict, total=False): + """Action type "open_page" - Opens a specific URL from search results.""" + + type: Required[Literal["open_page"]] + """The action type.""" + + url: Required[str] + """The URL opened by the model.""" + + +class ActionFind(TypedDict, total=False): + """Action type "find": Searches for a pattern within a loaded page.""" + + pattern: Required[str] + """The pattern or text to search for within the page.""" + + type: Required[Literal["find"]] + """The action type.""" + + url: Required[str] + """The URL of the page searched for the pattern.""" + + +Action: TypeAlias = Union[ActionSearch, ActionOpenPage, ActionFind] + + +class ResponseFunctionWebSearchParam(TypedDict, total=False): + """The results of a web search tool call. + + See the + [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. + """ + + id: Required[str] + """The unique ID of the web search tool call.""" + + action: Required[Action] + """ + An object describing the specific action taken in this web search call. Includes + details on how the model used the web (search, open_page, find). + """ + + status: Required[Literal["in_progress", "searching", "completed", "failed"]] + """The status of the web search tool call.""" + + type: Required[Literal["web_search_call"]] + """The type of the web search tool call. Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_image_gen_call_completed_event.py b/src/openai/types/responses/response_image_gen_call_completed_event.py new file mode 100644 index 0000000000..f6ce9d0fd8 --- /dev/null +++ b/src/openai/types/responses/response_image_gen_call_completed_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseImageGenCallCompletedEvent"] + + +class ResponseImageGenCallCompletedEvent(BaseModel): + """ + Emitted when an image generation tool call has completed and the final image is available. 
+ """ + + item_id: str + """The unique identifier of the image generation item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.image_generation_call.completed"] + """The type of the event. Always 'response.image_generation_call.completed'.""" diff --git a/src/openai/types/responses/response_image_gen_call_generating_event.py b/src/openai/types/responses/response_image_gen_call_generating_event.py new file mode 100644 index 0000000000..8e3026d0dc --- /dev/null +++ b/src/openai/types/responses/response_image_gen_call_generating_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseImageGenCallGeneratingEvent"] + + +class ResponseImageGenCallGeneratingEvent(BaseModel): + """ + Emitted when an image generation tool call is actively generating an image (intermediate state). + """ + + item_id: str + """The unique identifier of the image generation item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of the image generation item being processed.""" + + type: Literal["response.image_generation_call.generating"] + """The type of the event. Always 'response.image_generation_call.generating'.""" diff --git a/src/openai/types/responses/response_image_gen_call_in_progress_event.py b/src/openai/types/responses/response_image_gen_call_in_progress_event.py new file mode 100644 index 0000000000..60726a22b4 --- /dev/null +++ b/src/openai/types/responses/response_image_gen_call_in_progress_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseImageGenCallInProgressEvent"] + + +class ResponseImageGenCallInProgressEvent(BaseModel): + """Emitted when an image generation tool call is in progress.""" + + item_id: str + """The unique identifier of the image generation item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of the image generation item being processed.""" + + type: Literal["response.image_generation_call.in_progress"] + """The type of the event. Always 'response.image_generation_call.in_progress'.""" diff --git a/src/openai/types/responses/response_image_gen_call_partial_image_event.py b/src/openai/types/responses/response_image_gen_call_partial_image_event.py new file mode 100644 index 0000000000..289d5d44c0 --- /dev/null +++ b/src/openai/types/responses/response_image_gen_call_partial_image_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseImageGenCallPartialImageEvent"] + + +class ResponseImageGenCallPartialImageEvent(BaseModel): + """Emitted when a partial image is available during image generation streaming.""" + + item_id: str + """The unique identifier of the image generation item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + partial_image_b64: str + """Base64-encoded partial image data, suitable for rendering as an image.""" + + partial_image_index: int + """ + 0-based index for the partial image (backend is 1-based, but this is 0-based for + the user). + """ + + sequence_number: int + """The sequence number of the image generation item being processed.""" + + type: Literal["response.image_generation_call.partial_image"] + """The type of the event. Always 'response.image_generation_call.partial_image'.""" diff --git a/src/openai/types/responses/response_in_progress_event.py b/src/openai/types/responses/response_in_progress_event.py new file mode 100644 index 0000000000..9d9bbd94b0 --- /dev/null +++ b/src/openai/types/responses/response_in_progress_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseInProgressEvent"] + + +class ResponseInProgressEvent(BaseModel): + """Emitted when the response is in progress.""" + + response: Response + """The response that is in progress.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.in_progress"] + """The type of the event. Always `response.in_progress`.""" diff --git a/src/openai/types/responses/response_includable.py b/src/openai/types/responses/response_includable.py new file mode 100644 index 0000000000..675c83405a --- /dev/null +++ b/src/openai/types/responses/response_includable.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseIncludable"] + +ResponseIncludable: TypeAlias = Literal[ + "file_search_call.results", + "web_search_call.results", + "web_search_call.action.sources", + "message.input_image.image_url", + "computer_call_output.output.image_url", + "code_interpreter_call.outputs", + "reasoning.encrypted_content", + "message.output_text.logprobs", +] diff --git a/src/openai/types/responses/response_incomplete_event.py b/src/openai/types/responses/response_incomplete_event.py new file mode 100644 index 0000000000..ef99c5f0b2 --- /dev/null +++ b/src/openai/types/responses/response_incomplete_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseIncompleteEvent"] + + +class ResponseIncompleteEvent(BaseModel): + """An event that is emitted when a response finishes as incomplete.""" + + response: Response + """The response that was incomplete.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.incomplete"] + """The type of the event. 
Always `response.incomplete`.""" diff --git a/src/openai/types/responses/response_input_audio.py b/src/openai/types/responses/response_input_audio.py new file mode 100644 index 0000000000..f362ba4133 --- /dev/null +++ b/src/openai/types/responses/response_input_audio.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputAudio", "InputAudio"] + + +class InputAudio(BaseModel): + data: str + """Base64-encoded audio data.""" + + format: Literal["mp3", "wav"] + """The format of the audio data. Currently supported formats are `mp3` and `wav`.""" + + +class ResponseInputAudio(BaseModel): + """An audio input to the model.""" + + input_audio: InputAudio + + type: Literal["input_audio"] + """The type of the input item. Always `input_audio`.""" diff --git a/src/openai/types/responses/response_input_audio_param.py b/src/openai/types/responses/response_input_audio_param.py new file mode 100644 index 0000000000..0be935c54d --- /dev/null +++ b/src/openai/types/responses/response_input_audio_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputAudioParam", "InputAudio"] + + +class InputAudio(TypedDict, total=False): + data: Required[str] + """Base64-encoded audio data.""" + + format: Required[Literal["mp3", "wav"]] + """The format of the audio data. Currently supported formats are `mp3` and `wav`.""" + + +class ResponseInputAudioParam(TypedDict, total=False): + """An audio input to the model.""" + + input_audio: Required[InputAudio] + + type: Required[Literal["input_audio"]] + """The type of the input item. Always `input_audio`.""" diff --git a/src/openai/types/responses/response_input_content.py b/src/openai/types/responses/response_input_content.py new file mode 100644 index 0000000000..1726909a17 --- /dev/null +++ b/src/openai/types/responses/response_input_content.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_input_file import ResponseInputFile +from .response_input_text import ResponseInputText +from .response_input_image import ResponseInputImage + +__all__ = ["ResponseInputContent"] + +ResponseInputContent: TypeAlias = Annotated[ + Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/responses/response_input_content_param.py b/src/openai/types/responses/response_input_content_param.py new file mode 100644 index 0000000000..7791cdfd8e --- /dev/null +++ b/src/openai/types/responses/response_input_content_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
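Illustrative aside (not part of the generated diff): a minimal sketch constructing a `ResponseInputAudioParam` by hand, matching the TypedDict shape defined above; the base64 payload is a placeholder, not real audio data.

from openai.types.responses.response_input_audio_param import ResponseInputAudioParam

audio_part: ResponseInputAudioParam = {
    "type": "input_audio",
    "input_audio": {
        "data": "UklGRiQAAABXQVZF",  # placeholder base64-encoded audio bytes
        "format": "wav",  # `mp3` is the other supported format
    },
}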
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] diff --git a/src/openai/types/responses/response_input_file.py b/src/openai/types/responses/response_input_file.py new file mode 100644 index 0000000000..3e5fb70c5f --- /dev/null +++ b/src/openai/types/responses/response_input_file.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputFile"] + + +class ResponseInputFile(BaseModel): + """A file input to the model.""" + + type: Literal["input_file"] + """The type of the input item. Always `input_file`.""" + + file_data: Optional[str] = None + """The content of the file to be sent to the model.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + file_url: Optional[str] = None + """The URL of the file to be sent to the model.""" + + filename: Optional[str] = None + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_file_content.py b/src/openai/types/responses/response_input_file_content.py new file mode 100644 index 0000000000..f0dfef55d0 --- /dev/null +++ b/src/openai/types/responses/response_input_file_content.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputFileContent"] + + +class ResponseInputFileContent(BaseModel): + """A file input to the model.""" + + type: Literal["input_file"] + """The type of the input item. Always `input_file`.""" + + file_data: Optional[str] = None + """The base64-encoded data of the file to be sent to the model.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + file_url: Optional[str] = None + """The URL of the file to be sent to the model.""" + + filename: Optional[str] = None + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_file_content_param.py b/src/openai/types/responses/response_input_file_content_param.py new file mode 100644 index 0000000000..376f6c7a45 --- /dev/null +++ b/src/openai/types/responses/response_input_file_content_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputFileContentParam"] + + +class ResponseInputFileContentParam(TypedDict, total=False): + """A file input to the model.""" + + type: Required[Literal["input_file"]] + """The type of the input item. 
Always `input_file`.""" + + file_data: Optional[str] + """The base64-encoded data of the file to be sent to the model.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + file_url: Optional[str] + """The URL of the file to be sent to the model.""" + + filename: Optional[str] + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_file_param.py b/src/openai/types/responses/response_input_file_param.py new file mode 100644 index 0000000000..8b5da20245 --- /dev/null +++ b/src/openai/types/responses/response_input_file_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputFileParam"] + + +class ResponseInputFileParam(TypedDict, total=False): + """A file input to the model.""" + + type: Required[Literal["input_file"]] + """The type of the input item. Always `input_file`.""" + + file_data: str + """The content of the file to be sent to the model.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + file_url: str + """The URL of the file to be sent to the model.""" + + filename: str + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_image.py b/src/openai/types/responses/response_input_image.py new file mode 100644 index 0000000000..500bc4b346 --- /dev/null +++ b/src/openai/types/responses/response_input_image.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputImage"] + + +class ResponseInputImage(BaseModel): + """An image input to the model. + + Learn about [image inputs](https://platform.openai.com/docs/guides/vision). + """ + + detail: Literal["low", "high", "auto"] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Literal["input_image"] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] = None + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_image_content.py b/src/openai/types/responses/response_input_image_content.py new file mode 100644 index 0000000000..e38bc28d5e --- /dev/null +++ b/src/openai/types/responses/response_input_image_content.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputImageContent"] + + +class ResponseInputImageContent(BaseModel): + """An image input to the model. + + Learn about [image inputs](https://platform.openai.com/docs/guides/vision) + """ + + type: Literal["input_image"] + """The type of the input item. Always `input_image`.""" + + detail: Optional[Literal["low", "high", "auto"]] = None + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. 
+ """ + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] = None + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_image_content_param.py b/src/openai/types/responses/response_input_image_content_param.py new file mode 100644 index 0000000000..c21f46d736 --- /dev/null +++ b/src/openai/types/responses/response_input_image_content_param.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputImageContentParam"] + + +class ResponseInputImageContentParam(TypedDict, total=False): + """An image input to the model. + + Learn about [image inputs](https://platform.openai.com/docs/guides/vision) + """ + + type: Required[Literal["input_image"]] + """The type of the input item. Always `input_image`.""" + + detail: Optional[Literal["low", "high", "auto"]] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_image_param.py b/src/openai/types/responses/response_input_image_param.py new file mode 100644 index 0000000000..fd8c1bd070 --- /dev/null +++ b/src/openai/types/responses/response_input_image_param.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputImageParam"] + + +class ResponseInputImageParam(TypedDict, total=False): + """An image input to the model. + + Learn about [image inputs](https://platform.openai.com/docs/guides/vision). + """ + + detail: Required[Literal["low", "high", "auto"]] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Required[Literal["input_image"]] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_item.py b/src/openai/types/responses/response_input_item.py new file mode 100644 index 0000000000..23eb2c8950 --- /dev/null +++ b/src/openai/types/responses/response_input_item.py @@ -0,0 +1,534 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .easy_input_message import EasyInputMessage +from .response_output_message import ResponseOutputMessage +from .response_reasoning_item import ResponseReasoningItem +from .response_custom_tool_call import ResponseCustomToolCall +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_compaction_item_param import ResponseCompactionItemParam +from .response_file_search_tool_call import ResponseFileSearchToolCall +from .response_custom_tool_call_output import ResponseCustomToolCallOutput +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall +from .response_input_message_content_list import ResponseInputMessageContentList +from .response_function_call_output_item_list import ResponseFunctionCallOutputItemList +from .response_function_shell_call_output_content import ResponseFunctionShellCallOutputContent +from .response_computer_tool_call_output_screenshot import ResponseComputerToolCallOutputScreenshot + +__all__ = [ + "ResponseInputItem", + "Message", + "ComputerCallOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "LocalShellCallOutput", + "ShellCall", + "ShellCallAction", + "ShellCallOutput", + "ApplyPatchCall", + "ApplyPatchCallOperation", + "ApplyPatchCallOperationCreateFile", + "ApplyPatchCallOperationDeleteFile", + "ApplyPatchCallOperationUpdateFile", + "ApplyPatchCallOutput", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "McpApprovalResponse", + "McpCall", + "ItemReference", +] + + +class Message(BaseModel): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. + """ + + content: ResponseInputMessageContentList + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Literal["user", "system", "developer"] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(BaseModel): + """A pending safety check for the computer call.""" + + id: str + """The ID of the pending safety check.""" + + code: Optional[str] = None + """The type of the pending safety check.""" + + message: Optional[str] = None + """Details about the pending safety check.""" + + +class ComputerCallOutput(BaseModel): + """The output of a computer tool call.""" + + call_id: str + """The ID of the computer tool call that produced the output.""" + + output: ResponseComputerToolCallOutputScreenshot + """A computer screenshot image used with the computer use tool.""" + + type: Literal["computer_call_output"] + """The type of the computer tool call output. 
Always `computer_call_output`.""" + + id: Optional[str] = None + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Optional[List[ComputerCallOutputAcknowledgedSafetyCheck]] = None + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(BaseModel): + """The output of a function tool call.""" + + call_id: str + """The unique ID of the function tool call generated by the model.""" + + output: Union[str, ResponseFunctionCallOutputItemList] + """Text, image, or file output of the function tool call.""" + + type: Literal["function_call_output"] + """The type of the function tool call output. Always `function_call_output`.""" + + id: Optional[str] = None + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +class ImageGenerationCall(BaseModel): + """An image generation request made by the model.""" + + id: str + """The unique ID of the image generation call.""" + + result: Optional[str] = None + """The generated image encoded in base64.""" + + status: Literal["in_progress", "completed", "generating", "failed"] + """The status of the image generation call.""" + + type: Literal["image_generation_call"] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(BaseModel): + """Execute a shell command on the server.""" + + command: List[str] + """The command to run.""" + + env: Dict[str, str] + """Environment variables to set for the command.""" + + type: Literal["exec"] + """The type of the local shell action. Always `exec`.""" + + timeout_ms: Optional[int] = None + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] = None + """Optional user to run the command as.""" + + working_directory: Optional[str] = None + """Optional working directory to run the command in.""" + + +class LocalShellCall(BaseModel): + """A tool call to run a command on the local shell.""" + + id: str + """The unique ID of the local shell call.""" + + action: LocalShellCallAction + """Execute a shell command on the server.""" + + call_id: str + """The unique ID of the local shell tool call generated by the model.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the local shell call.""" + + type: Literal["local_shell_call"] + """The type of the local shell call. Always `local_shell_call`.""" + + +class LocalShellCallOutput(BaseModel): + """The output of a local shell tool call.""" + + id: str + """The unique ID of the local shell tool call generated by the model.""" + + output: str + """A JSON string of the output of the local shell tool call.""" + + type: Literal["local_shell_call_output"] + """The type of the local shell tool call output. Always `local_shell_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. 
One of `in_progress`, `completed`, or `incomplete`.""" + + +class ShellCallAction(BaseModel): + """The shell commands and limits that describe how to run the tool call.""" + + commands: List[str] + """Ordered shell commands for the execution environment to run.""" + + max_output_length: Optional[int] = None + """ + Maximum number of UTF-8 characters to capture from combined stdout and stderr + output. + """ + + timeout_ms: Optional[int] = None + """Maximum wall-clock time in milliseconds to allow the shell commands to run.""" + + +class ShellCall(BaseModel): + """A tool representing a request to execute one or more shell commands.""" + + action: ShellCallAction + """The shell commands and limits that describe how to run the tool call.""" + + call_id: str + """The unique ID of the shell tool call generated by the model.""" + + type: Literal["shell_call"] + """The type of the item. Always `shell_call`.""" + + id: Optional[str] = None + """The unique ID of the shell tool call. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the shell call. + + One of `in_progress`, `completed`, or `incomplete`. + """ + + +class ShellCallOutput(BaseModel): + """The streamed output items emitted by a shell tool call.""" + + call_id: str + """The unique ID of the shell tool call generated by the model.""" + + output: List[ResponseFunctionShellCallOutputContent] + """ + Captured chunks of stdout and stderr output, along with their associated + outcomes. + """ + + type: Literal["shell_call_output"] + """The type of the item. Always `shell_call_output`.""" + + id: Optional[str] = None + """The unique ID of the shell tool call output. + + Populated when this item is returned via API. + """ + + max_output_length: Optional[int] = None + """ + The maximum number of UTF-8 characters captured for this shell call's combined + output. + """ + + +class ApplyPatchCallOperationCreateFile(BaseModel): + """Instruction for creating a new file via the apply_patch tool.""" + + diff: str + """Unified diff content to apply when creating the file.""" + + path: str + """Path of the file to create relative to the workspace root.""" + + type: Literal["create_file"] + """The operation type. Always `create_file`.""" + + +class ApplyPatchCallOperationDeleteFile(BaseModel): + """Instruction for deleting an existing file via the apply_patch tool.""" + + path: str + """Path of the file to delete relative to the workspace root.""" + + type: Literal["delete_file"] + """The operation type. Always `delete_file`.""" + + +class ApplyPatchCallOperationUpdateFile(BaseModel): + """Instruction for updating an existing file via the apply_patch tool.""" + + diff: str + """Unified diff content to apply to the existing file.""" + + path: str + """Path of the file to update relative to the workspace root.""" + + type: Literal["update_file"] + """The operation type. Always `update_file`.""" + + +ApplyPatchCallOperation: TypeAlias = Annotated[ + Union[ApplyPatchCallOperationCreateFile, ApplyPatchCallOperationDeleteFile, ApplyPatchCallOperationUpdateFile], + PropertyInfo(discriminator="type"), +] + + +class ApplyPatchCall(BaseModel): + """ + A tool call representing a request to create, delete, or update files using diff patches. + """ + + call_id: str + """The unique ID of the apply patch tool call generated by the model.""" + + operation: ApplyPatchCallOperation + """ + The specific create, delete, or update instruction for the apply_patch tool + call. 
+ """ + + status: Literal["in_progress", "completed"] + """The status of the apply patch tool call. One of `in_progress` or `completed`.""" + + type: Literal["apply_patch_call"] + """The type of the item. Always `apply_patch_call`.""" + + id: Optional[str] = None + """The unique ID of the apply patch tool call. + + Populated when this item is returned via API. + """ + + +class ApplyPatchCallOutput(BaseModel): + """The streamed output emitted by an apply patch tool call.""" + + call_id: str + """The unique ID of the apply patch tool call generated by the model.""" + + status: Literal["completed", "failed"] + """The status of the apply patch tool call output. One of `completed` or `failed`.""" + + type: Literal["apply_patch_call_output"] + """The type of the item. Always `apply_patch_call_output`.""" + + id: Optional[str] = None + """The unique ID of the apply patch tool call output. + + Populated when this item is returned via API. + """ + + output: Optional[str] = None + """ + Optional human-readable log text from the apply patch tool (e.g., patch results + or errors). + """ + + +class McpListToolsTool(BaseModel): + """A tool available on an MCP server.""" + + input_schema: object + """The JSON schema describing the tool's input.""" + + name: str + """The name of the tool.""" + + annotations: Optional[object] = None + """Additional annotations about the tool.""" + + description: Optional[str] = None + """The description of the tool.""" + + +class McpListTools(BaseModel): + """A list of tools available on an MCP server.""" + + id: str + """The unique ID of the list.""" + + server_label: str + """The label of the MCP server.""" + + tools: List[McpListToolsTool] + """The tools available on the server.""" + + type: Literal["mcp_list_tools"] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] = None + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(BaseModel): + """A request for human approval of a tool invocation.""" + + id: str + """The unique ID of the approval request.""" + + arguments: str + """A JSON string of arguments for the tool.""" + + name: str + """The name of the tool to run.""" + + server_label: str + """The label of the MCP server making the request.""" + + type: Literal["mcp_approval_request"] + """The type of the item. Always `mcp_approval_request`.""" + + +class McpApprovalResponse(BaseModel): + """A response to an MCP approval request.""" + + approval_request_id: str + """The ID of the approval request being answered.""" + + approve: bool + """Whether the request was approved.""" + + type: Literal["mcp_approval_response"] + """The type of the item. Always `mcp_approval_response`.""" + + id: Optional[str] = None + """The unique ID of the approval response""" + + reason: Optional[str] = None + """Optional reason for the decision.""" + + +class McpCall(BaseModel): + """An invocation of a tool on an MCP server.""" + + id: str + """The unique ID of the tool call.""" + + arguments: str + """A JSON string of the arguments passed to the tool.""" + + name: str + """The name of the tool that was run.""" + + server_label: str + """The label of the MCP server running the tool.""" + + type: Literal["mcp_call"] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] = None + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. 
+ """ + + error: Optional[str] = None + """The error from the tool call, if any.""" + + output: Optional[str] = None + """The output from the tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete", "calling", "failed"]] = None + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. + """ + + +class ItemReference(BaseModel): + """An internal identifier for an item to reference.""" + + id: str + """The ID of the item to reference.""" + + type: Optional[Literal["item_reference"]] = None + """The type of item to reference. Always `item_reference`.""" + + +ResponseInputItem: TypeAlias = Annotated[ + Union[ + EasyInputMessage, + Message, + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseComputerToolCall, + ComputerCallOutput, + ResponseFunctionWebSearch, + ResponseFunctionToolCall, + FunctionCallOutput, + ResponseReasoningItem, + ResponseCompactionItemParam, + ImageGenerationCall, + ResponseCodeInterpreterToolCall, + LocalShellCall, + LocalShellCallOutput, + ShellCall, + ShellCallOutput, + ApplyPatchCall, + ApplyPatchCallOutput, + McpListTools, + McpApprovalRequest, + McpApprovalResponse, + McpCall, + ResponseCustomToolCallOutput, + ResponseCustomToolCall, + ItemReference, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py new file mode 100644 index 0000000000..2c42b93021 --- /dev/null +++ b/src/openai/types/responses/response_input_item_param.py @@ -0,0 +1,531 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_custom_tool_call_param import ResponseCustomToolCallParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_compaction_item_param_param import ResponseCompactionItemParamParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam +from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam +from .response_function_call_output_item_list_param import ResponseFunctionCallOutputItemListParam +from .response_function_shell_call_output_content_param import ResponseFunctionShellCallOutputContentParam +from .response_computer_tool_call_output_screenshot_param import ResponseComputerToolCallOutputScreenshotParam + +__all__ = [ + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "LocalShellCallOutput", + "ShellCall", + "ShellCallAction", + "ShellCallOutput", + "ApplyPatchCall", + "ApplyPatchCallOperation", + 
"ApplyPatchCallOperationCreateFile", + "ApplyPatchCallOperationDeleteFile", + "ApplyPatchCallOperationUpdateFile", + "ApplyPatchCallOutput", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "McpApprovalResponse", + "McpCall", + "ItemReference", +] + + +class Message(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. + """ + + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + """A pending safety check for the computer call.""" + + id: Required[str] + """The ID of the pending safety check.""" + + code: Optional[str] + """The type of the pending safety check.""" + + message: Optional[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + """The output of a computer tool call.""" + + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ResponseComputerToolCallOutputScreenshotParam] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: Optional[str] + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Optional[Iterable[ComputerCallOutputAcknowledgedSafetyCheck]] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(TypedDict, total=False): + """The output of a function tool call.""" + + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[Union[str, ResponseFunctionCallOutputItemListParam]] + """Text, image, or file output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: Optional[str] + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. 
+ """ + + +class ImageGenerationCall(TypedDict, total=False): + """An image generation request made by the model.""" + + id: Required[str] + """The unique ID of the image generation call.""" + + result: Required[Optional[str]] + """The generated image encoded in base64.""" + + status: Required[Literal["in_progress", "completed", "generating", "failed"]] + """The status of the image generation call.""" + + type: Required[Literal["image_generation_call"]] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(TypedDict, total=False): + """Execute a shell command on the server.""" + + command: Required[SequenceNotStr[str]] + """The command to run.""" + + env: Required[Dict[str, str]] + """Environment variables to set for the command.""" + + type: Required[Literal["exec"]] + """The type of the local shell action. Always `exec`.""" + + timeout_ms: Optional[int] + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] + """Optional user to run the command as.""" + + working_directory: Optional[str] + """Optional working directory to run the command in.""" + + +class LocalShellCall(TypedDict, total=False): + """A tool call to run a command on the local shell.""" + + id: Required[str] + """The unique ID of the local shell call.""" + + action: Required[LocalShellCallAction] + """Execute a shell command on the server.""" + + call_id: Required[str] + """The unique ID of the local shell tool call generated by the model.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the local shell call.""" + + type: Required[Literal["local_shell_call"]] + """The type of the local shell call. Always `local_shell_call`.""" + + +class LocalShellCallOutput(TypedDict, total=False): + """The output of a local shell tool call.""" + + id: Required[str] + """The unique ID of the local shell tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the local shell tool call.""" + + type: Required[Literal["local_shell_call_output"]] + """The type of the local shell tool call output. Always `local_shell_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. One of `in_progress`, `completed`, or `incomplete`.""" + + +class ShellCallAction(TypedDict, total=False): + """The shell commands and limits that describe how to run the tool call.""" + + commands: Required[SequenceNotStr[str]] + """Ordered shell commands for the execution environment to run.""" + + max_output_length: Optional[int] + """ + Maximum number of UTF-8 characters to capture from combined stdout and stderr + output. + """ + + timeout_ms: Optional[int] + """Maximum wall-clock time in milliseconds to allow the shell commands to run.""" + + +class ShellCall(TypedDict, total=False): + """A tool representing a request to execute one or more shell commands.""" + + action: Required[ShellCallAction] + """The shell commands and limits that describe how to run the tool call.""" + + call_id: Required[str] + """The unique ID of the shell tool call generated by the model.""" + + type: Required[Literal["shell_call"]] + """The type of the item. Always `shell_call`.""" + + id: Optional[str] + """The unique ID of the shell tool call. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the shell call. + + One of `in_progress`, `completed`, or `incomplete`. 
+ """ + + +class ShellCallOutput(TypedDict, total=False): + """The streamed output items emitted by a shell tool call.""" + + call_id: Required[str] + """The unique ID of the shell tool call generated by the model.""" + + output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]] + """ + Captured chunks of stdout and stderr output, along with their associated + outcomes. + """ + + type: Required[Literal["shell_call_output"]] + """The type of the item. Always `shell_call_output`.""" + + id: Optional[str] + """The unique ID of the shell tool call output. + + Populated when this item is returned via API. + """ + + max_output_length: Optional[int] + """ + The maximum number of UTF-8 characters captured for this shell call's combined + output. + """ + + +class ApplyPatchCallOperationCreateFile(TypedDict, total=False): + """Instruction for creating a new file via the apply_patch tool.""" + + diff: Required[str] + """Unified diff content to apply when creating the file.""" + + path: Required[str] + """Path of the file to create relative to the workspace root.""" + + type: Required[Literal["create_file"]] + """The operation type. Always `create_file`.""" + + +class ApplyPatchCallOperationDeleteFile(TypedDict, total=False): + """Instruction for deleting an existing file via the apply_patch tool.""" + + path: Required[str] + """Path of the file to delete relative to the workspace root.""" + + type: Required[Literal["delete_file"]] + """The operation type. Always `delete_file`.""" + + +class ApplyPatchCallOperationUpdateFile(TypedDict, total=False): + """Instruction for updating an existing file via the apply_patch tool.""" + + diff: Required[str] + """Unified diff content to apply to the existing file.""" + + path: Required[str] + """Path of the file to update relative to the workspace root.""" + + type: Required[Literal["update_file"]] + """The operation type. Always `update_file`.""" + + +ApplyPatchCallOperation: TypeAlias = Union[ + ApplyPatchCallOperationCreateFile, ApplyPatchCallOperationDeleteFile, ApplyPatchCallOperationUpdateFile +] + + +class ApplyPatchCall(TypedDict, total=False): + """ + A tool call representing a request to create, delete, or update files using diff patches. + """ + + call_id: Required[str] + """The unique ID of the apply patch tool call generated by the model.""" + + operation: Required[ApplyPatchCallOperation] + """ + The specific create, delete, or update instruction for the apply_patch tool + call. + """ + + status: Required[Literal["in_progress", "completed"]] + """The status of the apply patch tool call. One of `in_progress` or `completed`.""" + + type: Required[Literal["apply_patch_call"]] + """The type of the item. Always `apply_patch_call`.""" + + id: Optional[str] + """The unique ID of the apply patch tool call. + + Populated when this item is returned via API. + """ + + +class ApplyPatchCallOutput(TypedDict, total=False): + """The streamed output emitted by an apply patch tool call.""" + + call_id: Required[str] + """The unique ID of the apply patch tool call generated by the model.""" + + status: Required[Literal["completed", "failed"]] + """The status of the apply patch tool call output. One of `completed` or `failed`.""" + + type: Required[Literal["apply_patch_call_output"]] + """The type of the item. Always `apply_patch_call_output`.""" + + id: Optional[str] + """The unique ID of the apply patch tool call output. + + Populated when this item is returned via API. 
+ """ + + output: Optional[str] + """ + Optional human-readable log text from the apply patch tool (e.g., patch results + or errors). + """ + + +class McpListToolsTool(TypedDict, total=False): + """A tool available on an MCP server.""" + + input_schema: Required[object] + """The JSON schema describing the tool's input.""" + + name: Required[str] + """The name of the tool.""" + + annotations: Optional[object] + """Additional annotations about the tool.""" + + description: Optional[str] + """The description of the tool.""" + + +class McpListTools(TypedDict, total=False): + """A list of tools available on an MCP server.""" + + id: Required[str] + """The unique ID of the list.""" + + server_label: Required[str] + """The label of the MCP server.""" + + tools: Required[Iterable[McpListToolsTool]] + """The tools available on the server.""" + + type: Required[Literal["mcp_list_tools"]] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(TypedDict, total=False): + """A request for human approval of a tool invocation.""" + + id: Required[str] + """The unique ID of the approval request.""" + + arguments: Required[str] + """A JSON string of arguments for the tool.""" + + name: Required[str] + """The name of the tool to run.""" + + server_label: Required[str] + """The label of the MCP server making the request.""" + + type: Required[Literal["mcp_approval_request"]] + """The type of the item. Always `mcp_approval_request`.""" + + +class McpApprovalResponse(TypedDict, total=False): + """A response to an MCP approval request.""" + + approval_request_id: Required[str] + """The ID of the approval request being answered.""" + + approve: Required[bool] + """Whether the request was approved.""" + + type: Required[Literal["mcp_approval_response"]] + """The type of the item. Always `mcp_approval_response`.""" + + id: Optional[str] + """The unique ID of the approval response""" + + reason: Optional[str] + """Optional reason for the decision.""" + + +class McpCall(TypedDict, total=False): + """An invocation of a tool on an MCP server.""" + + id: Required[str] + """The unique ID of the tool call.""" + + arguments: Required[str] + """A JSON string of the arguments passed to the tool.""" + + name: Required[str] + """The name of the tool that was run.""" + + server_label: Required[str] + """The label of the MCP server running the tool.""" + + type: Required[Literal["mcp_call"]] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + """ + + error: Optional[str] + """The error from the tool call, if any.""" + + output: Optional[str] + """The output from the tool call.""" + + status: Literal["in_progress", "completed", "incomplete", "calling", "failed"] + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. + """ + + +class ItemReference(TypedDict, total=False): + """An internal identifier for an item to reference.""" + + id: Required[str] + """The ID of the item to reference.""" + + type: Optional[Literal["item_reference"]] + """The type of item to reference. 
Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ResponseCompactionItemParamParam, + ImageGenerationCall, + ResponseCodeInterpreterToolCallParam, + LocalShellCall, + LocalShellCallOutput, + ShellCall, + ShellCallOutput, + ApplyPatchCall, + ApplyPatchCallOutput, + McpListTools, + McpApprovalRequest, + McpApprovalResponse, + McpCall, + ResponseCustomToolCallOutputParam, + ResponseCustomToolCallParam, + ItemReference, +] diff --git a/src/openai/types/responses/response_input_message_content_list.py b/src/openai/types/responses/response_input_message_content_list.py new file mode 100644 index 0000000000..99b7c10f12 --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import TypeAlias + +from .response_input_content import ResponseInputContent + +__all__ = ["ResponseInputMessageContentList"] + +ResponseInputMessageContentList: TypeAlias = List[ResponseInputContent] diff --git a/src/openai/types/responses/response_input_message_content_list_param.py b/src/openai/types/responses/response_input_message_content_list_param.py new file mode 100644 index 0000000000..080613df0d --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputMessageContentListParam", "ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] + +ResponseInputMessageContentListParam: TypeAlias = List[ResponseInputContentParam] diff --git a/src/openai/types/responses/response_input_message_item.py b/src/openai/types/responses/response_input_message_item.py new file mode 100644 index 0000000000..6a788e7fa4 --- /dev/null +++ b/src/openai/types/responses/response_input_message_item.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_input_message_content_list import ResponseInputMessageContentList + +__all__ = ["ResponseInputMessageItem"] + + +class ResponseInputMessageItem(BaseModel): + id: str + """The unique ID of the message input.""" + + content: ResponseInputMessageContentList + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Literal["user", "system", "developer"] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. 
+ """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always set to `message`.""" diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py new file mode 100644 index 0000000000..c2d12c0ab4 --- /dev/null +++ b/src/openai/types/responses/response_input_param.py @@ -0,0 +1,534 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_custom_tool_call_param import ResponseCustomToolCallParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_compaction_item_param_param import ResponseCompactionItemParamParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam +from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam +from .response_function_call_output_item_list_param import ResponseFunctionCallOutputItemListParam +from .response_function_shell_call_output_content_param import ResponseFunctionShellCallOutputContentParam +from .response_computer_tool_call_output_screenshot_param import ResponseComputerToolCallOutputScreenshotParam + +__all__ = [ + "ResponseInputParam", + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "LocalShellCallOutput", + "ShellCall", + "ShellCallAction", + "ShellCallOutput", + "ApplyPatchCall", + "ApplyPatchCallOperation", + "ApplyPatchCallOperationCreateFile", + "ApplyPatchCallOperationDeleteFile", + "ApplyPatchCallOperationUpdateFile", + "ApplyPatchCallOutput", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "McpApprovalResponse", + "McpCall", + "ItemReference", +] + + +class Message(TypedDict, total=False): + """ + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. + """ + + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. 
Always set to `message`.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + """A pending safety check for the computer call.""" + + id: Required[str] + """The ID of the pending safety check.""" + + code: Optional[str] + """The type of the pending safety check.""" + + message: Optional[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + """The output of a computer tool call.""" + + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ResponseComputerToolCallOutputScreenshotParam] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: Optional[str] + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Optional[Iterable[ComputerCallOutputAcknowledgedSafetyCheck]] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(TypedDict, total=False): + """The output of a function tool call.""" + + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[Union[str, ResponseFunctionCallOutputItemListParam]] + """Text, image, or file output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: Optional[str] + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +class ImageGenerationCall(TypedDict, total=False): + """An image generation request made by the model.""" + + id: Required[str] + """The unique ID of the image generation call.""" + + result: Required[Optional[str]] + """The generated image encoded in base64.""" + + status: Required[Literal["in_progress", "completed", "generating", "failed"]] + """The status of the image generation call.""" + + type: Required[Literal["image_generation_call"]] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(TypedDict, total=False): + """Execute a shell command on the server.""" + + command: Required[SequenceNotStr[str]] + """The command to run.""" + + env: Required[Dict[str, str]] + """Environment variables to set for the command.""" + + type: Required[Literal["exec"]] + """The type of the local shell action. 
Always `exec`.""" + + timeout_ms: Optional[int] + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] + """Optional user to run the command as.""" + + working_directory: Optional[str] + """Optional working directory to run the command in.""" + + +class LocalShellCall(TypedDict, total=False): + """A tool call to run a command on the local shell.""" + + id: Required[str] + """The unique ID of the local shell call.""" + + action: Required[LocalShellCallAction] + """Execute a shell command on the server.""" + + call_id: Required[str] + """The unique ID of the local shell tool call generated by the model.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the local shell call.""" + + type: Required[Literal["local_shell_call"]] + """The type of the local shell call. Always `local_shell_call`.""" + + +class LocalShellCallOutput(TypedDict, total=False): + """The output of a local shell tool call.""" + + id: Required[str] + """The unique ID of the local shell tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the local shell tool call.""" + + type: Required[Literal["local_shell_call_output"]] + """The type of the local shell tool call output. Always `local_shell_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. One of `in_progress`, `completed`, or `incomplete`.""" + + +class ShellCallAction(TypedDict, total=False): + """The shell commands and limits that describe how to run the tool call.""" + + commands: Required[SequenceNotStr[str]] + """Ordered shell commands for the execution environment to run.""" + + max_output_length: Optional[int] + """ + Maximum number of UTF-8 characters to capture from combined stdout and stderr + output. + """ + + timeout_ms: Optional[int] + """Maximum wall-clock time in milliseconds to allow the shell commands to run.""" + + +class ShellCall(TypedDict, total=False): + """A tool representing a request to execute one or more shell commands.""" + + action: Required[ShellCallAction] + """The shell commands and limits that describe how to run the tool call.""" + + call_id: Required[str] + """The unique ID of the shell tool call generated by the model.""" + + type: Required[Literal["shell_call"]] + """The type of the item. Always `shell_call`.""" + + id: Optional[str] + """The unique ID of the shell tool call. + + Populated when this item is returned via API. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] + """The status of the shell call. + + One of `in_progress`, `completed`, or `incomplete`. + """ + + +class ShellCallOutput(TypedDict, total=False): + """The streamed output items emitted by a shell tool call.""" + + call_id: Required[str] + """The unique ID of the shell tool call generated by the model.""" + + output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]] + """ + Captured chunks of stdout and stderr output, along with their associated + outcomes. + """ + + type: Required[Literal["shell_call_output"]] + """The type of the item. Always `shell_call_output`.""" + + id: Optional[str] + """The unique ID of the shell tool call output. + + Populated when this item is returned via API. + """ + + max_output_length: Optional[int] + """ + The maximum number of UTF-8 characters captured for this shell call's combined + output. 
+ """ + + +class ApplyPatchCallOperationCreateFile(TypedDict, total=False): + """Instruction for creating a new file via the apply_patch tool.""" + + diff: Required[str] + """Unified diff content to apply when creating the file.""" + + path: Required[str] + """Path of the file to create relative to the workspace root.""" + + type: Required[Literal["create_file"]] + """The operation type. Always `create_file`.""" + + +class ApplyPatchCallOperationDeleteFile(TypedDict, total=False): + """Instruction for deleting an existing file via the apply_patch tool.""" + + path: Required[str] + """Path of the file to delete relative to the workspace root.""" + + type: Required[Literal["delete_file"]] + """The operation type. Always `delete_file`.""" + + +class ApplyPatchCallOperationUpdateFile(TypedDict, total=False): + """Instruction for updating an existing file via the apply_patch tool.""" + + diff: Required[str] + """Unified diff content to apply to the existing file.""" + + path: Required[str] + """Path of the file to update relative to the workspace root.""" + + type: Required[Literal["update_file"]] + """The operation type. Always `update_file`.""" + + +ApplyPatchCallOperation: TypeAlias = Union[ + ApplyPatchCallOperationCreateFile, ApplyPatchCallOperationDeleteFile, ApplyPatchCallOperationUpdateFile +] + + +class ApplyPatchCall(TypedDict, total=False): + """ + A tool call representing a request to create, delete, or update files using diff patches. + """ + + call_id: Required[str] + """The unique ID of the apply patch tool call generated by the model.""" + + operation: Required[ApplyPatchCallOperation] + """ + The specific create, delete, or update instruction for the apply_patch tool + call. + """ + + status: Required[Literal["in_progress", "completed"]] + """The status of the apply patch tool call. One of `in_progress` or `completed`.""" + + type: Required[Literal["apply_patch_call"]] + """The type of the item. Always `apply_patch_call`.""" + + id: Optional[str] + """The unique ID of the apply patch tool call. + + Populated when this item is returned via API. + """ + + +class ApplyPatchCallOutput(TypedDict, total=False): + """The streamed output emitted by an apply patch tool call.""" + + call_id: Required[str] + """The unique ID of the apply patch tool call generated by the model.""" + + status: Required[Literal["completed", "failed"]] + """The status of the apply patch tool call output. One of `completed` or `failed`.""" + + type: Required[Literal["apply_patch_call_output"]] + """The type of the item. Always `apply_patch_call_output`.""" + + id: Optional[str] + """The unique ID of the apply patch tool call output. + + Populated when this item is returned via API. + """ + + output: Optional[str] + """ + Optional human-readable log text from the apply patch tool (e.g., patch results + or errors). 
+ """ + + +class McpListToolsTool(TypedDict, total=False): + """A tool available on an MCP server.""" + + input_schema: Required[object] + """The JSON schema describing the tool's input.""" + + name: Required[str] + """The name of the tool.""" + + annotations: Optional[object] + """Additional annotations about the tool.""" + + description: Optional[str] + """The description of the tool.""" + + +class McpListTools(TypedDict, total=False): + """A list of tools available on an MCP server.""" + + id: Required[str] + """The unique ID of the list.""" + + server_label: Required[str] + """The label of the MCP server.""" + + tools: Required[Iterable[McpListToolsTool]] + """The tools available on the server.""" + + type: Required[Literal["mcp_list_tools"]] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(TypedDict, total=False): + """A request for human approval of a tool invocation.""" + + id: Required[str] + """The unique ID of the approval request.""" + + arguments: Required[str] + """A JSON string of arguments for the tool.""" + + name: Required[str] + """The name of the tool to run.""" + + server_label: Required[str] + """The label of the MCP server making the request.""" + + type: Required[Literal["mcp_approval_request"]] + """The type of the item. Always `mcp_approval_request`.""" + + +class McpApprovalResponse(TypedDict, total=False): + """A response to an MCP approval request.""" + + approval_request_id: Required[str] + """The ID of the approval request being answered.""" + + approve: Required[bool] + """Whether the request was approved.""" + + type: Required[Literal["mcp_approval_response"]] + """The type of the item. Always `mcp_approval_response`.""" + + id: Optional[str] + """The unique ID of the approval response""" + + reason: Optional[str] + """Optional reason for the decision.""" + + +class McpCall(TypedDict, total=False): + """An invocation of a tool on an MCP server.""" + + id: Required[str] + """The unique ID of the tool call.""" + + arguments: Required[str] + """A JSON string of the arguments passed to the tool.""" + + name: Required[str] + """The name of the tool that was run.""" + + server_label: Required[str] + """The label of the MCP server running the tool.""" + + type: Required[Literal["mcp_call"]] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + """ + + error: Optional[str] + """The error from the tool call, if any.""" + + output: Optional[str] + """The output from the tool call.""" + + status: Literal["in_progress", "completed", "incomplete", "calling", "failed"] + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. + """ + + +class ItemReference(TypedDict, total=False): + """An internal identifier for an item to reference.""" + + id: Required[str] + """The ID of the item to reference.""" + + type: Optional[Literal["item_reference"]] + """The type of item to reference. 
Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ResponseCompactionItemParamParam, + ImageGenerationCall, + ResponseCodeInterpreterToolCallParam, + LocalShellCall, + LocalShellCallOutput, + ShellCall, + ShellCallOutput, + ApplyPatchCall, + ApplyPatchCallOutput, + McpListTools, + McpApprovalRequest, + McpApprovalResponse, + McpCall, + ResponseCustomToolCallOutputParam, + ResponseCustomToolCallParam, + ItemReference, +] + +ResponseInputParam: TypeAlias = List[ResponseInputItemParam] diff --git a/src/openai/types/responses/response_input_text.py b/src/openai/types/responses/response_input_text.py new file mode 100644 index 0000000000..1e06ba71f3 --- /dev/null +++ b/src/openai/types/responses/response_input_text.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputText"] + + +class ResponseInputText(BaseModel): + """A text input to the model.""" + + text: str + """The text input to the model.""" + + type: Literal["input_text"] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_input_text_content.py b/src/openai/types/responses/response_input_text_content.py new file mode 100644 index 0000000000..66dbb8b0d0 --- /dev/null +++ b/src/openai/types/responses/response_input_text_content.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputTextContent"] + + +class ResponseInputTextContent(BaseModel): + """A text input to the model.""" + + text: str + """The text input to the model.""" + + type: Literal["input_text"] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_input_text_content_param.py b/src/openai/types/responses/response_input_text_content_param.py new file mode 100644 index 0000000000..013f22d0df --- /dev/null +++ b/src/openai/types/responses/response_input_text_content_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputTextContentParam"] + + +class ResponseInputTextContentParam(TypedDict, total=False): + """A text input to the model.""" + + text: Required[str] + """The text input to the model.""" + + type: Required[Literal["input_text"]] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_input_text_param.py b/src/openai/types/responses/response_input_text_param.py new file mode 100644 index 0000000000..e1a2976e2e --- /dev/null +++ b/src/openai/types/responses/response_input_text_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
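# Editorial usage sketch (a minimal example, not part of the generated file): one
# possible way to build a ResponseInputParam list like the union typed above and pass
# it to the Responses API. The model name and file ID are placeholders, not values
# taken from this diff.
from openai import OpenAI

client = OpenAI()
input_items = [
    {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Summarize the attached file."},
            {"type": "input_file", "file_id": "file-abc123"},  # placeholder file ID
        ],
    },
]
response = client.responses.create(model="gpt-4o", input=input_items)
print(response.output_text)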
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputTextParam"] + + +class ResponseInputTextParam(TypedDict, total=False): + """A text input to the model.""" + + text: Required[str] + """The text input to the model.""" + + type: Required[Literal["input_text"]] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_item.py b/src/openai/types/responses/response_item.py new file mode 100644 index 0000000000..3dba681d53 --- /dev/null +++ b/src/openai/types/responses/response_item.py @@ -0,0 +1,244 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_message import ResponseOutputMessage +from .response_computer_tool_call import ResponseComputerToolCall +from .response_input_message_item import ResponseInputMessageItem +from .response_function_web_search import ResponseFunctionWebSearch +from .response_apply_patch_tool_call import ResponseApplyPatchToolCall +from .response_file_search_tool_call import ResponseFileSearchToolCall +from .response_function_tool_call_item import ResponseFunctionToolCallItem +from .response_function_shell_tool_call import ResponseFunctionShellToolCall +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall +from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput +from .response_computer_tool_call_output_item import ResponseComputerToolCallOutputItem +from .response_function_tool_call_output_item import ResponseFunctionToolCallOutputItem +from .response_function_shell_tool_call_output import ResponseFunctionShellToolCallOutput + +__all__ = [ + "ResponseItem", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "LocalShellCallOutput", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "McpApprovalResponse", + "McpCall", +] + + +class ImageGenerationCall(BaseModel): + """An image generation request made by the model.""" + + id: str + """The unique ID of the image generation call.""" + + result: Optional[str] = None + """The generated image encoded in base64.""" + + status: Literal["in_progress", "completed", "generating", "failed"] + """The status of the image generation call.""" + + type: Literal["image_generation_call"] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(BaseModel): + """Execute a shell command on the server.""" + + command: List[str] + """The command to run.""" + + env: Dict[str, str] + """Environment variables to set for the command.""" + + type: Literal["exec"] + """The type of the local shell action. 
Always `exec`.""" + + timeout_ms: Optional[int] = None + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] = None + """Optional user to run the command as.""" + + working_directory: Optional[str] = None + """Optional working directory to run the command in.""" + + +class LocalShellCall(BaseModel): + """A tool call to run a command on the local shell.""" + + id: str + """The unique ID of the local shell call.""" + + action: LocalShellCallAction + """Execute a shell command on the server.""" + + call_id: str + """The unique ID of the local shell tool call generated by the model.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the local shell call.""" + + type: Literal["local_shell_call"] + """The type of the local shell call. Always `local_shell_call`.""" + + +class LocalShellCallOutput(BaseModel): + """The output of a local shell tool call.""" + + id: str + """The unique ID of the local shell tool call generated by the model.""" + + output: str + """A JSON string of the output of the local shell tool call.""" + + type: Literal["local_shell_call_output"] + """The type of the local shell tool call output. Always `local_shell_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. One of `in_progress`, `completed`, or `incomplete`.""" + + +class McpListToolsTool(BaseModel): + """A tool available on an MCP server.""" + + input_schema: object + """The JSON schema describing the tool's input.""" + + name: str + """The name of the tool.""" + + annotations: Optional[object] = None + """Additional annotations about the tool.""" + + description: Optional[str] = None + """The description of the tool.""" + + +class McpListTools(BaseModel): + """A list of tools available on an MCP server.""" + + id: str + """The unique ID of the list.""" + + server_label: str + """The label of the MCP server.""" + + tools: List[McpListToolsTool] + """The tools available on the server.""" + + type: Literal["mcp_list_tools"] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] = None + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(BaseModel): + """A request for human approval of a tool invocation.""" + + id: str + """The unique ID of the approval request.""" + + arguments: str + """A JSON string of arguments for the tool.""" + + name: str + """The name of the tool to run.""" + + server_label: str + """The label of the MCP server making the request.""" + + type: Literal["mcp_approval_request"] + """The type of the item. Always `mcp_approval_request`.""" + + +class McpApprovalResponse(BaseModel): + """A response to an MCP approval request.""" + + id: str + """The unique ID of the approval response""" + + approval_request_id: str + """The ID of the approval request being answered.""" + + approve: bool + """Whether the request was approved.""" + + type: Literal["mcp_approval_response"] + """The type of the item. Always `mcp_approval_response`.""" + + reason: Optional[str] = None + """Optional reason for the decision.""" + + +class McpCall(BaseModel): + """An invocation of a tool on an MCP server.""" + + id: str + """The unique ID of the tool call.""" + + arguments: str + """A JSON string of the arguments passed to the tool.""" + + name: str + """The name of the tool that was run.""" + + server_label: str + """The label of the MCP server running the tool.""" + + type: Literal["mcp_call"] + """The type of the item. 
Always `mcp_call`.""" + + approval_request_id: Optional[str] = None + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + """ + + error: Optional[str] = None + """The error from the tool call, if any.""" + + output: Optional[str] = None + """The output from the tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete", "calling", "failed"]] = None + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. + """ + + +ResponseItem: TypeAlias = Annotated[ + Union[ + ResponseInputMessageItem, + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseComputerToolCall, + ResponseComputerToolCallOutputItem, + ResponseFunctionWebSearch, + ResponseFunctionToolCallItem, + ResponseFunctionToolCallOutputItem, + ImageGenerationCall, + ResponseCodeInterpreterToolCall, + LocalShellCall, + LocalShellCallOutput, + ResponseFunctionShellToolCall, + ResponseFunctionShellToolCallOutput, + ResponseApplyPatchToolCall, + ResponseApplyPatchToolCallOutput, + McpListTools, + McpApprovalRequest, + McpApprovalResponse, + McpCall, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_item_list.py b/src/openai/types/responses/response_item_list.py new file mode 100644 index 0000000000..e2b5a1a961 --- /dev/null +++ b/src/openai/types/responses/response_item_list.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_item import ResponseItem + +__all__ = ["ResponseItemList"] + + +class ResponseItemList(BaseModel): + """A list of Response items.""" + + data: List[ResponseItem] + """A list of items used to generate this response.""" + + first_id: str + """The ID of the first item in the list.""" + + has_more: bool + """Whether there are more items available.""" + + last_id: str + """The ID of the last item in the list.""" + + object: Literal["list"] + """The type of object returned, must be `list`.""" diff --git a/src/openai/types/responses/response_mcp_call_arguments_delta_event.py b/src/openai/types/responses/response_mcp_call_arguments_delta_event.py new file mode 100644 index 0000000000..303ef494a3 --- /dev/null +++ b/src/openai/types/responses/response_mcp_call_arguments_delta_event.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallArgumentsDeltaEvent"] + + +class ResponseMcpCallArgumentsDeltaEvent(BaseModel): + """ + Emitted when there is a delta (partial update) to the arguments of an MCP tool call. + """ + + delta: str + """ + A JSON string containing the partial update to the arguments for the MCP tool + call. + """ + + item_id: str + """The unique identifier of the MCP tool call item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_call_arguments.delta"] + """The type of the event. 
Always 'response.mcp_call_arguments.delta'.""" diff --git a/src/openai/types/responses/response_mcp_call_arguments_done_event.py b/src/openai/types/responses/response_mcp_call_arguments_done_event.py new file mode 100644 index 0000000000..59e71be77c --- /dev/null +++ b/src/openai/types/responses/response_mcp_call_arguments_done_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallArgumentsDoneEvent"] + + +class ResponseMcpCallArgumentsDoneEvent(BaseModel): + """Emitted when the arguments for an MCP tool call are finalized.""" + + arguments: str + """A JSON string containing the finalized arguments for the MCP tool call.""" + + item_id: str + """The unique identifier of the MCP tool call item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_call_arguments.done"] + """The type of the event. Always 'response.mcp_call_arguments.done'.""" diff --git a/src/openai/types/responses/response_mcp_call_completed_event.py b/src/openai/types/responses/response_mcp_call_completed_event.py new file mode 100644 index 0000000000..bee54d4039 --- /dev/null +++ b/src/openai/types/responses/response_mcp_call_completed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallCompletedEvent"] + + +class ResponseMcpCallCompletedEvent(BaseModel): + """Emitted when an MCP tool call has completed successfully.""" + + item_id: str + """The ID of the MCP tool call item that completed.""" + + output_index: int + """The index of the output item that completed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_call.completed"] + """The type of the event. Always 'response.mcp_call.completed'.""" diff --git a/src/openai/types/responses/response_mcp_call_failed_event.py b/src/openai/types/responses/response_mcp_call_failed_event.py new file mode 100644 index 0000000000..cb3130b155 --- /dev/null +++ b/src/openai/types/responses/response_mcp_call_failed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallFailedEvent"] + + +class ResponseMcpCallFailedEvent(BaseModel): + """Emitted when an MCP tool call has failed.""" + + item_id: str + """The ID of the MCP tool call item that failed.""" + + output_index: int + """The index of the output item that failed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_call.failed"] + """The type of the event. Always 'response.mcp_call.failed'.""" diff --git a/src/openai/types/responses/response_mcp_call_in_progress_event.py b/src/openai/types/responses/response_mcp_call_in_progress_event.py new file mode 100644 index 0000000000..7cf6a1decf --- /dev/null +++ b/src/openai/types/responses/response_mcp_call_in_progress_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
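# Editorial usage sketch (a minimal example, not part of the generated file): consuming
# streamed events such as the MCP call events modeled above by dispatching on their
# `type` discriminator. The model name and MCP server details are placeholders.
from openai import OpenAI

client = OpenAI()
stream = client.responses.create(
    model="gpt-4o",
    input="What tools does the demo server expose?",
    tools=[
        {
            "type": "mcp",
            "server_label": "demo",
            "server_url": "https://example.com/mcp",  # placeholder URL
            "require_approval": "never",
        }
    ],
    stream=True,
)
for event in stream:
    if event.type == "response.mcp_call_arguments.delta":
        print("partial arguments:", event.delta)
    elif event.type == "response.mcp_call.completed":
        print("MCP tool call completed")
    elif event.type == "response.mcp_call.failed":
        print("MCP tool call failed")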
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpCallInProgressEvent"] + + +class ResponseMcpCallInProgressEvent(BaseModel): + """Emitted when an MCP tool call is in progress.""" + + item_id: str + """The unique identifier of the MCP tool call item being processed.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_call.in_progress"] + """The type of the event. Always 'response.mcp_call.in_progress'.""" diff --git a/src/openai/types/responses/response_mcp_list_tools_completed_event.py b/src/openai/types/responses/response_mcp_list_tools_completed_event.py new file mode 100644 index 0000000000..685ba59c4d --- /dev/null +++ b/src/openai/types/responses/response_mcp_list_tools_completed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpListToolsCompletedEvent"] + + +class ResponseMcpListToolsCompletedEvent(BaseModel): + """Emitted when the list of available MCP tools has been successfully retrieved.""" + + item_id: str + """The ID of the MCP tool call item that produced this output.""" + + output_index: int + """The index of the output item that was processed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_list_tools.completed"] + """The type of the event. Always 'response.mcp_list_tools.completed'.""" diff --git a/src/openai/types/responses/response_mcp_list_tools_failed_event.py b/src/openai/types/responses/response_mcp_list_tools_failed_event.py new file mode 100644 index 0000000000..c5fa54d231 --- /dev/null +++ b/src/openai/types/responses/response_mcp_list_tools_failed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpListToolsFailedEvent"] + + +class ResponseMcpListToolsFailedEvent(BaseModel): + """Emitted when the attempt to list available MCP tools has failed.""" + + item_id: str + """The ID of the MCP tool call item that failed.""" + + output_index: int + """The index of the output item that failed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_list_tools.failed"] + """The type of the event. Always 'response.mcp_list_tools.failed'.""" diff --git a/src/openai/types/responses/response_mcp_list_tools_in_progress_event.py b/src/openai/types/responses/response_mcp_list_tools_in_progress_event.py new file mode 100644 index 0000000000..403fdbdeb3 --- /dev/null +++ b/src/openai/types/responses/response_mcp_list_tools_in_progress_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseMcpListToolsInProgressEvent"] + + +class ResponseMcpListToolsInProgressEvent(BaseModel): + """ + Emitted when the system is in the process of retrieving the list of available MCP tools. 
+ """ + + item_id: str + """The ID of the MCP tool call item that is being processed.""" + + output_index: int + """The index of the output item that is being processed.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.mcp_list_tools.in_progress"] + """The type of the event. Always 'response.mcp_list_tools.in_progress'.""" diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py new file mode 100644 index 0000000000..990f947b90 --- /dev/null +++ b/src/openai/types/responses/response_output_item.py @@ -0,0 +1,205 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_message import ResponseOutputMessage +from .response_reasoning_item import ResponseReasoningItem +from .response_compaction_item import ResponseCompactionItem +from .response_custom_tool_call import ResponseCustomToolCall +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_apply_patch_tool_call import ResponseApplyPatchToolCall +from .response_file_search_tool_call import ResponseFileSearchToolCall +from .response_function_shell_tool_call import ResponseFunctionShellToolCall +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall +from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput +from .response_function_shell_tool_call_output import ResponseFunctionShellToolCallOutput + +__all__ = [ + "ResponseOutputItem", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "McpCall", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", +] + + +class ImageGenerationCall(BaseModel): + """An image generation request made by the model.""" + + id: str + """The unique ID of the image generation call.""" + + result: Optional[str] = None + """The generated image encoded in base64.""" + + status: Literal["in_progress", "completed", "generating", "failed"] + """The status of the image generation call.""" + + type: Literal["image_generation_call"] + """The type of the image generation call. Always `image_generation_call`.""" + + +class LocalShellCallAction(BaseModel): + """Execute a shell command on the server.""" + + command: List[str] + """The command to run.""" + + env: Dict[str, str] + """Environment variables to set for the command.""" + + type: Literal["exec"] + """The type of the local shell action. 
Always `exec`.""" + + timeout_ms: Optional[int] = None + """Optional timeout in milliseconds for the command.""" + + user: Optional[str] = None + """Optional user to run the command as.""" + + working_directory: Optional[str] = None + """Optional working directory to run the command in.""" + + +class LocalShellCall(BaseModel): + """A tool call to run a command on the local shell.""" + + id: str + """The unique ID of the local shell call.""" + + action: LocalShellCallAction + """Execute a shell command on the server.""" + + call_id: str + """The unique ID of the local shell tool call generated by the model.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the local shell call.""" + + type: Literal["local_shell_call"] + """The type of the local shell call. Always `local_shell_call`.""" + + +class McpCall(BaseModel): + """An invocation of a tool on an MCP server.""" + + id: str + """The unique ID of the tool call.""" + + arguments: str + """A JSON string of the arguments passed to the tool.""" + + name: str + """The name of the tool that was run.""" + + server_label: str + """The label of the MCP server running the tool.""" + + type: Literal["mcp_call"] + """The type of the item. Always `mcp_call`.""" + + approval_request_id: Optional[str] = None + """ + Unique identifier for the MCP tool call approval request. Include this value in + a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + """ + + error: Optional[str] = None + """The error from the tool call, if any.""" + + output: Optional[str] = None + """The output from the tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete", "calling", "failed"]] = None + """The status of the tool call. + + One of `in_progress`, `completed`, `incomplete`, `calling`, or `failed`. + """ + + +class McpListToolsTool(BaseModel): + """A tool available on an MCP server.""" + + input_schema: object + """The JSON schema describing the tool's input.""" + + name: str + """The name of the tool.""" + + annotations: Optional[object] = None + """Additional annotations about the tool.""" + + description: Optional[str] = None + """The description of the tool.""" + + +class McpListTools(BaseModel): + """A list of tools available on an MCP server.""" + + id: str + """The unique ID of the list.""" + + server_label: str + """The label of the MCP server.""" + + tools: List[McpListToolsTool] + """The tools available on the server.""" + + type: Literal["mcp_list_tools"] + """The type of the item. Always `mcp_list_tools`.""" + + error: Optional[str] = None + """Error message if the server could not list tools.""" + + +class McpApprovalRequest(BaseModel): + """A request for human approval of a tool invocation.""" + + id: str + """The unique ID of the approval request.""" + + arguments: str + """A JSON string of arguments for the tool.""" + + name: str + """The name of the tool to run.""" + + server_label: str + """The label of the MCP server making the request.""" + + type: Literal["mcp_approval_request"] + """The type of the item. 
Always `mcp_approval_request`.""" + + +ResponseOutputItem: TypeAlias = Annotated[ + Union[ + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseComputerToolCall, + ResponseReasoningItem, + ResponseCompactionItem, + ImageGenerationCall, + ResponseCodeInterpreterToolCall, + LocalShellCall, + ResponseFunctionShellToolCall, + ResponseFunctionShellToolCallOutput, + ResponseApplyPatchToolCall, + ResponseApplyPatchToolCallOutput, + McpCall, + McpListTools, + McpApprovalRequest, + ResponseCustomToolCall, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_output_item_added_event.py b/src/openai/types/responses/response_output_item_added_event.py new file mode 100644 index 0000000000..a42f6281e3 --- /dev/null +++ b/src/openai/types/responses/response_output_item_added_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + """Emitted when a new output item is added.""" + + item: ResponseOutputItem + """The output item that was added.""" + + output_index: int + """The index of the output item that was added.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.output_item.added"] + """The type of the event. Always `response.output_item.added`.""" diff --git a/src/openai/types/responses/response_output_item_done_event.py b/src/openai/types/responses/response_output_item_done_event.py new file mode 100644 index 0000000000..50b99da569 --- /dev/null +++ b/src/openai/types/responses/response_output_item_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + """Emitted when an output item is marked done.""" + + item: ResponseOutputItem + """The output item that was marked done.""" + + output_index: int + """The index of the output item that was marked done.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.output_item.done"] + """The type of the event. Always `response.output_item.done`.""" diff --git a/src/openai/types/responses/response_output_message.py b/src/openai/types/responses/response_output_message.py new file mode 100644 index 0000000000..9c1d1f97fc --- /dev/null +++ b/src/openai/types/responses/response_output_message.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
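# Editorial usage sketch (a minimal example, not part of the generated file): walking
# `response.output`, whose items belong to the ResponseOutputItem union defined above.
# The model name is a placeholder.
from openai import OpenAI

client = OpenAI()
response = client.responses.create(model="gpt-4o", input="Write a haiku about type hints.")
for item in response.output:
    if item.type == "message":
        # ResponseOutputMessage: content holds output_text and refusal parts.
        for part in item.content:
            if part.type == "output_text":
                print(part.text)
    elif item.type == "function_call":
        print("tool call requested:", item.name, item.arguments)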
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseOutputMessage", "Content"] + +Content: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseOutputMessage(BaseModel): + """An output message from the model.""" + + id: str + """The unique ID of the output message.""" + + content: List[Content] + """The content of the output message.""" + + role: Literal["assistant"] + """The role of the output message. Always `assistant`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Literal["message"] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_message_param.py b/src/openai/types/responses/response_output_message_param.py new file mode 100644 index 0000000000..9c2f5246a1 --- /dev/null +++ b/src/openai/types/responses/response_output_message_param.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .response_output_text_param import ResponseOutputTextParam +from .response_output_refusal_param import ResponseOutputRefusalParam + +__all__ = ["ResponseOutputMessageParam", "Content"] + +Content: TypeAlias = Union[ResponseOutputTextParam, ResponseOutputRefusalParam] + + +class ResponseOutputMessageParam(TypedDict, total=False): + """An output message from the model.""" + + id: Required[str] + """The unique ID of the output message.""" + + content: Required[Iterable[Content]] + """The content of the output message.""" + + role: Required[Literal["assistant"]] + """The role of the output message. Always `assistant`.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Required[Literal["message"]] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_refusal.py b/src/openai/types/responses/response_output_refusal.py new file mode 100644 index 0000000000..6bce26af74 --- /dev/null +++ b/src/openai/types/responses/response_output_refusal.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseOutputRefusal"] + + +class ResponseOutputRefusal(BaseModel): + """A refusal from the model.""" + + refusal: str + """The refusal explanation from the model.""" + + type: Literal["refusal"] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_refusal_param.py b/src/openai/types/responses/response_output_refusal_param.py new file mode 100644 index 0000000000..02bdfdcf4f --- /dev/null +++ b/src/openai/types/responses/response_output_refusal_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseOutputRefusalParam"] + + +class ResponseOutputRefusalParam(TypedDict, total=False): + """A refusal from the model.""" + + refusal: Required[str] + """The refusal explanation from the model.""" + + type: Required[Literal["refusal"]] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_text.py b/src/openai/types/responses/response_output_text.py new file mode 100644 index 0000000000..2386fcb3c0 --- /dev/null +++ b/src/openai/types/responses/response_output_text.py @@ -0,0 +1,131 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseOutputText", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationContainerFileCitation", + "AnnotationFilePath", + "Logprob", + "LogprobTopLogprob", +] + + +class AnnotationFileCitation(BaseModel): + """A citation to a file.""" + + file_id: str + """The ID of the file.""" + + filename: str + """The filename of the file cited.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_citation"] + """The type of the file citation. Always `file_citation`.""" + + +class AnnotationURLCitation(BaseModel): + """A citation for a web resource used to generate a model response.""" + + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url: str + """The URL of the web resource.""" + + +class AnnotationContainerFileCitation(BaseModel): + """A citation for a container file used to generate a model response.""" + + container_id: str + """The ID of the container file.""" + + end_index: int + """The index of the last character of the container file citation in the message.""" + + file_id: str + """The ID of the file.""" + + filename: str + """The filename of the container file cited.""" + + start_index: int + """The index of the first character of the container file citation in the message.""" + + type: Literal["container_file_citation"] + """The type of the container file citation. Always `container_file_citation`.""" + + +class AnnotationFilePath(BaseModel): + """A path to a file.""" + + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_path"] + """The type of the file path. 
Always `file_path`.""" + + +Annotation: TypeAlias = Annotated[ + Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationContainerFileCitation, AnnotationFilePath], + PropertyInfo(discriminator="type"), +] + + +class LogprobTopLogprob(BaseModel): + """The top log probability of a token.""" + + token: str + + bytes: List[int] + + logprob: float + + +class Logprob(BaseModel): + """The log probability of a token.""" + + token: str + + bytes: List[int] + + logprob: float + + top_logprobs: List[LogprobTopLogprob] + + +class ResponseOutputText(BaseModel): + """A text output from the model.""" + + annotations: List[Annotation] + """The annotations of the text output.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" + + logprobs: Optional[List[Logprob]] = None diff --git a/src/openai/types/responses/response_output_text_annotation_added_event.py b/src/openai/types/responses/response_output_text_annotation_added_event.py new file mode 100644 index 0000000000..b9dc262150 --- /dev/null +++ b/src/openai/types/responses/response_output_text_annotation_added_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseOutputTextAnnotationAddedEvent"] + + +class ResponseOutputTextAnnotationAddedEvent(BaseModel): + """Emitted when an annotation is added to output text content.""" + + annotation: object + """The annotation object being added. (See annotation schema for details.)""" + + annotation_index: int + """The index of the annotation within the content part.""" + + content_index: int + """The index of the content part within the output item.""" + + item_id: str + """The unique identifier of the item to which the annotation is being added.""" + + output_index: int + """The index of the output item in the response's output array.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.output_text.annotation.added"] + """The type of the event. Always 'response.output_text.annotation.added'.""" diff --git a/src/openai/types/responses/response_output_text_param.py b/src/openai/types/responses/response_output_text_param.py new file mode 100644 index 0000000000..bc30fbcd8e --- /dev/null +++ b/src/openai/types/responses/response_output_text_param.py @@ -0,0 +1,129 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseOutputTextParam", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationContainerFileCitation", + "AnnotationFilePath", + "Logprob", + "LogprobTopLogprob", +] + + +class AnnotationFileCitation(TypedDict, total=False): + """A citation to a file.""" + + file_id: Required[str] + """The ID of the file.""" + + filename: Required[str] + """The filename of the file cited.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_citation"]] + """The type of the file citation. 
Always `file_citation`.""" + + +class AnnotationURLCitation(TypedDict, total=False): + """A citation for a web resource used to generate a model response.""" + + end_index: Required[int] + """The index of the last character of the URL citation in the message.""" + + start_index: Required[int] + """The index of the first character of the URL citation in the message.""" + + title: Required[str] + """The title of the web resource.""" + + type: Required[Literal["url_citation"]] + """The type of the URL citation. Always `url_citation`.""" + + url: Required[str] + """The URL of the web resource.""" + + +class AnnotationContainerFileCitation(TypedDict, total=False): + """A citation for a container file used to generate a model response.""" + + container_id: Required[str] + """The ID of the container file.""" + + end_index: Required[int] + """The index of the last character of the container file citation in the message.""" + + file_id: Required[str] + """The ID of the file.""" + + filename: Required[str] + """The filename of the container file cited.""" + + start_index: Required[int] + """The index of the first character of the container file citation in the message.""" + + type: Required[Literal["container_file_citation"]] + """The type of the container file citation. Always `container_file_citation`.""" + + +class AnnotationFilePath(TypedDict, total=False): + """A path to a file.""" + + file_id: Required[str] + """The ID of the file.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_path"]] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Union[ + AnnotationFileCitation, AnnotationURLCitation, AnnotationContainerFileCitation, AnnotationFilePath +] + + +class LogprobTopLogprob(TypedDict, total=False): + """The top log probability of a token.""" + + token: Required[str] + + bytes: Required[Iterable[int]] + + logprob: Required[float] + + +class Logprob(TypedDict, total=False): + """The log probability of a token.""" + + token: Required[str] + + bytes: Required[Iterable[int]] + + logprob: Required[float] + + top_logprobs: Required[Iterable[LogprobTopLogprob]] + + +class ResponseOutputTextParam(TypedDict, total=False): + """A text output from the model.""" + + annotations: Required[Iterable[Annotation]] + """The annotations of the text output.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" + + logprobs: Iterable[Logprob] diff --git a/src/openai/types/responses/response_prompt.py b/src/openai/types/responses/response_prompt.py new file mode 100644 index 0000000000..e3acacf63a --- /dev/null +++ b/src/openai/types/responses/response_prompt.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Union, Optional +from typing_extensions import TypeAlias + +from ..._models import BaseModel +from .response_input_file import ResponseInputFile +from .response_input_text import ResponseInputText +from .response_input_image import ResponseInputImage + +__all__ = ["ResponsePrompt", "Variables"] + +Variables: TypeAlias = Union[str, ResponseInputText, ResponseInputImage, ResponseInputFile] + + +class ResponsePrompt(BaseModel): + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). 
+ """ + + id: str + """The unique identifier of the prompt template to use.""" + + variables: Optional[Dict[str, Variables]] = None + """Optional map of values to substitute in for variables in your prompt. + + The substitution values can either be strings, or other Response input types + like images or files. + """ + + version: Optional[str] = None + """Optional version of the prompt template.""" diff --git a/src/openai/types/responses/response_prompt_param.py b/src/openai/types/responses/response_prompt_param.py new file mode 100644 index 0000000000..f9a28b62a2 --- /dev/null +++ b/src/openai/types/responses/response_prompt_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypeAlias, TypedDict + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponsePromptParam", "Variables"] + +Variables: TypeAlias = Union[str, ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] + + +class ResponsePromptParam(TypedDict, total=False): + """ + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). + """ + + id: Required[str] + """The unique identifier of the prompt template to use.""" + + variables: Optional[Dict[str, Variables]] + """Optional map of values to substitute in for variables in your prompt. + + The substitution values can either be strings, or other Response input types + like images or files. + """ + + version: Optional[str] + """Optional version of the prompt template.""" diff --git a/src/openai/types/responses/response_queued_event.py b/src/openai/types/responses/response_queued_event.py new file mode 100644 index 0000000000..a554215275 --- /dev/null +++ b/src/openai/types/responses/response_queued_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseQueuedEvent"] + + +class ResponseQueuedEvent(BaseModel): + """Emitted when a response is queued and waiting to be processed.""" + + response: Response + """The full response object that is queued.""" + + sequence_number: int + """The sequence number for this event.""" + + type: Literal["response.queued"] + """The type of the event. Always 'response.queued'.""" diff --git a/src/openai/types/responses/response_reasoning_item.py b/src/openai/types/responses/response_reasoning_item.py new file mode 100644 index 0000000000..1a22eb60cc --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningItem", "Summary", "Content"] + + +class Summary(BaseModel): + """A summary text from the model.""" + + text: str + """A summary of the reasoning output from the model so far.""" + + type: Literal["summary_text"] + """The type of the object. 
Always `summary_text`.""" + + +class Content(BaseModel): + """Reasoning text from the model.""" + + text: str + """The reasoning text from the model.""" + + type: Literal["reasoning_text"] + """The type of the reasoning text. Always `reasoning_text`.""" + + +class ResponseReasoningItem(BaseModel): + """ + A description of the chain of thought used by a reasoning model while generating + a response. Be sure to include these items in your `input` to the Responses API + for subsequent turns of a conversation if you are manually + [managing context](https://platform.openai.com/docs/guides/conversation-state). + """ + + id: str + """The unique identifier of the reasoning content.""" + + summary: List[Summary] + """Reasoning summary content.""" + + type: Literal["reasoning"] + """The type of the object. Always `reasoning`.""" + + content: Optional[List[Content]] = None + """Reasoning text content.""" + + encrypted_content: Optional[str] = None + """ + The encrypted content of the reasoning item - populated when a response is + generated with `reasoning.encrypted_content` in the `include` parameter. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_reasoning_item_param.py b/src/openai/types/responses/response_reasoning_item_param.py new file mode 100644 index 0000000000..40320b72e1 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item_param.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseReasoningItemParam", "Summary", "Content"] + + +class Summary(TypedDict, total=False): + """A summary text from the model.""" + + text: Required[str] + """A summary of the reasoning output from the model so far.""" + + type: Required[Literal["summary_text"]] + """The type of the object. Always `summary_text`.""" + + +class Content(TypedDict, total=False): + """Reasoning text from the model.""" + + text: Required[str] + """The reasoning text from the model.""" + + type: Required[Literal["reasoning_text"]] + """The type of the reasoning text. Always `reasoning_text`.""" + + +class ResponseReasoningItemParam(TypedDict, total=False): + """ + A description of the chain of thought used by a reasoning model while generating + a response. Be sure to include these items in your `input` to the Responses API + for subsequent turns of a conversation if you are manually + [managing context](https://platform.openai.com/docs/guides/conversation-state). + """ + + id: Required[str] + """The unique identifier of the reasoning content.""" + + summary: Required[Iterable[Summary]] + """Reasoning summary content.""" + + type: Required[Literal["reasoning"]] + """The type of the object. Always `reasoning`.""" + + content: Iterable[Content] + """Reasoning text content.""" + + encrypted_content: Optional[str] + """ + The encrypted content of the reasoning item - populated when a response is + generated with `reasoning.encrypted_content` in the `include` parameter. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. 
Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_reasoning_summary_part_added_event.py b/src/openai/types/responses/response_reasoning_summary_part_added_event.py new file mode 100644 index 0000000000..e4b0f34231 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_part_added_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryPartAddedEvent", "Part"] + + +class Part(BaseModel): + """The summary part that was added.""" + + text: str + """The text of the summary part.""" + + type: Literal["summary_text"] + """The type of the summary part. Always `summary_text`.""" + + +class ResponseReasoningSummaryPartAddedEvent(BaseModel): + """Emitted when a new reasoning summary part is added.""" + + item_id: str + """The ID of the item this summary part is associated with.""" + + output_index: int + """The index of the output item this summary part is associated with.""" + + part: Part + """The summary part that was added.""" + + sequence_number: int + """The sequence number of this event.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_part.added"] + """The type of the event. Always `response.reasoning_summary_part.added`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_part_done_event.py b/src/openai/types/responses/response_reasoning_summary_part_done_event.py new file mode 100644 index 0000000000..48f3f684e8 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_part_done_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryPartDoneEvent", "Part"] + + +class Part(BaseModel): + """The completed summary part.""" + + text: str + """The text of the summary part.""" + + type: Literal["summary_text"] + """The type of the summary part. Always `summary_text`.""" + + +class ResponseReasoningSummaryPartDoneEvent(BaseModel): + """Emitted when a reasoning summary part is completed.""" + + item_id: str + """The ID of the item this summary part is associated with.""" + + output_index: int + """The index of the output item this summary part is associated with.""" + + part: Part + """The completed summary part.""" + + sequence_number: int + """The sequence number of this event.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_part.done"] + """The type of the event. Always `response.reasoning_summary_part.done`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_text_delta_event.py b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py new file mode 100644 index 0000000000..84bcf039c4 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryTextDeltaEvent"] + + +class ResponseReasoningSummaryTextDeltaEvent(BaseModel): + """Emitted when a delta is added to a reasoning summary text.""" + + delta: str + """The text delta that was added to the summary.""" + + item_id: str + """The ID of the item this summary text delta is associated with.""" + + output_index: int + """The index of the output item this summary text delta is associated with.""" + + sequence_number: int + """The sequence number of this event.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + type: Literal["response.reasoning_summary_text.delta"] + """The type of the event. Always `response.reasoning_summary_text.delta`.""" diff --git a/src/openai/types/responses/response_reasoning_summary_text_done_event.py b/src/openai/types/responses/response_reasoning_summary_text_done_event.py new file mode 100644 index 0000000000..244d001b75 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_summary_text_done_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningSummaryTextDoneEvent"] + + +class ResponseReasoningSummaryTextDoneEvent(BaseModel): + """Emitted when a reasoning summary text is completed.""" + + item_id: str + """The ID of the item this summary text is associated with.""" + + output_index: int + """The index of the output item this summary text is associated with.""" + + sequence_number: int + """The sequence number of this event.""" + + summary_index: int + """The index of the summary part within the reasoning summary.""" + + text: str + """The full text of the completed reasoning summary.""" + + type: Literal["response.reasoning_summary_text.done"] + """The type of the event. Always `response.reasoning_summary_text.done`.""" diff --git a/src/openai/types/responses/response_reasoning_text_delta_event.py b/src/openai/types/responses/response_reasoning_text_delta_event.py new file mode 100644 index 0000000000..0e05226c94 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_text_delta_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningTextDeltaEvent"] + + +class ResponseReasoningTextDeltaEvent(BaseModel): + """Emitted when a delta is added to a reasoning text.""" + + content_index: int + """The index of the reasoning content part this delta is associated with.""" + + delta: str + """The text delta that was added to the reasoning content.""" + + item_id: str + """The ID of the item this reasoning text delta is associated with.""" + + output_index: int + """The index of the output item this reasoning text delta is associated with.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.reasoning_text.delta"] + """The type of the event. 
Always `response.reasoning_text.delta`.""" diff --git a/src/openai/types/responses/response_reasoning_text_done_event.py b/src/openai/types/responses/response_reasoning_text_done_event.py new file mode 100644 index 0000000000..40e3f4701c --- /dev/null +++ b/src/openai/types/responses/response_reasoning_text_done_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningTextDoneEvent"] + + +class ResponseReasoningTextDoneEvent(BaseModel): + """Emitted when a reasoning text is completed.""" + + content_index: int + """The index of the reasoning content part.""" + + item_id: str + """The ID of the item this reasoning text is associated with.""" + + output_index: int + """The index of the output item this reasoning text is associated with.""" + + sequence_number: int + """The sequence number of this event.""" + + text: str + """The full text of the completed reasoning content.""" + + type: Literal["response.reasoning_text.done"] + """The type of the event. Always `response.reasoning_text.done`.""" diff --git a/src/openai/types/responses/response_refusal_delta_event.py b/src/openai/types/responses/response_refusal_delta_event.py new file mode 100644 index 0000000000..e3933b7dda --- /dev/null +++ b/src/openai/types/responses/response_refusal_delta_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDeltaEvent"] + + +class ResponseRefusalDeltaEvent(BaseModel): + """Emitted when there is a partial refusal text.""" + + content_index: int + """The index of the content part that the refusal text is added to.""" + + delta: str + """The refusal text that is added.""" + + item_id: str + """The ID of the output item that the refusal text is added to.""" + + output_index: int + """The index of the output item that the refusal text is added to.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.refusal.delta"] + """The type of the event. Always `response.refusal.delta`.""" diff --git a/src/openai/types/responses/response_refusal_done_event.py b/src/openai/types/responses/response_refusal_done_event.py new file mode 100644 index 0000000000..91adeb6331 --- /dev/null +++ b/src/openai/types/responses/response_refusal_done_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDoneEvent"] + + +class ResponseRefusalDoneEvent(BaseModel): + """Emitted when refusal text is finalized.""" + + content_index: int + """The index of the content part in which the refusal text is finalized.""" + + item_id: str + """The ID of the output item in which the refusal text is finalized.""" + + output_index: int + """The index of the output item in which the refusal text is finalized.""" + + refusal: str + """The refusal text that is finalized.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.refusal.done"] + """The type of the event.
Always `response.refusal.done`.""" diff --git a/src/openai/types/responses/response_retrieve_params.py b/src/openai/types/responses/response_retrieve_params.py new file mode 100644 index 0000000000..4013db85ce --- /dev/null +++ b/src/openai/types/responses/response_retrieve_params.py @@ -0,0 +1,59 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypedDict + +from .response_includable import ResponseIncludable + +__all__ = ["ResponseRetrieveParamsBase", "ResponseRetrieveParamsNonStreaming", "ResponseRetrieveParamsStreaming"] + + +class ResponseRetrieveParamsBase(TypedDict, total=False): + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for Response creation above for more information. + """ + + include_obfuscation: bool + """When true, stream obfuscation will be enabled. + + Stream obfuscation adds random characters to an `obfuscation` field on streaming + delta events to normalize payload sizes as a mitigation to certain side-channel + attacks. These obfuscation fields are included by default, but add a small + amount of overhead to the data stream. You can set `include_obfuscation` to + false to optimize for bandwidth if you trust the network links between your + application and the OpenAI API. + """ + + starting_after: int + """The sequence number of the event after which to start streaming.""" + + +class ResponseRetrieveParamsNonStreaming(ResponseRetrieveParamsBase, total=False): + stream: Literal[False] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +class ResponseRetrieveParamsStreaming(ResponseRetrieveParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +ResponseRetrieveParams = Union[ResponseRetrieveParamsNonStreaming, ResponseRetrieveParamsStreaming] diff --git a/src/openai/types/responses/response_status.py b/src/openai/types/responses/response_status.py new file mode 100644 index 0000000000..a7887b92d2 --- /dev/null +++ b/src/openai/types/responses/response_status.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseStatus"] + +ResponseStatus: TypeAlias = Literal["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"] diff --git a/src/openai/types/responses/response_stream_event.py b/src/openai/types/responses/response_stream_event.py new file mode 100644 index 0000000000..c0a317cd9d --- /dev/null +++ b/src/openai/types/responses/response_stream_event.py @@ -0,0 +1,120 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_error_event import ResponseErrorEvent +from .response_failed_event import ResponseFailedEvent +from .response_queued_event import ResponseQueuedEvent +from .response_created_event import ResponseCreatedEvent +from .response_completed_event import ResponseCompletedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_incomplete_event import ResponseIncompleteEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent +from .response_refusal_done_event import ResponseRefusalDoneEvent +from .response_refusal_delta_event import ResponseRefusalDeltaEvent +from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .response_mcp_call_completed_event import ResponseMcpCallCompletedEvent +from .response_reasoning_text_done_event import ResponseReasoningTextDoneEvent +from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent +from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .response_mcp_call_arguments_done_event import ResponseMcpCallArgumentsDoneEvent +from .response_image_gen_call_completed_event import ResponseImageGenCallCompletedEvent +from .response_mcp_call_arguments_delta_event import ResponseMcpCallArgumentsDeltaEvent +from .response_mcp_list_tools_completed_event import ResponseMcpListToolsCompletedEvent +from .response_image_gen_call_generating_event import ResponseImageGenCallGeneratingEvent +from .response_web_search_call_completed_event import ResponseWebSearchCallCompletedEvent +from .response_web_search_call_searching_event import ResponseWebSearchCallSearchingEvent +from .response_file_search_call_completed_event import ResponseFileSearchCallCompletedEvent +from .response_file_search_call_searching_event import ResponseFileSearchCallSearchingEvent +from .response_image_gen_call_in_progress_event import ResponseImageGenCallInProgressEvent +from .response_mcp_list_tools_in_progress_event import ResponseMcpListToolsInProgressEvent +from .response_custom_tool_call_input_done_event import ResponseCustomToolCallInputDoneEvent +from 
.response_reasoning_summary_part_done_event import ResponseReasoningSummaryPartDoneEvent +from .response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent +from .response_web_search_call_in_progress_event import ResponseWebSearchCallInProgressEvent +from .response_custom_tool_call_input_delta_event import ResponseCustomToolCallInputDeltaEvent +from .response_file_search_call_in_progress_event import ResponseFileSearchCallInProgressEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_image_gen_call_partial_image_event import ResponseImageGenCallPartialImageEvent +from .response_output_text_annotation_added_event import ResponseOutputTextAnnotationAddedEvent +from .response_reasoning_summary_part_added_event import ResponseReasoningSummaryPartAddedEvent +from .response_reasoning_summary_text_delta_event import ResponseReasoningSummaryTextDeltaEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .response_code_interpreter_call_code_done_event import ResponseCodeInterpreterCallCodeDoneEvent +from .response_code_interpreter_call_completed_event import ResponseCodeInterpreterCallCompletedEvent +from .response_code_interpreter_call_code_delta_event import ResponseCodeInterpreterCallCodeDeltaEvent +from .response_code_interpreter_call_in_progress_event import ResponseCodeInterpreterCallInProgressEvent +from .response_code_interpreter_call_interpreting_event import ResponseCodeInterpreterCallInterpretingEvent + +__all__ = ["ResponseStreamEvent"] + +ResponseStreamEvent: TypeAlias = Annotated[ + Union[ + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCompletedEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseErrorEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseInProgressEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ResponseReasoningTextDeltaEvent, + ResponseReasoningTextDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ResponseImageGenCallCompletedEvent, + ResponseImageGenCallGeneratingEvent, + ResponseImageGenCallInProgressEvent, + ResponseImageGenCallPartialImageEvent, + ResponseMcpCallArgumentsDeltaEvent, + ResponseMcpCallArgumentsDoneEvent, + ResponseMcpCallCompletedEvent, + ResponseMcpCallFailedEvent, + ResponseMcpCallInProgressEvent, + ResponseMcpListToolsCompletedEvent, + ResponseMcpListToolsFailedEvent, + ResponseMcpListToolsInProgressEvent, + ResponseOutputTextAnnotationAddedEvent, + ResponseQueuedEvent, + ResponseCustomToolCallInputDeltaEvent, + ResponseCustomToolCallInputDoneEvent, 
+ ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_text_config.py b/src/openai/types/responses/response_text_config.py new file mode 100644 index 0000000000..fbf4da0b03 --- /dev/null +++ b/src/openai/types/responses/response_text_config.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_format_text_config import ResponseFormatTextConfig + +__all__ = ["ResponseTextConfig"] + + +class ResponseTextConfig(BaseModel): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + verbosity: Optional[Literal["low", "medium", "high"]] = None + """Constrains the verbosity of the model's response. + + Lower values will result in more concise responses, while higher values will + result in more verbose responses. Currently supported values are `low`, + `medium`, and `high`. + """ diff --git a/src/openai/types/responses/response_text_config_param.py b/src/openai/types/responses/response_text_config_param.py new file mode 100644 index 0000000000..9cd54765b0 --- /dev/null +++ b/src/openai/types/responses/response_text_config_param.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from .response_format_text_config_param import ResponseFormatTextConfigParam + +__all__ = ["ResponseTextConfigParam"] + + +class ResponseTextConfigParam(TypedDict, total=False): + """Configuration options for a text response from the model. + + Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + format: ResponseFormatTextConfigParam + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. 
+ + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ + + verbosity: Optional[Literal["low", "medium", "high"]] + """Constrains the verbosity of the model's response. + + Lower values will result in more concise responses, while higher values will + result in more verbose responses. Currently supported values are `low`, + `medium`, and `high`. + """ diff --git a/src/openai/types/responses/response_text_delta_event.py b/src/openai/types/responses/response_text_delta_event.py new file mode 100644 index 0000000000..4f802abfd2 --- /dev/null +++ b/src/openai/types/responses/response_text_delta_event.py @@ -0,0 +1,58 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent", "Logprob", "LogprobTopLogprob"] + + +class LogprobTopLogprob(BaseModel): + token: Optional[str] = None + """A possible text token.""" + + logprob: Optional[float] = None + """The log probability of this token.""" + + +class Logprob(BaseModel): + """ + A logprob is the logarithmic probability that the model assigns to producing + a particular token at a given position in the sequence. Less-negative (higher) + logprob values indicate greater model confidence in that token choice. + """ + + token: str + """A possible text token.""" + + logprob: float + """The log probability of this token.""" + + top_logprobs: Optional[List[LogprobTopLogprob]] = None + """The log probability of the top 20 most likely tokens.""" + + +class ResponseTextDeltaEvent(BaseModel): + """Emitted when there is an additional text delta.""" + + content_index: int + """The index of the content part that the text delta was added to.""" + + delta: str + """The text delta that was added.""" + + item_id: str + """The ID of the output item that the text delta was added to.""" + + logprobs: List[Logprob] + """The log probabilities of the tokens in the delta.""" + + output_index: int + """The index of the output item that the text delta was added to.""" + + sequence_number: int + """The sequence number for this event.""" + + type: Literal["response.output_text.delta"] + """The type of the event. Always `response.output_text.delta`.""" diff --git a/src/openai/types/responses/response_text_done_event.py b/src/openai/types/responses/response_text_done_event.py new file mode 100644 index 0000000000..75bd479870 --- /dev/null +++ b/src/openai/types/responses/response_text_done_event.py @@ -0,0 +1,58 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDoneEvent", "Logprob", "LogprobTopLogprob"] + + +class LogprobTopLogprob(BaseModel): + token: Optional[str] = None + """A possible text token.""" + + logprob: Optional[float] = None + """The log probability of this token.""" + + +class Logprob(BaseModel): + """ + A logprob is the logarithmic probability that the model assigns to producing + a particular token at a given position in the sequence. Less-negative (higher) + logprob values indicate greater model confidence in that token choice. 
+ """ + + token: str + """A possible text token.""" + + logprob: float + """The log probability of this token.""" + + top_logprobs: Optional[List[LogprobTopLogprob]] = None + """The log probability of the top 20 most likely tokens.""" + + +class ResponseTextDoneEvent(BaseModel): + """Emitted when text content is finalized.""" + + content_index: int + """The index of the content part in which the text content is finalized.""" + + item_id: str + """The ID of the output item in which the text content is finalized.""" + + logprobs: List[Logprob] + """The log probabilities of the tokens in the delta.""" + + output_index: int + """The index of the output item in which the text content is finalized.""" + + sequence_number: int + """The sequence number for this event.""" + + text: str + """The text content that is finalized.""" + + type: Literal["response.output_text.done"] + """The type of the event. Always `response.output_text.done`.""" diff --git a/src/openai/types/responses/response_usage.py b/src/openai/types/responses/response_usage.py new file mode 100644 index 0000000000..d4b739c598 --- /dev/null +++ b/src/openai/types/responses/response_usage.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["ResponseUsage", "InputTokensDetails", "OutputTokensDetails"] + + +class InputTokensDetails(BaseModel): + """A detailed breakdown of the input tokens.""" + + cached_tokens: int + """The number of tokens that were retrieved from the cache. + + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + """ + + +class OutputTokensDetails(BaseModel): + """A detailed breakdown of the output tokens.""" + + reasoning_tokens: int + """The number of reasoning tokens.""" + + +class ResponseUsage(BaseModel): + """ + Represents token usage details including input tokens, output tokens, + a breakdown of output tokens, and the total tokens used. + """ + + input_tokens: int + """The number of input tokens.""" + + input_tokens_details: InputTokensDetails + """A detailed breakdown of the input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: OutputTokensDetails + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" diff --git a/src/openai/types/responses/response_web_search_call_completed_event.py b/src/openai/types/responses/response_web_search_call_completed_event.py new file mode 100644 index 0000000000..5aa7afe609 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_completed_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallCompletedEvent"] + + +class ResponseWebSearchCallCompletedEvent(BaseModel): + """Emitted when a web search call is completed.""" + + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + sequence_number: int + """The sequence number of the web search call being processed.""" + + type: Literal["response.web_search_call.completed"] + """The type of the event.
Always `response.web_search_call.completed`.""" diff --git a/src/openai/types/responses/response_web_search_call_in_progress_event.py b/src/openai/types/responses/response_web_search_call_in_progress_event.py new file mode 100644 index 0000000000..73b30ff5c0 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_in_progress_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallInProgressEvent"] + + +class ResponseWebSearchCallInProgressEvent(BaseModel): + """Emitted when a web search call is initiated.""" + + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + sequence_number: int + """The sequence number of the web search call being processed.""" + + type: Literal["response.web_search_call.in_progress"] + """The type of the event. Always `response.web_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_web_search_call_searching_event.py b/src/openai/types/responses/response_web_search_call_searching_event.py new file mode 100644 index 0000000000..959c095187 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_searching_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallSearchingEvent"] + + +class ResponseWebSearchCallSearchingEvent(BaseModel): + """Emitted when a web search call is executing.""" + + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + sequence_number: int + """The sequence number of the web search call being processed.""" + + type: Literal["response.web_search_call.searching"] + """The type of the event. Always `response.web_search_call.searching`.""" diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py new file mode 100644 index 0000000000..c4111b0b00 --- /dev/null +++ b/src/openai/types/responses/tool.py @@ -0,0 +1,301 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .custom_tool import CustomTool +from .computer_tool import ComputerTool +from .function_tool import FunctionTool +from .web_search_tool import WebSearchTool +from .apply_patch_tool import ApplyPatchTool +from .file_search_tool import FileSearchTool +from .function_shell_tool import FunctionShellTool +from .web_search_preview_tool import WebSearchPreviewTool + +__all__ = [ + "Tool", + "Mcp", + "McpAllowedTools", + "McpAllowedToolsMcpToolFilter", + "McpRequireApproval", + "McpRequireApprovalMcpToolApprovalFilter", + "McpRequireApprovalMcpToolApprovalFilterAlways", + "McpRequireApprovalMcpToolApprovalFilterNever", + "CodeInterpreter", + "CodeInterpreterContainer", + "CodeInterpreterContainerCodeInterpreterToolAuto", + "ImageGeneration", + "ImageGenerationInputImageMask", + "LocalShell", +] + + +class McpAllowedToolsMcpToolFilter(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +McpAllowedTools: TypeAlias = Union[List[str], McpAllowedToolsMcpToolFilter, None] + + +class McpRequireApprovalMcpToolApprovalFilterAlways(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilterNever(BaseModel): + """A filter object to specify which tools are allowed.""" + + read_only: Optional[bool] = None + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: Optional[List[str]] = None + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilter(BaseModel): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: Optional[McpRequireApprovalMcpToolApprovalFilterAlways] = None + """A filter object to specify which tools are allowed.""" + + never: Optional[McpRequireApprovalMcpToolApprovalFilterNever] = None + """A filter object to specify which tools are allowed.""" + + +McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None] + + +class Mcp(BaseModel): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). 
+ """ + + server_label: str + """A label for this MCP server, used to identify it in tool calls.""" + + type: Literal["mcp"] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[McpAllowedTools] = None + """List of allowed tool names or a filter object.""" + + authorization: Optional[str] = None + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Optional[ + Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + ] = None + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] = None + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[McpRequireApproval] = None + """Specify which of the MCP server's tools require approval.""" + + server_description: Optional[str] = None + """Optional description of the MCP server, used to provide more context.""" + + server_url: Optional[str] = None + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +class CodeInterpreterContainerCodeInterpreterToolAuto(BaseModel): + """Configuration for a code interpreter container. + + Optionally specify the IDs of the files to run the code on. + """ + + type: Literal["auto"] + """Always `auto`.""" + + file_ids: Optional[List[str]] = None + """An optional list of uploaded files to make available to your code.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + + +CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] + + +class CodeInterpreter(BaseModel): + """A tool that runs Python code to help generate a response to a prompt.""" + + container: CodeInterpreterContainer + """The code interpreter container. + + Can be a container ID or an object that specifies uploaded file IDs to make + available to your code, along with an optional `memory_limit` setting. + """ + + type: Literal["code_interpreter"] + """The type of the code interpreter tool. Always `code_interpreter`.""" + + +class ImageGenerationInputImageMask(BaseModel): + """Optional mask for inpainting. + + Contains `image_url` + (string, optional) and `file_id` (string, optional). + """ + + file_id: Optional[str] = None + """File ID for the mask image.""" + + image_url: Optional[str] = None + """Base64-encoded mask image.""" + + +class ImageGeneration(BaseModel): + """A tool that generates images using a model like `gpt-image-1`.""" + + type: Literal["image_generation"] + """The type of the image generation tool. 
Always `image_generation`.""" + + background: Optional[Literal["transparent", "opaque", "auto"]] = None + """Background type for the generated image. + + One of `transparent`, `opaque`, or `auto`. Default: `auto`. + """ + + input_fidelity: Optional[Literal["high", "low"]] = None + """ + Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + """ + + input_image_mask: Optional[ImageGenerationInputImageMask] = None + """Optional mask for inpainting. + + Contains `image_url` (string, optional) and `file_id` (string, optional). + """ + + model: Optional[Literal["gpt-image-1", "gpt-image-1-mini"]] = None + """The image generation model to use. Default: `gpt-image-1`.""" + + moderation: Optional[Literal["auto", "low"]] = None + """Moderation level for the generated image. Default: `auto`.""" + + output_compression: Optional[int] = None + """Compression level for the output image. Default: 100.""" + + output_format: Optional[Literal["png", "webp", "jpeg"]] = None + """The output format of the generated image. + + One of `png`, `webp`, or `jpeg`. Default: `png`. + """ + + partial_images: Optional[int] = None + """ + Number of partial images to generate in streaming mode, from 0 (default value) + to 3. + """ + + quality: Optional[Literal["low", "medium", "high", "auto"]] = None + """The quality of the generated image. + + One of `low`, `medium`, `high`, or `auto`. Default: `auto`. + """ + + size: Optional[Literal["1024x1024", "1024x1536", "1536x1024", "auto"]] = None + """The size of the generated image. + + One of `1024x1024`, `1024x1536`, `1536x1024`, or `auto`. Default: `auto`. + """ + + +class LocalShell(BaseModel): + """A tool that allows the model to execute shell commands in a local environment.""" + + type: Literal["local_shell"] + """The type of the local shell tool. Always `local_shell`.""" + + +Tool: TypeAlias = Annotated[ + Union[ + FunctionTool, + FileSearchTool, + ComputerTool, + WebSearchTool, + Mcp, + CodeInterpreter, + ImageGeneration, + LocalShell, + FunctionShellTool, + CustomTool, + WebSearchPreviewTool, + ApplyPatchTool, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/tool_choice_allowed.py b/src/openai/types/responses/tool_choice_allowed.py new file mode 100644 index 0000000000..400e170a57 --- /dev/null +++ b/src/openai/types/responses/tool_choice_allowed.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceAllowed"] + + +class ToolChoiceAllowed(BaseModel): + """Constrains the tools available to the model to a pre-defined set.""" + + mode: Literal["auto", "required"] + """Constrains the tools available to the model to a pre-defined set. + + `auto` allows the model to pick from among the allowed tools and generate a + message. + + `required` requires the model to call one or more of the allowed tools. + """ + + tools: List[Dict[str, object]] + """A list of tool definitions that the model should be allowed to call. 
+ + For the Responses API, the list of tool definitions might look like: + + ```json + [ + { "type": "function", "name": "get_weather" }, + { "type": "mcp", "server_label": "deepwiki" }, + { "type": "image_generation" } + ] + ``` + """ + + type: Literal["allowed_tools"] + """Allowed tool configuration type. Always `allowed_tools`.""" diff --git a/src/openai/types/responses/tool_choice_allowed_param.py b/src/openai/types/responses/tool_choice_allowed_param.py new file mode 100644 index 0000000000..cb316c1560 --- /dev/null +++ b/src/openai/types/responses/tool_choice_allowed_param.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceAllowedParam"] + + +class ToolChoiceAllowedParam(TypedDict, total=False): + """Constrains the tools available to the model to a pre-defined set.""" + + mode: Required[Literal["auto", "required"]] + """Constrains the tools available to the model to a pre-defined set. + + `auto` allows the model to pick from among the allowed tools and generate a + message. + + `required` requires the model to call one or more of the allowed tools. + """ + + tools: Required[Iterable[Dict[str, object]]] + """A list of tool definitions that the model should be allowed to call. + + For the Responses API, the list of tool definitions might look like: + + ```json + [ + { "type": "function", "name": "get_weather" }, + { "type": "mcp", "server_label": "deepwiki" }, + { "type": "image_generation" } + ] + ``` + """ + + type: Required[Literal["allowed_tools"]] + """Allowed tool configuration type. Always `allowed_tools`.""" diff --git a/src/openai/types/responses/tool_choice_apply_patch.py b/src/openai/types/responses/tool_choice_apply_patch.py new file mode 100644 index 0000000000..ef5a5e8bfa --- /dev/null +++ b/src/openai/types/responses/tool_choice_apply_patch.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceApplyPatch"] + + +class ToolChoiceApplyPatch(BaseModel): + """Forces the model to call the apply_patch tool when executing a tool call.""" + + type: Literal["apply_patch"] + """The tool to call. Always `apply_patch`.""" diff --git a/src/openai/types/responses/tool_choice_apply_patch_param.py b/src/openai/types/responses/tool_choice_apply_patch_param.py new file mode 100644 index 0000000000..193c99328a --- /dev/null +++ b/src/openai/types/responses/tool_choice_apply_patch_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceApplyPatchParam"] + + +class ToolChoiceApplyPatchParam(TypedDict, total=False): + """Forces the model to call the apply_patch tool when executing a tool call.""" + + type: Required[Literal["apply_patch"]] + """The tool to call. Always `apply_patch`.""" diff --git a/src/openai/types/responses/tool_choice_custom.py b/src/openai/types/responses/tool_choice_custom.py new file mode 100644 index 0000000000..dec85ef78c --- /dev/null +++ b/src/openai/types/responses/tool_choice_custom.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceCustom"] + + +class ToolChoiceCustom(BaseModel): + """Use this option to force the model to call a specific custom tool.""" + + name: str + """The name of the custom tool to call.""" + + type: Literal["custom"] + """For custom tool calling, the type is always `custom`.""" diff --git a/src/openai/types/responses/tool_choice_custom_param.py b/src/openai/types/responses/tool_choice_custom_param.py new file mode 100644 index 0000000000..ccdbab568a --- /dev/null +++ b/src/openai/types/responses/tool_choice_custom_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceCustomParam"] + + +class ToolChoiceCustomParam(TypedDict, total=False): + """Use this option to force the model to call a specific custom tool.""" + + name: Required[str] + """The name of the custom tool to call.""" + + type: Required[Literal["custom"]] + """For custom tool calling, the type is always `custom`.""" diff --git a/src/openai/types/responses/tool_choice_function.py b/src/openai/types/responses/tool_choice_function.py new file mode 100644 index 0000000000..b2aab24aca --- /dev/null +++ b/src/openai/types/responses/tool_choice_function.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceFunction"] + + +class ToolChoiceFunction(BaseModel): + """Use this option to force the model to call a specific function.""" + + name: str + """The name of the function to call.""" + + type: Literal["function"] + """For function calling, the type is always `function`.""" diff --git a/src/openai/types/responses/tool_choice_function_param.py b/src/openai/types/responses/tool_choice_function_param.py new file mode 100644 index 0000000000..837465ebd7 --- /dev/null +++ b/src/openai/types/responses/tool_choice_function_param.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceFunctionParam"] + + +class ToolChoiceFunctionParam(TypedDict, total=False): + """Use this option to force the model to call a specific function.""" + + name: Required[str] + """The name of the function to call.""" + + type: Required[Literal["function"]] + """For function calling, the type is always `function`.""" diff --git a/src/openai/types/responses/tool_choice_mcp.py b/src/openai/types/responses/tool_choice_mcp.py new file mode 100644 index 0000000000..a2c8049c2d --- /dev/null +++ b/src/openai/types/responses/tool_choice_mcp.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceMcp"] + + +class ToolChoiceMcp(BaseModel): + """ + Use this option to force the model to call a specific tool on a remote MCP server. 
+ """ + + server_label: str + """The label of the MCP server to use.""" + + type: Literal["mcp"] + """For MCP tools, the type is always `mcp`.""" + + name: Optional[str] = None + """The name of the tool to call on the server.""" diff --git a/src/openai/types/responses/tool_choice_mcp_param.py b/src/openai/types/responses/tool_choice_mcp_param.py new file mode 100644 index 0000000000..9726e47a47 --- /dev/null +++ b/src/openai/types/responses/tool_choice_mcp_param.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceMcpParam"] + + +class ToolChoiceMcpParam(TypedDict, total=False): + """ + Use this option to force the model to call a specific tool on a remote MCP server. + """ + + server_label: Required[str] + """The label of the MCP server to use.""" + + type: Required[Literal["mcp"]] + """For MCP tools, the type is always `mcp`.""" + + name: Optional[str] + """The name of the tool to call on the server.""" diff --git a/src/openai/types/responses/tool_choice_options.py b/src/openai/types/responses/tool_choice_options.py new file mode 100644 index 0000000000..c200db54e1 --- /dev/null +++ b/src/openai/types/responses/tool_choice_options.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ToolChoiceOptions"] + +ToolChoiceOptions: TypeAlias = Literal["none", "auto", "required"] diff --git a/src/openai/types/responses/tool_choice_shell.py b/src/openai/types/responses/tool_choice_shell.py new file mode 100644 index 0000000000..a78eccc387 --- /dev/null +++ b/src/openai/types/responses/tool_choice_shell.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceShell"] + + +class ToolChoiceShell(BaseModel): + """Forces the model to call the shell tool when a tool call is required.""" + + type: Literal["shell"] + """The tool to call. Always `shell`.""" diff --git a/src/openai/types/responses/tool_choice_shell_param.py b/src/openai/types/responses/tool_choice_shell_param.py new file mode 100644 index 0000000000..0dbcc90f39 --- /dev/null +++ b/src/openai/types/responses/tool_choice_shell_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceShellParam"] + + +class ToolChoiceShellParam(TypedDict, total=False): + """Forces the model to call the shell tool when a tool call is required.""" + + type: Required[Literal["shell"]] + """The tool to call. Always `shell`.""" diff --git a/src/openai/types/responses/tool_choice_types.py b/src/openai/types/responses/tool_choice_types.py new file mode 100644 index 0000000000..044c014b19 --- /dev/null +++ b/src/openai/types/responses/tool_choice_types.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceTypes"] + + +class ToolChoiceTypes(BaseModel): + """ + Indicates that the model should use a built-in tool to generate a response. 
+ [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools). + """ + + type: Literal[ + "file_search", + "web_search_preview", + "computer_use_preview", + "web_search_preview_2025_03_11", + "image_generation", + "code_interpreter", + ] + """The type of hosted tool the model should use. + + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + + Allowed values are: + + - `file_search` + - `web_search_preview` + - `computer_use_preview` + - `code_interpreter` + - `image_generation` + """ diff --git a/src/openai/types/responses/tool_choice_types_param.py b/src/openai/types/responses/tool_choice_types_param.py new file mode 100644 index 0000000000..9bf02dbfcc --- /dev/null +++ b/src/openai/types/responses/tool_choice_types_param.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceTypesParam"] + + +class ToolChoiceTypesParam(TypedDict, total=False): + """ + Indicates that the model should use a built-in tool to generate a response. + [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools). + """ + + type: Required[ + Literal[ + "file_search", + "web_search_preview", + "computer_use_preview", + "web_search_preview_2025_03_11", + "image_generation", + "code_interpreter", + ] + ] + """The type of hosted tool the model should use. + + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + + Allowed values are: + + - `file_search` + - `web_search_preview` + - `computer_use_preview` + - `code_interpreter` + - `image_generation` + """ diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py new file mode 100644 index 0000000000..e5b7ef37b8 --- /dev/null +++ b/src/openai/types/responses/tool_param.py @@ -0,0 +1,297 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..._types import SequenceNotStr +from .custom_tool_param import CustomToolParam +from .computer_tool_param import ComputerToolParam +from .function_tool_param import FunctionToolParam +from .web_search_tool_param import WebSearchToolParam +from .apply_patch_tool_param import ApplyPatchToolParam +from .file_search_tool_param import FileSearchToolParam +from .function_shell_tool_param import FunctionShellToolParam +from .web_search_preview_tool_param import WebSearchPreviewToolParam + +__all__ = [ + "ToolParam", + "Mcp", + "McpAllowedTools", + "McpAllowedToolsMcpToolFilter", + "McpRequireApproval", + "McpRequireApprovalMcpToolApprovalFilter", + "McpRequireApprovalMcpToolApprovalFilterAlways", + "McpRequireApprovalMcpToolApprovalFilterNever", + "CodeInterpreter", + "CodeInterpreterContainer", + "CodeInterpreterContainerCodeInterpreterToolAuto", + "ImageGeneration", + "ImageGenerationInputImageMask", + "LocalShell", +] + + +class McpAllowedToolsMcpToolFilter(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only.
+ + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +McpAllowedTools: TypeAlias = Union[SequenceNotStr[str], McpAllowedToolsMcpToolFilter] + + +class McpRequireApprovalMcpToolApprovalFilterAlways(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilterNever(TypedDict, total=False): + """A filter object to specify which tools are allowed.""" + + read_only: bool + """Indicates whether or not a tool modifies data or is read-only. + + If an MCP server is + [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + it will match this filter. + """ + + tool_names: SequenceNotStr[str] + """List of allowed tool names.""" + + +class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False): + """Specify which of the MCP server's tools require approval. + + Can be + `always`, `never`, or a filter object associated with tools + that require approval. + """ + + always: McpRequireApprovalMcpToolApprovalFilterAlways + """A filter object to specify which tools are allowed.""" + + never: McpRequireApprovalMcpToolApprovalFilterNever + """A filter object to specify which tools are allowed.""" + + +McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]] + + +class Mcp(TypedDict, total=False): + """ + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + """ + + server_label: Required[str] + """A label for this MCP server, used to identify it in tool calls.""" + + type: Required[Literal["mcp"]] + """The type of the MCP tool. Always `mcp`.""" + + allowed_tools: Optional[McpAllowedTools] + """List of allowed tool names or a filter object.""" + + authorization: str + """ + An OAuth access token that can be used with a remote MCP server, either with a + custom MCP server URL or a service connector. Your application must handle the + OAuth authorization flow and provide the token here. + """ + + connector_id: Literal[ + "connector_dropbox", + "connector_gmail", + "connector_googlecalendar", + "connector_googledrive", + "connector_microsoftteams", + "connector_outlookcalendar", + "connector_outlookemail", + "connector_sharepoint", + ] + """Identifier for service connectors, like those available in ChatGPT. + + One of `server_url` or `connector_id` must be provided. Learn more about service + connectors + [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). 
+ + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + """ + + headers: Optional[Dict[str, str]] + """Optional HTTP headers to send to the MCP server. + + Use for authentication or other purposes. + """ + + require_approval: Optional[McpRequireApproval] + """Specify which of the MCP server's tools require approval.""" + + server_description: str + """Optional description of the MCP server, used to provide more context.""" + + server_url: str + """The URL for the MCP server. + + One of `server_url` or `connector_id` must be provided. + """ + + +class CodeInterpreterContainerCodeInterpreterToolAuto(TypedDict, total=False): + """Configuration for a code interpreter container. + + Optionally specify the IDs of the files to run the code on. + """ + + type: Required[Literal["auto"]] + """Always `auto`.""" + + file_ids: SequenceNotStr[str] + """An optional list of uploaded files to make available to your code.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] + + +CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] + + +class CodeInterpreter(TypedDict, total=False): + """A tool that runs Python code to help generate a response to a prompt.""" + + container: Required[CodeInterpreterContainer] + """The code interpreter container. + + Can be a container ID or an object that specifies uploaded file IDs to make + available to your code, along with an optional `memory_limit` setting. + """ + + type: Required[Literal["code_interpreter"]] + """The type of the code interpreter tool. Always `code_interpreter`.""" + + +class ImageGenerationInputImageMask(TypedDict, total=False): + """Optional mask for inpainting. + + Contains `image_url` + (string, optional) and `file_id` (string, optional). + """ + + file_id: str + """File ID for the mask image.""" + + image_url: str + """Base64-encoded mask image.""" + + +class ImageGeneration(TypedDict, total=False): + """A tool that generates images using a model like `gpt-image-1`.""" + + type: Required[Literal["image_generation"]] + """The type of the image generation tool. Always `image_generation`.""" + + background: Literal["transparent", "opaque", "auto"] + """Background type for the generated image. + + One of `transparent`, `opaque`, or `auto`. Default: `auto`. + """ + + input_fidelity: Optional[Literal["high", "low"]] + """ + Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. + """ + + input_image_mask: ImageGenerationInputImageMask + """Optional mask for inpainting. + + Contains `image_url` (string, optional) and `file_id` (string, optional). + """ + + model: Literal["gpt-image-1", "gpt-image-1-mini"] + """The image generation model to use. Default: `gpt-image-1`.""" + + moderation: Literal["auto", "low"] + """Moderation level for the generated image. Default: `auto`.""" + + output_compression: int + """Compression level for the output image. Default: 100.""" + + output_format: Literal["png", "webp", "jpeg"] + """The output format of the generated image. 
+ + One of `png`, `webp`, or `jpeg`. Default: `png`. + """ + + partial_images: int + """ + Number of partial images to generate in streaming mode, from 0 (default value) + to 3. + """ + + quality: Literal["low", "medium", "high", "auto"] + """The quality of the generated image. + + One of `low`, `medium`, `high`, or `auto`. Default: `auto`. + """ + + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] + """The size of the generated image. + + One of `1024x1024`, `1024x1536`, `1536x1024`, or `auto`. Default: `auto`. + """ + + +class LocalShell(TypedDict, total=False): + """A tool that allows the model to execute shell commands in a local environment.""" + + type: Required[Literal["local_shell"]] + """The type of the local shell tool. Always `local_shell`.""" + + +ToolParam: TypeAlias = Union[ + FunctionToolParam, + FileSearchToolParam, + ComputerToolParam, + WebSearchToolParam, + Mcp, + CodeInterpreter, + ImageGeneration, + LocalShell, + FunctionShellToolParam, + CustomToolParam, + WebSearchPreviewToolParam, + ApplyPatchToolParam, +] diff --git a/src/openai/types/responses/web_search_preview_tool.py b/src/openai/types/responses/web_search_preview_tool.py new file mode 100644 index 0000000000..12478e896d --- /dev/null +++ b/src/openai/types/responses/web_search_preview_tool.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["WebSearchPreviewTool", "UserLocation"] + + +class UserLocation(BaseModel): + """The user's location.""" + + type: Literal["approximate"] + """The type of location approximation. Always `approximate`.""" + + city: Optional[str] = None + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] = None + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] = None + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] = None + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchPreviewTool(BaseModel): + """This tool searches the web for relevant results to use in a response. + + Learn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + """ + + type: Literal["web_search_preview", "web_search_preview_2025_03_11"] + """The type of the web search tool. + + One of `web_search_preview` or `web_search_preview_2025_03_11`. + """ + + search_context_size: Optional[Literal["low", "medium", "high"]] = None + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] = None + """The user's location.""" diff --git a/src/openai/types/responses/web_search_preview_tool_param.py b/src/openai/types/responses/web_search_preview_tool_param.py new file mode 100644 index 0000000000..09619a3394 --- /dev/null +++ b/src/openai/types/responses/web_search_preview_tool_param.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
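For orientation, the tool params above (MCP, code interpreter, image generation, and the `ToolParam` union) are what gets passed in the `tools` array of a request. A minimal usage sketch, assuming the Responses API surface (`client.responses.create`); the model name, MCP server label, server URL, and prompt are placeholder values:

# Hedged usage sketch; model, MCP server label/URL, and prompt are placeholders.
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",  # placeholder model
    input="Summarize the open issues in my tracker.",
    tools=[
        {
            # Mcp tool param defined above: server_label and type are required.
            "type": "mcp",
            "server_label": "issue_tracker",          # placeholder label
            "server_url": "https://example.com/mcp",  # placeholder URL
            "require_approval": "never",
        },
        {
            # CodeInterpreter tool param with an auto-provisioned container.
            "type": "code_interpreter",
            "container": {"type": "auto"},
        },
    ],
)
print(response.output_text)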
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["WebSearchPreviewToolParam", "UserLocation"] + + +class UserLocation(TypedDict, total=False): + """The user's location.""" + + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" + + city: Optional[str] + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchPreviewToolParam(TypedDict, total=False): + """This tool searches the web for relevant results to use in a response. + + Learn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + """ + + type: Required[Literal["web_search_preview", "web_search_preview_2025_03_11"]] + """The type of the web search tool. + + One of `web_search_preview` or `web_search_preview_2025_03_11`. + """ + + search_context_size: Literal["low", "medium", "high"] + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] + """The user's location.""" diff --git a/src/openai/types/responses/web_search_tool.py b/src/openai/types/responses/web_search_tool.py new file mode 100644 index 0000000000..769f5c93a4 --- /dev/null +++ b/src/openai/types/responses/web_search_tool.py @@ -0,0 +1,73 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["WebSearchTool", "Filters", "UserLocation"] + + +class Filters(BaseModel): + """Filters for the search.""" + + allowed_domains: Optional[List[str]] = None + """Allowed domains for the search. + + If not provided, all domains are allowed. Subdomains of the provided domains are + allowed as well. + + Example: `["pubmed.ncbi.nlm.nih.gov"]` + """ + + +class UserLocation(BaseModel): + """The approximate location of the user.""" + + city: Optional[str] = None + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] = None + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] = None + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] = None + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + type: Optional[Literal["approximate"]] = None + """The type of location approximation. Always `approximate`.""" + + +class WebSearchTool(BaseModel): + """Search the Internet for sources related to the prompt. + + Learn more about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + """ + + type: Literal["web_search", "web_search_2025_08_26"] + """The type of the web search tool. 
+ + One of `web_search` or `web_search_2025_08_26`. + """ + + filters: Optional[Filters] = None + """Filters for the search.""" + + search_context_size: Optional[Literal["low", "medium", "high"]] = None + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] = None + """The approximate location of the user.""" diff --git a/src/openai/types/responses/web_search_tool_param.py b/src/openai/types/responses/web_search_tool_param.py new file mode 100644 index 0000000000..a4531a9304 --- /dev/null +++ b/src/openai/types/responses/web_search_tool_param.py @@ -0,0 +1,75 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from ..._types import SequenceNotStr + +__all__ = ["WebSearchToolParam", "Filters", "UserLocation"] + + +class Filters(TypedDict, total=False): + """Filters for the search.""" + + allowed_domains: Optional[SequenceNotStr[str]] + """Allowed domains for the search. + + If not provided, all domains are allowed. Subdomains of the provided domains are + allowed as well. + + Example: `["pubmed.ncbi.nlm.nih.gov"]` + """ + + +class UserLocation(TypedDict, total=False): + """The approximate location of the user.""" + + city: Optional[str] + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + type: Literal["approximate"] + """The type of location approximation. Always `approximate`.""" + + +class WebSearchToolParam(TypedDict, total=False): + """Search the Internet for sources related to the prompt. + + Learn more about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + """ + + type: Required[Literal["web_search", "web_search_2025_08_26"]] + """The type of the web search tool. + + One of `web_search` or `web_search_2025_08_26`. + """ + + filters: Optional[Filters] + """Filters for the search.""" + + search_context_size: Literal["low", "medium", "high"] + """High level guidance for the amount of context window space to use for the + search. + + One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] + """The approximate location of the user.""" diff --git a/src/openai/types/shared/__init__.py b/src/openai/types/shared/__init__.py index e085744e29..2930b9ae3b 100644 --- a/src/openai/types/shared/__init__.py +++ b/src/openai/types/shared/__init__.py @@ -1,5 +1,19 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
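The `WebSearchTool` / `WebSearchToolParam` shapes above map directly onto a `tools` entry. A rough sketch under the same `client.responses.create` assumption; the model, allowed domain, and location values are placeholders:

# Hedged usage sketch; model, allowed domain, and location values are placeholders.
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",  # placeholder model
    input="What changed in the latest Python release?",
    tools=[
        {
            "type": "web_search",
            # Filters and UserLocation follow the param shapes defined above.
            "filters": {"allowed_domains": ["docs.python.org"]},
            "user_location": {"type": "approximate", "country": "US", "region": "California"},
            "search_context_size": "medium",
        }
    ],
)
print(response.output_text)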
+from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .all_models import AllModels as AllModels +from .chat_model import ChatModel as ChatModel from .error_object import ErrorObject as ErrorObject +from .compound_filter import CompoundFilter as CompoundFilter +from .responses_model import ResponsesModel as ResponsesModel +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .custom_tool_input_format import CustomToolInputFormat as CustomToolInputFormat +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema +from .response_format_text_python import ResponseFormatTextPython as ResponseFormatTextPython +from .response_format_text_grammar import ResponseFormatTextGrammar as ResponseFormatTextGrammar diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py new file mode 100644 index 0000000000..ba8e1d82cf --- /dev/null +++ b/src/openai/types/shared/all_models.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .chat_model import ChatModel + +__all__ = ["AllModels"] + +AllModels: TypeAlias = Union[ + str, + ChatModel, + Literal[ + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], +] diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py new file mode 100644 index 0000000000..8223b81bef --- /dev/null +++ b/src/openai/types/shared/chat_model.py @@ -0,0 +1,80 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared/comparison_filter.py b/src/openai/types/shared/comparison_filter.py new file mode 100644 index 0000000000..852cac1738 --- /dev/null +++ b/src/openai/types/shared/comparison_filter.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(BaseModel): + """ + A filter used to compare a specified attribute key to a given value using a defined comparison operation. + """ + + key: str + """The key to compare against the value.""" + + type: Literal["eq", "ne", "gt", "gte", "lt", "lte"] + """ + Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, + `nin`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + - `in`: in + - `nin`: not in + """ + + value: Union[str, float, bool, List[Union[str, float]]] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared/compound_filter.py b/src/openai/types/shared/compound_filter.py new file mode 100644 index 0000000000..4801aaac1a --- /dev/null +++ b/src/openai/types/shared/compound_filter.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(BaseModel): + """Combine multiple filters using `and` or `or`.""" + + filters: List[Filter] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. + """ + + type: Literal["and", "or"] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared/custom_tool_input_format.py b/src/openai/types/shared/custom_tool_input_format.py new file mode 100644 index 0000000000..9391692b7b --- /dev/null +++ b/src/openai/types/shared/custom_tool_input_format.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["CustomToolInputFormat", "Text", "Grammar"] + + +class Text(BaseModel): + """Unconstrained free-form text.""" + + type: Literal["text"] + """Unconstrained text format. Always `text`.""" + + +class Grammar(BaseModel): + """A grammar defined by the user.""" + + definition: str + """The grammar definition.""" + + syntax: Literal["lark", "regex"] + """The syntax of the grammar definition. One of `lark` or `regex`.""" + + type: Literal["grammar"] + """Grammar format. Always `grammar`.""" + + +CustomToolInputFormat: TypeAlias = Annotated[Union[Text, Grammar], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/shared/function_definition.py b/src/openai/types/shared/function_definition.py index a39116d6bd..33ebb9ad3e 100644 --- a/src/openai/types/shared/function_definition.py +++ b/src/openai/types/shared/function_definition.py @@ -25,11 +25,19 @@ class FunctionDefinition(BaseModel): parameters: Optional[FunctionParameters] = None """The parameters the functions accepts, described as a JSON Schema object. - See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. Omitting `parameters` defines a function with an empty parameter list. """ + + strict: Optional[bool] = None + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](https://platform.openai.com/docs/guides/function-calling). + """ diff --git a/src/openai/types/shared/function_parameters.py b/src/openai/types/shared/function_parameters.py index c9524e4cb8..a3d83e3496 100644 --- a/src/openai/types/shared/function_parameters.py +++ b/src/openai/types/shared/function_parameters.py @@ -1,7 +1,8 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
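The `ComparisonFilter` and `CompoundFilter` shapes above are typically used as attribute filters, for example on a `file_search` tool. A sketch under that assumption; the vector store ID, attribute keys, and values are placeholders:

# Hedged usage sketch; vector store ID and attribute keys/values are placeholders.
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",  # placeholder model
    input="What does the 2024 policy say about remote work?",
    tools=[
        {
            "type": "file_search",
            "vector_store_ids": ["vs_123"],  # placeholder vector store ID
            "filters": {
                # A CompoundFilter combining two ComparisonFilters with `and`.
                "type": "and",
                "filters": [
                    {"type": "eq", "key": "year", "value": "2024"},
                    {"type": "eq", "key": "department", "value": "hr"},
                ],
            },
        }
    ],
)
print(response.output_text)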
from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared/metadata.py b/src/openai/types/shared/metadata.py new file mode 100644 index 0000000000..0da88c679c --- /dev/null +++ b/src/openai/types/shared/metadata.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py new file mode 100644 index 0000000000..14f56a04cd --- /dev/null +++ b/src/openai/types/shared/reasoning.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(BaseModel): + """**gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + effort: Optional[ReasoningEffort] = None + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None + """**Deprecated:** use `summary` instead. + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] = None + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. + + `concise` is supported for `computer-use-preview` models and all reasoning + models after `gpt-5`. + """ diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py new file mode 100644 index 0000000000..24d8516424 --- /dev/null +++ b/src/openai/types/shared/reasoning_effort.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
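The `Reasoning` object above is passed as the `reasoning` parameter on a request. A minimal sketch, assuming a reasoning-capable model; the model name and prompt are placeholders:

# Hedged usage sketch; the model name and prompt are placeholders.
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="o4-mini",  # placeholder reasoning model
    input="Prove that the sum of two even integers is even.",
    # Reasoning shape defined above: an effort level plus an optional summary setting.
    reasoning={"effort": "medium", "summary": "auto"},
)
print(response.output_text)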
+ +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] diff --git a/src/openai/types/shared/response_format_json_object.py b/src/openai/types/shared/response_format_json_object.py new file mode 100644 index 0000000000..98e0da6a2c --- /dev/null +++ b/src/openai/types/shared/response_format_json_object.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(BaseModel): + """JSON object response format. + + An older method of generating JSON responses. + Using `json_schema` is recommended for models that support it. Note that the + model will not generate JSON without a system or user message instructing it + to do so. + """ + + type: Literal["json_object"] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared/response_format_json_schema.py b/src/openai/types/shared/response_format_json_schema.py new file mode 100644 index 0000000000..9b2adb66cd --- /dev/null +++ b/src/openai/types/shared/response_format_json_schema.py @@ -0,0 +1,56 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(BaseModel): + """Structured Outputs configuration options, including a JSON Schema.""" + + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema_: Optional[Dict[str, object]] = FieldInfo(alias="schema", default=None) + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(BaseModel): + """JSON Schema response format. + + Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + """ + + json_schema: JSONSchema + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" diff --git a/src/openai/types/shared/response_format_text.py b/src/openai/types/shared/response_format_text.py new file mode 100644 index 0000000000..9f4bc0d13e --- /dev/null +++ b/src/openai/types/shared/response_format_text.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(BaseModel): + """Default response format. Used to generate text responses.""" + + type: Literal["text"] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/shared/response_format_text_grammar.py b/src/openai/types/shared/response_format_text_grammar.py new file mode 100644 index 0000000000..84cd141278 --- /dev/null +++ b/src/openai/types/shared/response_format_text_grammar.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatTextGrammar"] + + +class ResponseFormatTextGrammar(BaseModel): + """ + A custom grammar for the model to follow when generating text. + Learn more in the [custom grammars guide](https://platform.openai.com/docs/guides/custom-grammars). + """ + + grammar: str + """The custom grammar for the model to follow.""" + + type: Literal["grammar"] + """The type of response format being defined. Always `grammar`.""" diff --git a/src/openai/types/shared/response_format_text_python.py b/src/openai/types/shared/response_format_text_python.py new file mode 100644 index 0000000000..1b04cb62ba --- /dev/null +++ b/src/openai/types/shared/response_format_text_python.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatTextPython"] + + +class ResponseFormatTextPython(BaseModel): + """Configure the model to generate valid Python code. + + See the + [custom grammars guide](https://platform.openai.com/docs/guides/custom-grammars) for more details. + """ + + type: Literal["python"] + """The type of response format being defined. Always `python`.""" diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py new file mode 100644 index 0000000000..38cdea9a94 --- /dev/null +++ b/src/openai/types/shared/responses_model.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from .chat_model import ChatModel + +__all__ = ["ResponsesModel"] + +ResponsesModel: TypeAlias = Union[ + str, + ChatModel, + Literal[ + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], +] diff --git a/src/openai/types/shared_params/__init__.py b/src/openai/types/shared_params/__init__.py index ef638cb279..b6c0912b0f 100644 --- a/src/openai/types/shared_params/__init__.py +++ b/src/openai/types/shared_params/__init__.py @@ -1,4 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
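The response format types above (text, JSON object, JSON Schema, grammar, Python) correspond to the `response_format` parameter in Chat Completions. A sketch of the JSON Schema variant; the model name and schema are placeholders:

# Hedged usage sketch; the model name and schema are placeholders.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "Extract the name and age from: Ada, 36."}],
    response_format={
        # ResponseFormatJSONSchema: a named JSON Schema with strict adherence.
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
                "required": ["name", "age"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    },
)
print(completion.choices[0].message.content)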
+from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .chat_model import ChatModel as ChatModel +from .compound_filter import CompoundFilter as CompoundFilter +from .responses_model import ResponsesModel as ResponsesModel +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .custom_tool_input_format import CustomToolInputFormat as CustomToolInputFormat +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py new file mode 100644 index 0000000000..c1937a8312 --- /dev/null +++ b/src/openai/types/shared_params/chat_model.py @@ -0,0 +1,82 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-chat-latest", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared_params/comparison_filter.py b/src/openai/types/shared_params/comparison_filter.py new file mode 100644 index 0000000000..363688e467 --- /dev/null +++ b/src/openai/types/shared_params/comparison_filter.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +from ..._types import SequenceNotStr + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(TypedDict, total=False): + """ + A filter used to compare a specified attribute key to a given value using a defined comparison operation. + """ + + key: Required[str] + """The key to compare against the value.""" + + type: Required[Literal["eq", "ne", "gt", "gte", "lt", "lte"]] + """ + Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, + `nin`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + - `in`: in + - `nin`: not in + """ + + value: Required[Union[str, float, bool, SequenceNotStr[Union[str, float]]]] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared_params/compound_filter.py b/src/openai/types/shared_params/compound_filter.py new file mode 100644 index 0000000000..9358e46083 --- /dev/null +++ b/src/openai/types/shared_params/compound_filter.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(TypedDict, total=False): + """Combine multiple filters using `and` or `or`.""" + + filters: Required[Iterable[Filter]] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. + """ + + type: Required[Literal["and", "or"]] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared_params/custom_tool_input_format.py b/src/openai/types/shared_params/custom_tool_input_format.py new file mode 100644 index 0000000000..ddc71cacb4 --- /dev/null +++ b/src/openai/types/shared_params/custom_tool_input_format.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = ["CustomToolInputFormat", "Text", "Grammar"] + + +class Text(TypedDict, total=False): + """Unconstrained free-form text.""" + + type: Required[Literal["text"]] + """Unconstrained text format. Always `text`.""" + + +class Grammar(TypedDict, total=False): + """A grammar defined by the user.""" + + definition: Required[str] + """The grammar definition.""" + + syntax: Required[Literal["lark", "regex"]] + """The syntax of the grammar definition. One of `lark` or `regex`.""" + + type: Required[Literal["grammar"]] + """Grammar format. 
Always `grammar`.""" + + +CustomToolInputFormat: TypeAlias = Union[Text, Grammar] diff --git a/src/openai/types/shared_params/function_definition.py b/src/openai/types/shared_params/function_definition.py index 58d0203b4f..b3fdaf86ff 100644 --- a/src/openai/types/shared_params/function_definition.py +++ b/src/openai/types/shared_params/function_definition.py @@ -2,9 +2,10 @@ from __future__ import annotations +from typing import Optional from typing_extensions import Required, TypedDict -from ...types import shared_params +from .function_parameters import FunctionParameters __all__ = ["FunctionDefinition"] @@ -23,14 +24,22 @@ class FunctionDefinition(TypedDict, total=False): how to call the function. """ - parameters: shared_params.FunctionParameters + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. - See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. Omitting `parameters` defines a function with an empty parameter list. """ + + strict: Optional[bool] + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](https://platform.openai.com/docs/guides/function-calling). + """ diff --git a/src/openai/types/shared_params/function_parameters.py b/src/openai/types/shared_params/function_parameters.py index 5b40efb78f..45fc742d3b 100644 --- a/src/openai/types/shared_params/function_parameters.py +++ b/src/openai/types/shared_params/function_parameters.py @@ -3,7 +3,8 @@ from __future__ import annotations from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared_params/metadata.py b/src/openai/types/shared_params/metadata.py new file mode 100644 index 0000000000..821650b48b --- /dev/null +++ b/src/openai/types/shared_params/metadata.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py new file mode 100644 index 0000000000..2bd7ce7268 --- /dev/null +++ b/src/openai/types/shared_params/reasoning.py @@ -0,0 +1,53 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(TypedDict, total=False): + """**gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). 
+ """ + + effort: Optional[ReasoningEffort] + """ + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported + reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool + calls are supported for all reasoning values in gpt-5.1. + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not + support `none`. + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is supported for all models after `gpt-5.1-codex-max`. + """ + + generate_summary: Optional[Literal["auto", "concise", "detailed"]] + """**Deprecated:** use `summary` instead. + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `auto`, + `concise`, or `detailed`. + """ + + summary: Optional[Literal["auto", "concise", "detailed"]] + """A summary of the reasoning performed by the model. + + This can be useful for debugging and understanding the model's reasoning + process. One of `auto`, `concise`, or `detailed`. + + `concise` is supported for `computer-use-preview` models and all reasoning + models after `gpt-5`. + """ diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py new file mode 100644 index 0000000000..8518c2b141 --- /dev/null +++ b/src/openai/types/shared_params/reasoning_effort.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] diff --git a/src/openai/types/shared_params/response_format_json_object.py b/src/openai/types/shared_params/response_format_json_object.py new file mode 100644 index 0000000000..ef5d43be2e --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_object.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(TypedDict, total=False): + """JSON object response format. + + An older method of generating JSON responses. + Using `json_schema` is recommended for models that support it. Note that the + model will not generate JSON without a system or user message instructing it + to do so. + """ + + type: Required[Literal["json_object"]] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared_params/response_format_json_schema.py b/src/openai/types/shared_params/response_format_json_schema.py new file mode 100644 index 0000000000..0a0e846873 --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_schema.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(TypedDict, total=False): + """Structured Outputs configuration options, including a JSON Schema.""" + + name: Required[str] + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema: Dict[str, object] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(TypedDict, total=False): + """JSON Schema response format. + + Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + """ + + json_schema: Required[JSONSchema] + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Required[Literal["json_schema"]] + """The type of response format being defined. Always `json_schema`.""" diff --git a/src/openai/types/shared_params/response_format_text.py b/src/openai/types/shared_params/response_format_text.py new file mode 100644 index 0000000000..c195036f95 --- /dev/null +++ b/src/openai/types/shared_params/response_format_text.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(TypedDict, total=False): + """Default response format. Used to generate text responses.""" + + type: Required[Literal["text"]] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py new file mode 100644 index 0000000000..ad44dd6bf7 --- /dev/null +++ b/src/openai/types/shared_params/responses_model.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias + +from ..shared.chat_model import ChatModel + +__all__ = ["ResponsesModel"] + +ResponsesModel: TypeAlias = Union[ + str, + ChatModel, + Literal[ + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], +] diff --git a/src/openai/types/static_file_chunking_strategy.py b/src/openai/types/static_file_chunking_strategy.py new file mode 100644 index 0000000000..cb842442c1 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["StaticFileChunkingStrategy"] + + +class StaticFileChunkingStrategy(BaseModel): + chunk_overlap_tokens: int + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: int + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/static_file_chunking_strategy_object.py b/src/openai/types/static_file_chunking_strategy_object.py new file mode 100644 index 0000000000..2a95dce5b3 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel +from .static_file_chunking_strategy import StaticFileChunkingStrategy + +__all__ = ["StaticFileChunkingStrategyObject"] + + +class StaticFileChunkingStrategyObject(BaseModel): + static: StaticFileChunkingStrategy + + type: Literal["static"] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_object_param.py b/src/openai/types/static_file_chunking_strategy_object_param.py new file mode 100644 index 0000000000..40188a41d5 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam + +__all__ = ["StaticFileChunkingStrategyObjectParam"] + + +class StaticFileChunkingStrategyObjectParam(TypedDict, total=False): + """Customize your own chunking strategy by setting chunk size and chunk overlap.""" + + static: Required[StaticFileChunkingStrategyParam] + + type: Required[Literal["static"]] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_param.py b/src/openai/types/static_file_chunking_strategy_param.py new file mode 100644 index 0000000000..f917ac5647 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["StaticFileChunkingStrategyParam"] + + +class StaticFileChunkingStrategyParam(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/upload.py b/src/openai/types/upload.py new file mode 100644 index 0000000000..d248da6ee3 --- /dev/null +++ b/src/openai/types/upload.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .file_object import FileObject + +__all__ = ["Upload"] + + +class Upload(BaseModel): + """The Upload object can accept byte chunks in the form of Parts.""" + + id: str + """The Upload unique identifier, which can be referenced in API endpoints.""" + + bytes: int + """The intended number of bytes to be uploaded.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Upload was created.""" + + expires_at: int + """The Unix timestamp (in seconds) for when the Upload will expire.""" + + filename: str + """The name of the file to be uploaded.""" + + object: Literal["upload"] + """The object type, which is always "upload".""" + + purpose: str + """The intended purpose of the file. + + [Please refer here](https://platform.openai.com/docs/api-reference/files/object#files/object-purpose) + for acceptable values. + """ + + status: Literal["pending", "completed", "cancelled", "expired"] + """The status of the Upload.""" + + file: Optional[FileObject] = None + """The `File` object represents a document that has been uploaded to OpenAI.""" diff --git a/src/openai/types/upload_complete_params.py b/src/openai/types/upload_complete_params.py new file mode 100644 index 0000000000..846a241dc7 --- /dev/null +++ b/src/openai/types/upload_complete_params.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .._types import SequenceNotStr + +__all__ = ["UploadCompleteParams"] + + +class UploadCompleteParams(TypedDict, total=False): + part_ids: Required[SequenceNotStr[str]] + """The ordered list of Part IDs.""" + + md5: str + """ + The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + """ diff --git a/src/openai/types/upload_create_params.py b/src/openai/types/upload_create_params.py new file mode 100644 index 0000000000..c25d65bedd --- /dev/null +++ b/src/openai/types/upload_create_params.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
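The static chunking strategy params above are accepted when creating a vector store. A sketch assuming the top-level `client.vector_stores.create` surface this diff introduces; the vector store name and file ID are placeholders:

# Hedged usage sketch; the vector store name and file ID are placeholders.
from openai import OpenAI

client = OpenAI()

vector_store = client.vector_stores.create(
    name="support-docs",        # placeholder name
    file_ids=["file_abc123"],   # placeholder file ID
    chunking_strategy={
        # StaticFileChunkingStrategyObjectParam defined above.
        "type": "static",
        "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400},
    },
)
print(vector_store.id)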
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .file_purpose import FilePurpose + +__all__ = ["UploadCreateParams", "ExpiresAfter"] + + +class UploadCreateParams(TypedDict, total=False): + bytes: Required[int] + """The number of bytes in the file you are uploading.""" + + filename: Required[str] + """The name of the file to upload.""" + + mime_type: Required[str] + """The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + """ + + purpose: Required[FilePurpose] + """The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + """ + + expires_after: ExpiresAfter + """The expiration policy for a file. + + By default, files with `purpose=batch` expire after 30 days and all other files + are persisted until they are manually deleted. + """ + + +class ExpiresAfter(TypedDict, total=False): + """The expiration policy for a file. + + By default, files with `purpose=batch` expire after 30 days and all other files are persisted until they are manually deleted. + """ + + anchor: Required[Literal["created_at"]] + """Anchor timestamp after which the expiration policy applies. + + Supported anchors: `created_at`. + """ + + seconds: Required[int] + """The number of seconds after the anchor time that the file will expire. + + Must be between 3600 (1 hour) and 2592000 (30 days). + """ diff --git a/src/openai/types/uploads/__init__.py b/src/openai/types/uploads/__init__.py new file mode 100644 index 0000000000..41deb0ab4b --- /dev/null +++ b/src/openai/types/uploads/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .upload_part import UploadPart as UploadPart +from .part_create_params import PartCreateParams as PartCreateParams diff --git a/src/openai/types/uploads/part_create_params.py b/src/openai/types/uploads/part_create_params.py new file mode 100644 index 0000000000..9851ca41e9 --- /dev/null +++ b/src/openai/types/uploads/part_create_params.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ..._types import FileTypes + +__all__ = ["PartCreateParams"] + + +class PartCreateParams(TypedDict, total=False): + data: Required[FileTypes] + """The chunk of bytes for this Part.""" diff --git a/src/openai/types/uploads/upload_part.py b/src/openai/types/uploads/upload_part.py new file mode 100644 index 0000000000..e585b1a227 --- /dev/null +++ b/src/openai/types/uploads/upload_part.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["UploadPart"] + + +class UploadPart(BaseModel): + """The upload Part represents a chunk of bytes we can add to an Upload object.""" + + id: str + """The upload Part unique identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Part was created.""" + + object: Literal["upload.part"] + """The object type, which is always `upload.part`.""" + + upload_id: str + """The ID of the Upload object that this Part was added to.""" diff --git a/src/openai/types/beta/vector_store.py b/src/openai/types/vector_store.py similarity index 82% rename from src/openai/types/beta/vector_store.py rename to src/openai/types/vector_store.py index 488961b444..82899ecd1b 100644 --- a/src/openai/types/beta/vector_store.py +++ b/src/openai/types/vector_store.py @@ -3,7 +3,8 @@ from typing import Optional from typing_extensions import Literal -from ..._models import BaseModel +from .._models import BaseModel +from .shared.metadata import Metadata __all__ = ["VectorStore", "FileCounts", "ExpiresAfter"] @@ -26,6 +27,8 @@ class FileCounts(BaseModel): class ExpiresAfter(BaseModel): + """The expiration policy for a vector store.""" + anchor: Literal["last_active_at"] """Anchor timestamp after which the expiration policy applies. @@ -37,6 +40,10 @@ class ExpiresAfter(BaseModel): class VectorStore(BaseModel): + """ + A vector store is a collection of processed files can be used by the `file_search` tool. + """ + id: str """The identifier, which can be referenced in API endpoints.""" @@ -48,12 +55,14 @@ class VectorStore(BaseModel): last_active_at: Optional[int] = None """The Unix timestamp (in seconds) for when the vector store was last active.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: str diff --git a/src/openai/types/beta/vector_store_create_params.py b/src/openai/types/vector_store_create_params.py similarity index 57% rename from src/openai/types/beta/vector_store_create_params.py rename to src/openai/types/vector_store_create_params.py index f1a3abcbdf..2b72562984 100644 --- a/src/openai/types/beta/vector_store_create_params.py +++ b/src/openai/types/vector_store_create_params.py @@ -2,29 +2,48 @@ from __future__ import annotations -from typing import List, Optional +from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .._types import SequenceNotStr +from .shared_params.metadata import Metadata +from .file_chunking_strategy_param import FileChunkingStrategyParam + __all__ = ["VectorStoreCreateParams", "ExpiresAfter"] class VectorStoreCreateParams(TypedDict, total=False): + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + + description: str + """A description for the vector store. + + Can be used to describe the vector store's purpose. 
+ """ + expires_after: ExpiresAfter """The expiration policy for a vector store.""" - file_ids: List[str] + file_ids: SequenceNotStr[str] """ A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: str @@ -32,6 +51,8 @@ class VectorStoreCreateParams(TypedDict, total=False): class ExpiresAfter(TypedDict, total=False): + """The expiration policy for a vector store.""" + anchor: Required[Literal["last_active_at"]] """Anchor timestamp after which the expiration policy applies. diff --git a/src/openai/types/beta/vector_store_deleted.py b/src/openai/types/vector_store_deleted.py similarity index 89% rename from src/openai/types/beta/vector_store_deleted.py rename to src/openai/types/vector_store_deleted.py index 21ccda1db5..dfac9ce8bd 100644 --- a/src/openai/types/beta/vector_store_deleted.py +++ b/src/openai/types/vector_store_deleted.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ..._models import BaseModel +from .._models import BaseModel __all__ = ["VectorStoreDeleted"] diff --git a/src/openai/types/beta/vector_store_list_params.py b/src/openai/types/vector_store_list_params.py similarity index 93% rename from src/openai/types/beta/vector_store_list_params.py rename to src/openai/types/vector_store_list_params.py index f39f67266d..e26ff90a85 100644 --- a/src/openai/types/beta/vector_store_list_params.py +++ b/src/openai/types/vector_store_list_params.py @@ -21,7 +21,7 @@ class VectorStoreListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_store_search_params.py b/src/openai/types/vector_store_search_params.py new file mode 100644 index 0000000000..851d63c5d1 --- /dev/null +++ b/src/openai/types/vector_store_search_params.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .._types import SequenceNotStr +from .shared_params.compound_filter import CompoundFilter +from .shared_params.comparison_filter import ComparisonFilter + +__all__ = ["VectorStoreSearchParams", "Filters", "RankingOptions"] + + +class VectorStoreSearchParams(TypedDict, total=False): + query: Required[Union[str, SequenceNotStr[str]]] + """A query string for a search""" + + filters: Filters + """A filter to apply based on file attributes.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. 
+ """ + + ranking_options: RankingOptions + """Ranking options for search.""" + + rewrite_query: bool + """Whether to rewrite the natural language query for vector search.""" + + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(TypedDict, total=False): + """Ranking options for search.""" + + ranker: Literal["none", "auto", "default-2024-11-15"] + """Enable re-ranking; set to `none` to disable, which can help reduce latency.""" + + score_threshold: float diff --git a/src/openai/types/vector_store_search_response.py b/src/openai/types/vector_store_search_response.py new file mode 100644 index 0000000000..d78b71bfba --- /dev/null +++ b/src/openai/types/vector_store_search_response.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorStoreSearchResponse", "Content"] + + +class Content(BaseModel): + text: str + """The text content returned from search.""" + + type: Literal["text"] + """The type of content.""" + + +class VectorStoreSearchResponse(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + content: List[Content] + """Content chunks from the file.""" + + file_id: str + """The ID of the vector store file.""" + + filename: str + """The name of the vector store file.""" + + score: float + """The similarity score for the result.""" diff --git a/src/openai/types/beta/vector_store_update_params.py b/src/openai/types/vector_store_update_params.py similarity index 74% rename from src/openai/types/beta/vector_store_update_params.py rename to src/openai/types/vector_store_update_params.py index 0f9593e476..7c6f891170 100644 --- a/src/openai/types/beta/vector_store_update_params.py +++ b/src/openai/types/vector_store_update_params.py @@ -5,6 +5,8 @@ from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .shared_params.metadata import Metadata + __all__ = ["VectorStoreUpdateParams", "ExpiresAfter"] @@ -12,12 +14,14 @@ class VectorStoreUpdateParams(TypedDict, total=False): expires_after: Optional[ExpiresAfter] """The expiration policy for a vector store.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: Optional[str] @@ -25,6 +29,8 @@ class VectorStoreUpdateParams(TypedDict, total=False): class ExpiresAfter(TypedDict, total=False): + """The expiration policy for a vector store.""" + anchor: Required[Literal["last_active_at"]] """Anchor timestamp after which the expiration policy applies. 
diff --git a/src/openai/types/beta/vector_stores/__init__.py b/src/openai/types/vector_stores/__init__.py similarity index 82% rename from src/openai/types/beta/vector_stores/__init__.py rename to src/openai/types/vector_stores/__init__.py index ff05dd63d8..96ce301481 100644 --- a/src/openai/types/beta/vector_stores/__init__.py +++ b/src/openai/types/vector_stores/__init__.py @@ -5,6 +5,8 @@ from .file_list_params import FileListParams as FileListParams from .vector_store_file import VectorStoreFile as VectorStoreFile from .file_create_params import FileCreateParams as FileCreateParams +from .file_update_params import FileUpdateParams as FileUpdateParams +from .file_content_response import FileContentResponse as FileContentResponse from .vector_store_file_batch import VectorStoreFileBatch as VectorStoreFileBatch from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams from .vector_store_file_deleted import VectorStoreFileDeleted as VectorStoreFileDeleted diff --git a/src/openai/types/vector_stores/file_batch_create_params.py b/src/openai/types/vector_stores/file_batch_create_params.py new file mode 100644 index 0000000000..2ab98a83ab --- /dev/null +++ b/src/openai/types/vector_stores/file_batch_create_params.py @@ -0,0 +1,70 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable, Optional +from typing_extensions import Required, TypedDict + +from ..._types import SequenceNotStr +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileBatchCreateParams", "File"] + + +class FileBatchCreateParams(TypedDict, total=False): + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + + file_ids: SequenceNotStr[str] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + """ + + files: Iterable[File] + """ + A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + """ + + +class File(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/beta/vector_stores/file_batch_list_files_params.py b/src/openai/types/vector_stores/file_batch_list_files_params.py similarity index 94% rename from src/openai/types/beta/vector_stores/file_batch_list_files_params.py rename to src/openai/types/vector_stores/file_batch_list_files_params.py index 24dee7d5a5..2a0a6c6aa7 100644 --- a/src/openai/types/beta/vector_stores/file_batch_list_files_params.py +++ b/src/openai/types/vector_stores/file_batch_list_files_params.py @@ -23,7 +23,7 @@ class FileBatchListFilesParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_stores/file_content_response.py b/src/openai/types/vector_stores/file_content_response.py new file mode 100644 index 0000000000..32db2f2ce9 --- /dev/null +++ b/src/openai/types/vector_stores/file_content_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["FileContentResponse"] + + +class FileContentResponse(BaseModel): + text: Optional[str] = None + """The text content""" + + type: Optional[str] = None + """The content type (currently only `"text"`)""" diff --git a/src/openai/types/vector_stores/file_create_params.py b/src/openai/types/vector_stores/file_create_params.py new file mode 100644 index 0000000000..5b8989251a --- /dev/null +++ b/src/openai/types/vector_stores/file_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. 
Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/beta/vector_stores/file_list_params.py b/src/openai/types/vector_stores/file_list_params.py similarity index 94% rename from src/openai/types/beta/vector_stores/file_list_params.py rename to src/openai/types/vector_stores/file_list_params.py index 23dd7f0d94..867b5fb3bb 100644 --- a/src/openai/types/beta/vector_stores/file_list_params.py +++ b/src/openai/types/vector_stores/file_list_params.py @@ -21,7 +21,7 @@ class FileListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_stores/file_update_params.py b/src/openai/types/vector_stores/file_update_params.py new file mode 100644 index 0000000000..ebf540d046 --- /dev/null +++ b/src/openai/types/vector_stores/file_update_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +__all__ = ["FileUpdateParams"] + + +class FileUpdateParams(TypedDict, total=False): + vector_store_id: Required[str] + + attributes: Required[Optional[Dict[str, Union[str, float, bool]]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ diff --git a/src/openai/types/beta/vector_stores/vector_store_file.py b/src/openai/types/vector_stores/vector_store_file.py similarity index 58% rename from src/openai/types/beta/vector_stores/vector_store_file.py rename to src/openai/types/vector_stores/vector_store_file.py index 3fab489602..c1ea02227f 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file.py +++ b/src/openai/types/vector_stores/vector_store_file.py @@ -1,22 +1,30 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Union, Optional from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel +from ..file_chunking_strategy import FileChunkingStrategy __all__ = ["VectorStoreFile", "LastError"] class LastError(BaseModel): - code: Literal["internal_error", "file_not_found", "parsing_error", "unhandled_mime_type"] - """One of `server_error` or `rate_limit_exceeded`.""" + """The last error associated with this vector store file. + + Will be `null` if there are no errors. 
+ """ + + code: Literal["server_error", "unsupported_file", "invalid_file"] + """One of `server_error`, `unsupported_file`, or `invalid_file`.""" message: str """A human-readable description of the error.""" class VectorStoreFile(BaseModel): + """A list of files attached to a vector store.""" + id: str """The identifier, which can be referenced in API endpoints.""" @@ -52,3 +60,15 @@ class VectorStoreFile(BaseModel): that the [File](https://platform.openai.com/docs/api-reference/files) is attached to. """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: Optional[FileChunkingStrategy] = None + """The strategy used to chunk the file.""" diff --git a/src/openai/types/beta/vector_stores/vector_store_file_batch.py b/src/openai/types/vector_stores/vector_store_file_batch.py similarity index 94% rename from src/openai/types/beta/vector_stores/vector_store_file_batch.py rename to src/openai/types/vector_stores/vector_store_file_batch.py index df130a58de..b07eb25da5 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file_batch.py +++ b/src/openai/types/vector_stores/vector_store_file_batch.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel __all__ = ["VectorStoreFileBatch", "FileCounts"] @@ -25,6 +25,8 @@ class FileCounts(BaseModel): class VectorStoreFileBatch(BaseModel): + """A batch of files attached to a vector store.""" + id: str """The identifier, which can be referenced in API endpoints.""" diff --git a/src/openai/types/beta/vector_stores/vector_store_file_deleted.py b/src/openai/types/vector_stores/vector_store_file_deleted.py similarity index 89% rename from src/openai/types/beta/vector_stores/vector_store_file_deleted.py rename to src/openai/types/vector_stores/vector_store_file_deleted.py index ae37f84364..5c856f26cd 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file_deleted.py +++ b/src/openai/types/vector_stores/vector_store_file_deleted.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel __all__ = ["VectorStoreFileDeleted"] diff --git a/src/openai/types/video.py b/src/openai/types/video.py new file mode 100644 index 0000000000..e732ea54ec --- /dev/null +++ b/src/openai/types/video.py @@ -0,0 +1,55 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .video_size import VideoSize +from .video_model import VideoModel +from .video_seconds import VideoSeconds +from .video_create_error import VideoCreateError + +__all__ = ["Video"] + + +class Video(BaseModel): + """Structured information describing a generated video job.""" + + id: str + """Unique identifier for the video job.""" + + completed_at: Optional[int] = None + """Unix timestamp (seconds) for when the job completed, if finished.""" + + created_at: int + """Unix timestamp (seconds) for when the job was created.""" + + error: Optional[VideoCreateError] = None + """Error payload that explains why generation failed, if applicable.""" + + expires_at: Optional[int] = None + """Unix timestamp (seconds) for when the downloadable assets expire, if set.""" + + model: VideoModel + """The video generation model that produced the job.""" + + object: Literal["video"] + """The object type, which is always `video`.""" + + progress: int + """Approximate completion percentage for the generation task.""" + + prompt: Optional[str] = None + """The prompt that was used to generate the video.""" + + remixed_from_video_id: Optional[str] = None + """Identifier of the source video if this video is a remix.""" + + seconds: VideoSeconds + """Duration of the generated clip in seconds.""" + + size: VideoSize + """The resolution of the generated video.""" + + status: Literal["queued", "in_progress", "completed", "failed"] + """Current lifecycle status of the video job.""" diff --git a/src/openai/types/video_create_error.py b/src/openai/types/video_create_error.py new file mode 100644 index 0000000000..ae328b78ea --- /dev/null +++ b/src/openai/types/video_create_error.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .._models import BaseModel + +__all__ = ["VideoCreateError"] + + +class VideoCreateError(BaseModel): + code: str + + message: str diff --git a/src/openai/types/video_create_params.py b/src/openai/types/video_create_params.py new file mode 100644 index 0000000000..c4d3e0851f --- /dev/null +++ b/src/openai/types/video_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .._types import FileTypes +from .video_size import VideoSize +from .video_model import VideoModel +from .video_seconds import VideoSeconds + +__all__ = ["VideoCreateParams"] + + +class VideoCreateParams(TypedDict, total=False): + prompt: Required[str] + """Text prompt that describes the video to generate.""" + + input_reference: FileTypes + """Optional image reference that guides generation.""" + + model: VideoModel + """The video generation model to use (allowed values: sora-2, sora-2-pro). + + Defaults to `sora-2`. + """ + + seconds: VideoSeconds + """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.""" + + size: VideoSize + """ + Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. + """ diff --git a/src/openai/types/video_delete_response.py b/src/openai/types/video_delete_response.py new file mode 100644 index 0000000000..1ed543aec8 --- /dev/null +++ b/src/openai/types/video_delete_response.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VideoDeleteResponse"] + + +class VideoDeleteResponse(BaseModel): + """Confirmation payload returned after deleting a video.""" + + id: str + """Identifier of the deleted video.""" + + deleted: bool + """Indicates that the video resource was deleted.""" + + object: Literal["video.deleted"] + """The object type that signals the deletion response.""" diff --git a/src/openai/types/video_download_content_params.py b/src/openai/types/video_download_content_params.py new file mode 100644 index 0000000000..8c113d6715 --- /dev/null +++ b/src/openai/types/video_download_content_params.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["VideoDownloadContentParams"] + + +class VideoDownloadContentParams(TypedDict, total=False): + variant: Literal["video", "thumbnail", "spritesheet"] + """Which downloadable asset to return. Defaults to the MP4 video.""" diff --git a/src/openai/types/video_list_params.py b/src/openai/types/video_list_params.py new file mode 100644 index 0000000000..bf55ba7fa2 --- /dev/null +++ b/src/openai/types/video_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["VideoListParams"] + + +class VideoListParams(TypedDict, total=False): + after: str + """Identifier for the last item from the previous pagination request""" + + limit: int + """Number of items to retrieve""" + + order: Literal["asc", "desc"] + """Sort order of results by timestamp. + + Use `asc` for ascending order or `desc` for descending order. + """ diff --git a/src/openai/types/video_model.py b/src/openai/types/video_model.py new file mode 100644 index 0000000000..e96e7685f8 --- /dev/null +++ b/src/openai/types/video_model.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["VideoModel"] + +VideoModel: TypeAlias = Literal[ + "sora-2", "sora-2-pro", "sora-2-2025-10-06", "sora-2-pro-2025-10-06", "sora-2-2025-12-08" +] diff --git a/src/openai/types/video_remix_params.py b/src/openai/types/video_remix_params.py new file mode 100644 index 0000000000..15388d6172 --- /dev/null +++ b/src/openai/types/video_remix_params.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["VideoRemixParams"] + + +class VideoRemixParams(TypedDict, total=False): + prompt: Required[str] + """Updated text prompt that directs the remix generation.""" diff --git a/src/openai/types/video_seconds.py b/src/openai/types/video_seconds.py new file mode 100644 index 0000000000..e50d37dc51 --- /dev/null +++ b/src/openai/types/video_seconds.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["VideoSeconds"] + +VideoSeconds: TypeAlias = Literal["4", "8", "12"] diff --git a/src/openai/types/video_size.py b/src/openai/types/video_size.py new file mode 100644 index 0000000000..215ac8815a --- /dev/null +++ b/src/openai/types/video_size.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["VideoSize"] + +VideoSize: TypeAlias = Literal["720x1280", "1280x720", "1024x1792", "1792x1024"] diff --git a/src/openai/types/webhooks/__init__.py b/src/openai/types/webhooks/__init__.py new file mode 100644 index 0000000000..8b9e55653b --- /dev/null +++ b/src/openai/types/webhooks/__init__.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent +from .batch_failed_webhook_event import BatchFailedWebhookEvent as BatchFailedWebhookEvent +from .batch_expired_webhook_event import BatchExpiredWebhookEvent as BatchExpiredWebhookEvent +from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent as BatchCancelledWebhookEvent +from .batch_completed_webhook_event import BatchCompletedWebhookEvent as BatchCompletedWebhookEvent +from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent as EvalRunFailedWebhookEvent +from .response_failed_webhook_event import ResponseFailedWebhookEvent as ResponseFailedWebhookEvent +from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent as EvalRunCanceledWebhookEvent +from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent as EvalRunSucceededWebhookEvent +from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent as ResponseCancelledWebhookEvent +from .response_completed_webhook_event import ResponseCompletedWebhookEvent as ResponseCompletedWebhookEvent +from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent as ResponseIncompleteWebhookEvent +from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent as FineTuningJobFailedWebhookEvent +from .realtime_call_incoming_webhook_event import RealtimeCallIncomingWebhookEvent as RealtimeCallIncomingWebhookEvent +from .fine_tuning_job_cancelled_webhook_event import ( + FineTuningJobCancelledWebhookEvent as FineTuningJobCancelledWebhookEvent, +) +from .fine_tuning_job_succeeded_webhook_event import ( + FineTuningJobSucceededWebhookEvent as FineTuningJobSucceededWebhookEvent, +) diff --git a/src/openai/types/webhooks/batch_cancelled_webhook_event.py b/src/openai/types/webhooks/batch_cancelled_webhook_event.py new file mode 100644 index 0000000000..9d1c485f5e --- /dev/null +++ b/src/openai/types/webhooks/batch_cancelled_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BatchCancelledWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the batch API request.""" + + +class BatchCancelledWebhookEvent(BaseModel): + """Sent when a batch API request has been cancelled.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the batch API request was cancelled.""" + + data: Data + """Event data payload.""" + + type: Literal["batch.cancelled"] + """The type of the event. Always `batch.cancelled`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/batch_completed_webhook_event.py b/src/openai/types/webhooks/batch_completed_webhook_event.py new file mode 100644 index 0000000000..5ae8191789 --- /dev/null +++ b/src/openai/types/webhooks/batch_completed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BatchCompletedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the batch API request.""" + + +class BatchCompletedWebhookEvent(BaseModel): + """Sent when a batch API request has been completed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the batch API request was completed.""" + + data: Data + """Event data payload.""" + + type: Literal["batch.completed"] + """The type of the event. Always `batch.completed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/batch_expired_webhook_event.py b/src/openai/types/webhooks/batch_expired_webhook_event.py new file mode 100644 index 0000000000..2f08a7f579 --- /dev/null +++ b/src/openai/types/webhooks/batch_expired_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BatchExpiredWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the batch API request.""" + + +class BatchExpiredWebhookEvent(BaseModel): + """Sent when a batch API request has expired.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the batch API request expired.""" + + data: Data + """Event data payload.""" + + type: Literal["batch.expired"] + """The type of the event. Always `batch.expired`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/batch_failed_webhook_event.py b/src/openai/types/webhooks/batch_failed_webhook_event.py new file mode 100644 index 0000000000..7166616588 --- /dev/null +++ b/src/openai/types/webhooks/batch_failed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BatchFailedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the batch API request.""" + + +class BatchFailedWebhookEvent(BaseModel): + """Sent when a batch API request has failed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the batch API request failed.""" + + data: Data + """Event data payload.""" + + type: Literal["batch.failed"] + """The type of the event. Always `batch.failed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/eval_run_canceled_webhook_event.py b/src/openai/types/webhooks/eval_run_canceled_webhook_event.py new file mode 100644 index 0000000000..1948f8933b --- /dev/null +++ b/src/openai/types/webhooks/eval_run_canceled_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["EvalRunCanceledWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the eval run.""" + + +class EvalRunCanceledWebhookEvent(BaseModel): + """Sent when an eval run has been canceled.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the eval run was canceled.""" + + data: Data + """Event data payload.""" + + type: Literal["eval.run.canceled"] + """The type of the event. Always `eval.run.canceled`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/eval_run_failed_webhook_event.py b/src/openai/types/webhooks/eval_run_failed_webhook_event.py new file mode 100644 index 0000000000..4e4c860abc --- /dev/null +++ b/src/openai/types/webhooks/eval_run_failed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["EvalRunFailedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the eval run.""" + + +class EvalRunFailedWebhookEvent(BaseModel): + """Sent when an eval run has failed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the eval run failed.""" + + data: Data + """Event data payload.""" + + type: Literal["eval.run.failed"] + """The type of the event. Always `eval.run.failed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py b/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py new file mode 100644 index 0000000000..c20f22eeb9 --- /dev/null +++ b/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["EvalRunSucceededWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the eval run.""" + + +class EvalRunSucceededWebhookEvent(BaseModel): + """Sent when an eval run has succeeded.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the eval run succeeded.""" + + data: Data + """Event data payload.""" + + type: Literal["eval.run.succeeded"] + """The type of the event. Always `eval.run.succeeded`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py new file mode 100644 index 0000000000..0cfff85dad --- /dev/null +++ b/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FineTuningJobCancelledWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the fine-tuning job.""" + + +class FineTuningJobCancelledWebhookEvent(BaseModel): + """Sent when a fine-tuning job has been cancelled.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the fine-tuning job was cancelled.""" + + data: Data + """Event data payload.""" + + type: Literal["fine_tuning.job.cancelled"] + """The type of the event. Always `fine_tuning.job.cancelled`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py new file mode 100644 index 0000000000..0eb6bf954f --- /dev/null +++ b/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FineTuningJobFailedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the fine-tuning job.""" + + +class FineTuningJobFailedWebhookEvent(BaseModel): + """Sent when a fine-tuning job has failed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the fine-tuning job failed.""" + + data: Data + """Event data payload.""" + + type: Literal["fine_tuning.job.failed"] + """The type of the event. Always `fine_tuning.job.failed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py new file mode 100644 index 0000000000..26b5ea8955 --- /dev/null +++ b/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FineTuningJobSucceededWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the fine-tuning job.""" + + +class FineTuningJobSucceededWebhookEvent(BaseModel): + """Sent when a fine-tuning job has succeeded.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the fine-tuning job succeeded.""" + + data: Data + """Event data payload.""" + + type: Literal["fine_tuning.job.succeeded"] + """The type of the event. Always `fine_tuning.job.succeeded`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/realtime_call_incoming_webhook_event.py b/src/openai/types/webhooks/realtime_call_incoming_webhook_event.py new file mode 100644 index 0000000000..4647a2e2ba --- /dev/null +++ b/src/openai/types/webhooks/realtime_call_incoming_webhook_event.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RealtimeCallIncomingWebhookEvent", "Data", "DataSipHeader"] + + +class DataSipHeader(BaseModel): + """A header from the SIP Invite.""" + + name: str + """Name of the SIP Header.""" + + value: str + """Value of the SIP Header.""" + + +class Data(BaseModel): + """Event data payload.""" + + call_id: str + """The unique ID of this call.""" + + sip_headers: List[DataSipHeader] + """Headers from the SIP Invite.""" + + +class RealtimeCallIncomingWebhookEvent(BaseModel): + """Sent when Realtime API Receives a incoming SIP call.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the model response was completed.""" + + data: Data + """Event data payload.""" + + type: Literal["realtime.call.incoming"] + """The type of the event. Always `realtime.call.incoming`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/response_cancelled_webhook_event.py b/src/openai/types/webhooks/response_cancelled_webhook_event.py new file mode 100644 index 0000000000..cd791b3314 --- /dev/null +++ b/src/openai/types/webhooks/response_cancelled_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCancelledWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the model response.""" + + +class ResponseCancelledWebhookEvent(BaseModel): + """Sent when a background response has been cancelled.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the model response was cancelled.""" + + data: Data + """Event data payload.""" + + type: Literal["response.cancelled"] + """The type of the event. Always `response.cancelled`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. 
Always `event`.""" diff --git a/src/openai/types/webhooks/response_completed_webhook_event.py b/src/openai/types/webhooks/response_completed_webhook_event.py new file mode 100644 index 0000000000..cf07f0c2c0 --- /dev/null +++ b/src/openai/types/webhooks/response_completed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCompletedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the model response.""" + + +class ResponseCompletedWebhookEvent(BaseModel): + """Sent when a background response has been completed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the model response was completed.""" + + data: Data + """Event data payload.""" + + type: Literal["response.completed"] + """The type of the event. Always `response.completed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/response_failed_webhook_event.py b/src/openai/types/webhooks/response_failed_webhook_event.py new file mode 100644 index 0000000000..aecb1b8f47 --- /dev/null +++ b/src/openai/types/webhooks/response_failed_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFailedWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the model response.""" + + +class ResponseFailedWebhookEvent(BaseModel): + """Sent when a background response has failed.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the model response failed.""" + + data: Data + """Event data payload.""" + + type: Literal["response.failed"] + """The type of the event. Always `response.failed`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. Always `event`.""" diff --git a/src/openai/types/webhooks/response_incomplete_webhook_event.py b/src/openai/types/webhooks/response_incomplete_webhook_event.py new file mode 100644 index 0000000000..2367731e85 --- /dev/null +++ b/src/openai/types/webhooks/response_incomplete_webhook_event.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseIncompleteWebhookEvent", "Data"] + + +class Data(BaseModel): + """Event data payload.""" + + id: str + """The unique ID of the model response.""" + + +class ResponseIncompleteWebhookEvent(BaseModel): + """Sent when a background response has been interrupted.""" + + id: str + """The unique ID of the event.""" + + created_at: int + """The Unix timestamp (in seconds) of when the model response was interrupted.""" + + data: Data + """Event data payload.""" + + type: Literal["response.incomplete"] + """The type of the event. Always `response.incomplete`.""" + + object: Optional[Literal["event"]] = None + """The object of the event. 
Always `event`.""" diff --git a/src/openai/types/webhooks/unwrap_webhook_event.py b/src/openai/types/webhooks/unwrap_webhook_event.py new file mode 100644 index 0000000000..952383c049 --- /dev/null +++ b/src/openai/types/webhooks/unwrap_webhook_event.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .batch_failed_webhook_event import BatchFailedWebhookEvent +from .batch_expired_webhook_event import BatchExpiredWebhookEvent +from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent +from .batch_completed_webhook_event import BatchCompletedWebhookEvent +from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent +from .response_failed_webhook_event import ResponseFailedWebhookEvent +from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent +from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent +from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent +from .response_completed_webhook_event import ResponseCompletedWebhookEvent +from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent +from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent +from .realtime_call_incoming_webhook_event import RealtimeCallIncomingWebhookEvent +from .fine_tuning_job_cancelled_webhook_event import FineTuningJobCancelledWebhookEvent +from .fine_tuning_job_succeeded_webhook_event import FineTuningJobSucceededWebhookEvent + +__all__ = ["UnwrapWebhookEvent"] + +UnwrapWebhookEvent: TypeAlias = Annotated[ + Union[ + BatchCancelledWebhookEvent, + BatchCompletedWebhookEvent, + BatchExpiredWebhookEvent, + BatchFailedWebhookEvent, + EvalRunCanceledWebhookEvent, + EvalRunFailedWebhookEvent, + EvalRunSucceededWebhookEvent, + FineTuningJobCancelledWebhookEvent, + FineTuningJobFailedWebhookEvent, + FineTuningJobSucceededWebhookEvent, + RealtimeCallIncomingWebhookEvent, + ResponseCancelledWebhookEvent, + ResponseCompletedWebhookEvent, + ResponseFailedWebhookEvent, + ResponseIncompleteWebhookEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/websocket_connection_options.py b/src/openai/types/websocket_connection_options.py new file mode 100644 index 0000000000..40fd24ab03 --- /dev/null +++ b/src/openai/types/websocket_connection_options.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing_extensions import Sequence, TypedDict + +if TYPE_CHECKING: + from websockets import Subprotocol + from websockets.extensions import ClientExtensionFactory + + +class WebsocketConnectionOptions(TypedDict, total=False): + """Websocket connection options copied from `websockets`. + + For example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect + """ + + extensions: Sequence[ClientExtensionFactory] | None + """List of supported extensions, in order in which they should be negotiated and run.""" + + subprotocols: Sequence[Subprotocol] | None + """List of supported subprotocols, in order of decreasing preference.""" + + compression: str | None + """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. 
See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details.""" + + # limits + max_size: int | None + """Maximum size of incoming messages in bytes. None disables the limit.""" + + max_queue: int | None | tuple[int | None, int | None] + """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea.""" + + write_limit: int | tuple[int, int | None] + """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. You may pass a (high, low) tuple to set the high-water and low-water marks.""" diff --git a/src/openai/version.py b/src/openai/version.py deleted file mode 100644 index 01a08ab5a9..0000000000 --- a/src/openai/version.py +++ /dev/null @@ -1,3 +0,0 @@ -from ._version import __version__ - -VERSION: str = __version__ diff --git a/tests/api_resources/audio/test_speech.py b/tests/api_resources/audio/test_speech.py index 781ebeceb9..b2e56a1dcc 100644 --- a/tests/api_resources/audio/test_speech.py +++ b/tests/api_resources/audio/test_speech.py @@ -26,9 +26,9 @@ class TestSpeech: def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = client.audio.speech.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} @@ -38,11 +38,13 @@ def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = client.audio.speech.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", + instructions="instructions", response_format="mp3", speed=0.25, + stream_format="sse", ) assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} @@ -53,9 +55,9 @@ def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> No respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.audio.speech.with_raw_response.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) assert response.is_closed is True @@ -68,9 +70,9 @@ def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> No def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) with client.audio.speech.with_streaming_response.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -82,16 +84,18 @@ def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) class TestAsyncSpeech: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + 
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize @pytest.mark.respx(base_url=base_url) async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = await async_client.audio.speech.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} @@ -101,11 +105,13 @@ async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRo async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = await async_client.audio.speech.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", + instructions="instructions", response_format="mp3", speed=0.25, + stream_format="sse", ) assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent) assert speech.json() == {"foo": "bar"} @@ -116,9 +122,9 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = await async_client.audio.speech.with_raw_response.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) assert response.is_closed is True @@ -131,9 +137,9 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: async def test_streaming_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) async with async_client.audio.speech.with_streaming_response.create( - input="string", + input="input", model="string", - voice="alloy", + voice="ash", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/audio/test_transcriptions.py b/tests/api_resources/audio/test_transcriptions.py index ba8e9e4099..b5eaa4be1f 100644 --- a/tests/api_resources/audio/test_transcriptions.py +++ b/tests/api_resources/audio/test_transcriptions.py @@ -9,7 +9,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.types.audio import Transcription +from openai.types.audio import TranscriptionCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -18,99 +18,219 @@ class TestTranscriptions: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize - def test_method_create(self, client: OpenAI) -> None: + def test_method_create_overload_1(self, client: OpenAI) -> None: transcription = client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - def test_method_create_with_all_params(self, client: OpenAI) -> None: + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: transcription = client.audio.transcriptions.create( file=b"raw file 
contents", - model="whisper-1", - language="string", - prompt="string", + model="gpt-4o-transcribe", + chunking_strategy="auto", + include=["logprobs"], + known_speaker_names=["string"], + known_speaker_references=["string"], + language="language", + prompt="prompt", response_format="json", + stream=False, temperature=0, - timestamp_granularities=["word", "segment"], + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: response = client.audio.transcriptions.with_raw_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - def test_streaming_response_create(self, client: OpenAI) -> None: + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: with client.audio.transcriptions.with_streaming_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + transcription_stream = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + transcription_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + transcription_stream = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + chunking_strategy="auto", + include=["logprobs"], + known_speaker_names=["string"], + known_speaker_references=["string"], + language="language", + prompt="prompt", + response_format="json", + temperature=0, + timestamp_granularities=["word"], + ) + transcription_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.audio.transcriptions.with_raw_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() assert cast(Any, response.is_closed) is True class TestAsyncTranscriptions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + 
parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize - async def test_method_create(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: transcription = await async_client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: transcription = await async_client.audio.transcriptions.create( file=b"raw file contents", - model="whisper-1", - language="string", - prompt="string", + model="gpt-4o-transcribe", + chunking_strategy="auto", + include=["logprobs"], + known_speaker_names=["string"], + known_speaker_references=["string"], + language="language", + prompt="prompt", response_format="json", + stream=False, temperature=0, - timestamp_granularities=["word", "segment"], + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.audio.transcriptions.with_raw_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize - async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.audio.transcriptions.with_streaming_response.create( file=b"raw file contents", - model="whisper-1", + model="gpt-4o-transcribe", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = await response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + transcription_stream = await async_client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + await transcription_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + transcription_stream = await async_client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + chunking_strategy="auto", + include=["logprobs"], + known_speaker_names=["string"], + known_speaker_references=["string"], + language="language", + prompt="prompt", + 
response_format="json", + temperature=0, + timestamp_granularities=["word"], + ) + await transcription_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.audio.transcriptions.with_raw_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.audio.transcriptions.with_streaming_response.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/audio/test_translations.py b/tests/api_resources/audio/test_translations.py index f5c6c68f0b..ead69e9369 100644 --- a/tests/api_resources/audio/test_translations.py +++ b/tests/api_resources/audio/test_translations.py @@ -9,7 +9,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.types.audio import Translation +from openai.types.audio import TranslationCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,18 +23,18 @@ def test_method_create(self, client: OpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: translation = client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: @@ -46,7 +46,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: @@ -58,13 +58,15 @@ def test_streaming_response_create(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) assert cast(Any, response.is_closed) is True class TestAsyncTranslations: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: @@ -72,18 
+74,18 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: translation = await async_client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @@ -95,7 +97,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: @@ -107,6 +109,6 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = await response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/vector_stores/__init__.py b/tests/api_resources/beta/chatkit/__init__.py similarity index 100% rename from tests/api_resources/beta/vector_stores/__init__.py rename to tests/api_resources/beta/chatkit/__init__.py diff --git a/tests/api_resources/beta/chatkit/test_sessions.py b/tests/api_resources/beta/chatkit/test_sessions.py new file mode 100644 index 0000000000..c94e4c92ae --- /dev/null +++ b/tests/api_resources/beta/chatkit/test_sessions.py @@ -0,0 +1,230 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.chatkit import ( + ChatSession, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + session = client.beta.chatkit.sessions.create( + user="x", + workflow={"id": "id"}, + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + session = client.beta.chatkit.sessions.create( + user="x", + workflow={ + "id": "id", + "state_variables": {"foo": "string"}, + "tracing": {"enabled": True}, + "version": "version", + }, + chatkit_configuration={ + "automatic_thread_titling": {"enabled": True}, + "file_upload": { + "enabled": True, + "max_file_size": 1, + "max_files": 1, + }, + "history": { + "enabled": True, + "recent_threads": 1, + }, + }, + expires_after={ + "anchor": "created_at", + "seconds": 1, + }, + rate_limits={"max_requests_per_1_minute": 1}, + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.chatkit.sessions.with_raw_response.create( + user="x", + workflow={"id": "id"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.chatkit.sessions.with_streaming_response.create( + user="x", + workflow={"id": "id"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + session = client.beta.chatkit.sessions.cancel( + "cksess_123", + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.beta.chatkit.sessions.with_raw_response.cancel( + "cksess_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.beta.chatkit.sessions.with_streaming_response.cancel( + "cksess_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): + client.beta.chatkit.sessions.with_raw_response.cancel( + "", + ) + + +class TestAsyncSessions: + parametrize = 
pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.chatkit.sessions.create( + user="x", + workflow={"id": "id"}, + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.chatkit.sessions.create( + user="x", + workflow={ + "id": "id", + "state_variables": {"foo": "string"}, + "tracing": {"enabled": True}, + "version": "version", + }, + chatkit_configuration={ + "automatic_thread_titling": {"enabled": True}, + "file_upload": { + "enabled": True, + "max_file_size": 1, + "max_files": 1, + }, + "history": { + "enabled": True, + "recent_threads": 1, + }, + }, + expires_after={ + "anchor": "created_at", + "seconds": 1, + }, + rate_limits={"max_requests_per_1_minute": 1}, + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.sessions.with_raw_response.create( + user="x", + workflow={"id": "id"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.sessions.with_streaming_response.create( + user="x", + workflow={"id": "id"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.chatkit.sessions.cancel( + "cksess_123", + ) + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.sessions.with_raw_response.cancel( + "cksess_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.sessions.with_streaming_response.cancel( + "cksess_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(ChatSession, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): + await async_client.beta.chatkit.sessions.with_raw_response.cancel( + "", + ) diff --git a/tests/api_resources/beta/chatkit/test_threads.py b/tests/api_resources/beta/chatkit/test_threads.py new file mode 100644 index 
0000000000..6395b72b2f --- /dev/null +++ b/tests/api_resources/beta/chatkit/test_threads.py @@ -0,0 +1,348 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncConversationCursorPage, AsyncConversationCursorPage +from openai.types.beta.chatkit import ChatKitThread, ThreadDeleteResponse +from openai.types.beta.chatkit.chatkit_thread_item_list import Data + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestThreads: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.retrieve( + "cthr_123", + ) + assert_matches_type(ChatKitThread, thread, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.beta.chatkit.threads.with_raw_response.retrieve( + "cthr_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(ChatKitThread, thread, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.beta.chatkit.threads.with_streaming_response.retrieve( + "cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(ChatKitThread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.chatkit.threads.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.list() + assert_matches_type(SyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.list( + after="after", + before="before", + limit=0, + order="asc", + user="x", + ) + assert_matches_type(SyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.beta.chatkit.threads.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(SyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.beta.chatkit.threads.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(SyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.delete( + 
"cthr_123", + ) + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.beta.chatkit.threads.with_raw_response.delete( + "cthr_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.beta.chatkit.threads.with_streaming_response.delete( + "cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.chatkit.threads.with_raw_response.delete( + "", + ) + + @parametrize + def test_method_list_items(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.list_items( + thread_id="cthr_123", + ) + assert_matches_type(SyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + def test_method_list_items_with_all_params(self, client: OpenAI) -> None: + thread = client.beta.chatkit.threads.list_items( + thread_id="cthr_123", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(SyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + def test_raw_response_list_items(self, client: OpenAI) -> None: + response = client.beta.chatkit.threads.with_raw_response.list_items( + thread_id="cthr_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(SyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + def test_streaming_response_list_items(self, client: OpenAI) -> None: + with client.beta.chatkit.threads.with_streaming_response.list_items( + thread_id="cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = response.parse() + assert_matches_type(SyncConversationCursorPage[Data], thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list_items(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.chatkit.threads.with_raw_response.list_items( + thread_id="", + ) + + +class TestAsyncThreads: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.retrieve( + "cthr_123", + ) + assert_matches_type(ChatKitThread, thread, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.threads.with_raw_response.retrieve( + "cthr_123", + ) + + assert response.is_closed is True + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(ChatKitThread, thread, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.threads.with_streaming_response.retrieve( + "cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(ChatKitThread, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.chatkit.threads.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.list() + assert_matches_type(AsyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.list( + after="after", + before="before", + limit=0, + order="asc", + user="x", + ) + assert_matches_type(AsyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.threads.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(AsyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.threads.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(AsyncConversationCursorPage[ChatKitThread], thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.delete( + "cthr_123", + ) + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.threads.with_raw_response.delete( + "cthr_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.threads.with_streaming_response.delete( + "cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(ThreadDeleteResponse, thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: 
AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.chatkit.threads.with_raw_response.delete( + "", + ) + + @parametrize + async def test_method_list_items(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.list_items( + thread_id="cthr_123", + ) + assert_matches_type(AsyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + async def test_method_list_items_with_all_params(self, async_client: AsyncOpenAI) -> None: + thread = await async_client.beta.chatkit.threads.list_items( + thread_id="cthr_123", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(AsyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + async def test_raw_response_list_items(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.chatkit.threads.with_raw_response.list_items( + thread_id="cthr_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + thread = response.parse() + assert_matches_type(AsyncConversationCursorPage[Data], thread, path=["response"]) + + @parametrize + async def test_streaming_response_list_items(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.chatkit.threads.with_streaming_response.list_items( + thread_id="cthr_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + thread = await response.parse() + assert_matches_type(AsyncConversationCursorPage[Data], thread, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list_items(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.chatkit.threads.with_raw_response.list_items( + thread_id="", + ) diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py index a92acb2ca5..2557735426 100644 --- a/tests/api_resources/beta/test_assistants.py +++ b/tests/api_resources/beta/test_assistants.py @@ -24,33 +24,35 @@ class TestAssistants: @parametrize def test_method_create(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="gpt-4-turbo", - description="string", - instructions="string", - metadata={}, - name="string", - response_format="none", + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="none", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) 
assert_matches_type(Assistant, assistant, path=["response"]) @@ -58,7 +60,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -69,7 +71,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -82,14 +84,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: assistant = client.beta.assistants.retrieve( - "string", + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.retrieve( - "string", + "assistant_id", ) assert response.is_closed is True @@ -100,7 +102,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.retrieve( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -120,26 +122,27 @@ def test_path_params_retrieve(self, client: OpenAI) -> None: @parametrize def test_method_update(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", - description="string", - instructions="string", - metadata={}, + assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - response_format="none", + name="name", + reasoning_effort="none", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -147,7 +150,7 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.update( - "string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -158,7 +161,7 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.update( - "string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -172,7 +175,7 @@ def test_streaming_response_update(self, client: OpenAI) -> None: def 
test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): client.beta.assistants.with_raw_response.update( - "", + assistant_id="", ) @parametrize @@ -183,8 +186,8 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.list( - after="string", - before="string", + after="after", + before="before", limit=0, order="asc", ) @@ -213,14 +216,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: assistant = client.beta.assistants.delete( - "string", + "assistant_id", ) assert_matches_type(AssistantDeleted, assistant, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.delete( - "string", + "assistant_id", ) assert response.is_closed is True @@ -231,7 +234,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.delete( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -250,38 +253,42 @@ def test_path_params_delete(self, client: OpenAI) -> None: class TestAsyncAssistants: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="gpt-4-turbo", - description="string", - instructions="string", - metadata={}, - name="string", - response_format="none", + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="none", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -289,7 +296,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -300,7 +307,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def 
test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -313,14 +320,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.retrieve( - "string", + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.retrieve( - "string", + "assistant_id", ) assert response.is_closed is True @@ -331,7 +338,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.retrieve( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -351,26 +358,27 @@ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.update( - "string", + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.update( - "string", - description="string", - instructions="string", - metadata={}, + assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - response_format="none", + name="name", + reasoning_effort="none", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -378,7 +386,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.update( - "string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -389,7 +397,7 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.update( - "string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -403,7 +411,7 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for 
`assistant_id` but received ''"): await async_client.beta.assistants.with_raw_response.update( - "", + assistant_id="", ) @parametrize @@ -414,8 +422,8 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.list( - after="string", - before="string", + after="after", + before="before", limit=0, order="asc", ) @@ -444,14 +452,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.delete( - "string", + "assistant_id", ) assert_matches_type(AssistantDeleted, assistant, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.delete( - "string", + "assistant_id", ) assert response.is_closed is True @@ -462,7 +470,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.delete( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py index 02c6e2586e..f392c86729 100644 --- a/tests/api_resources/beta/test_threads.py +++ b/tests/api_resources/beta/test_threads.py @@ -15,6 +15,8 @@ ) from openai.types.beta.threads import Run +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,126 +25,50 @@ class TestThreads: @parametrize def test_method_create(self, client: OpenAI) -> None: - thread = client.beta.threads.create() + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.create() + assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: - thread = client.beta.threads.create( - messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, 
- ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - metadata={}, - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.create( + messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, - }, - ) + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.create() + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.create() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -151,27 +77,31 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.create() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - thread = client.beta.threads.retrieve( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.retrieve( + "thread_id", + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.retrieve( - "string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.retrieve( + "thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -180,48 +110,55 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.retrieve( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.retrieve( + "thread_id", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.with_raw_response.retrieve( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.retrieve( + "", + ) @parametrize def test_method_update(self, client: OpenAI) -> None: - thread = client.beta.threads.update( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.update( + thread_id="thread_id", + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: - thread = client.beta.threads.update( - "string", - metadata={}, - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": {"vector_store_ids": ["string"]}, - }, - ) + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.update( + thread_id="thread_id", + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.update( - "string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.update( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -230,36 +167,41 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.update( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.update( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = response.parse() + assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_update(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.with_raw_response.update( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.update( + thread_id="", + ) @parametrize def test_method_delete(self, client: OpenAI) -> None: - thread = client.beta.threads.delete( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.delete( + "thread_id", + ) + assert_matches_type(ThreadDeleted, thread, 
path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.delete( - "string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.delete( + "thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -268,172 +210,99 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.delete( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.delete( + "thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = response.parse() - assert_matches_type(ThreadDeleted, thread, path=["response"]) + thread = response.parse() + assert_matches_type(ThreadDeleted, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_delete(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.with_raw_response.delete( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.with_raw_response.delete( + "", + ) @parametrize def test_method_create_and_run_overload_1(self, client: OpenAI) -> None: - thread = client.beta.threads.create_and_run( - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.create_and_run( + assistant_id="assistant_id", + ) + assert_matches_type(Run, thread, path=["response"]) @parametrize def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) -> None: - thread = client.beta.threads.create_and_run( - assistant_id="string", - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - stream=False, - temperature=1, - thread={ - "messages": [ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": 
"code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread = client.beta.threads.create_and_run( + assistant_id="assistant_id", + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + stream=False, + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, }, - "metadata": {}, - }, - tool_choice="none", - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": {"vector_store_ids": ["string"]}, - }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + assert_matches_type(Run, thread, path=["response"]) @parametrize def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.create_and_run( + assistant_id="assistant_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -442,167 +311,93 @@ def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.create_and_run( + assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = response.parse() - assert_matches_type(Run, thread, path=["response"]) + thread = response.parse() + assert_matches_type(Run, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def 
test_method_create_and_run_overload_2(self, client: OpenAI) -> None: - thread_stream = client.beta.threads.create_and_run( - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + thread_stream = client.beta.threads.create_and_run( + assistant_id="assistant_id", + stream=True, + ) + thread_stream.response.close() @parametrize def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) -> None: - thread_stream = client.beta.threads.create_and_run( - assistant_id="string", - stream=True, - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - temperature=1, - thread={ - "messages": [ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread_stream = client.beta.threads.create_and_run( + assistant_id="assistant_id", + stream=True, + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, }, - "metadata": {}, - }, - tool_choice="none", - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, 
- "file_search": {"vector_store_ids": ["string"]}, - }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + thread_stream.response.close() @parametrize def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: - response = client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.with_raw_response.create_and_run( + assistant_id="assistant_id", + stream=True, + ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -610,144 +405,71 @@ def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_and_run_overload_2(self, client: OpenAI) -> None: - with client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.with_streaming_response.create_and_run( + assistant_id="assistant_id", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() + stream = response.parse() + stream.close() assert cast(Any, response.is_closed) is True class TestAsyncThreads: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.create() + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.create() + assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.create( - messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - 
{"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - metadata={}, - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.create( + messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, - }, - ) + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.create() + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.create() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -756,27 +478,31 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.create() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = await response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = await response.parse() + assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.retrieve( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.retrieve( + "thread_id", + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.retrieve( - "string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.retrieve( + "thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -785,48 +511,55 @@ async def 
test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.retrieve( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.retrieve( + "thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = await response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = await response.parse() + assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.with_raw_response.retrieve( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.retrieve( + "", + ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.update( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.update( + thread_id="thread_id", + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.update( - "string", - metadata={}, - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": {"vector_store_ids": ["string"]}, - }, - ) + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.update( + thread_id="thread_id", + metadata={"foo": "string"}, + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + ) + assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.update( - "string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.update( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -835,36 +568,41 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.update( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.update( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = await response.parse() - assert_matches_type(Thread, thread, path=["response"]) + thread = await response.parse() + 
assert_matches_type(Thread, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.with_raw_response.update( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.update( + thread_id="", + ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.delete( - "string", - ) + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.delete( + "thread_id", + ) + assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.delete( - "string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.delete( + "thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -873,172 +611,99 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.delete( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.delete( + "thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = await response.parse() - assert_matches_type(ThreadDeleted, thread, path=["response"]) + thread = await response.parse() + assert_matches_type(ThreadDeleted, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.with_raw_response.delete( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.with_raw_response.delete( + "", + ) @parametrize async def test_method_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.create_and_run( - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.create_and_run( + assistant_id="assistant_id", + ) + assert_matches_type(Run, thread, path=["response"]) @parametrize async def test_method_create_and_run_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: - thread = await async_client.beta.threads.create_and_run( - assistant_id="string", - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - stream=False, - temperature=1, - thread={ - "messages": [ - { - "role": "user", - "content": "string", - "attachments": [ - { - 
"file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread = await async_client.beta.threads.create_and_run( + assistant_id="assistant_id", + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + stream=False, + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, }, - "metadata": {}, - }, - tool_choice="none", - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": {"vector_store_ids": ["string"]}, - }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + assert_matches_type(Run, thread, path=["response"]) @parametrize async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", - ) + with 
pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.create_and_run( + assistant_id="assistant_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1047,167 +712,93 @@ async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncO @parametrize async def test_streaming_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.create_and_run( + assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - thread = await response.parse() - assert_matches_type(Run, thread, path=["response"]) + thread = await response.parse() + assert_matches_type(Run, thread, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: - thread_stream = await async_client.beta.threads.create_and_run( - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="assistant_id", + stream=True, + ) + await thread_stream.response.aclose() @parametrize async def test_method_create_and_run_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: - thread_stream = await async_client.beta.threads.create_and_run( - assistant_id="string", - stream=True, - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - temperature=1, - thread={ - "messages": [ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": 
"code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": { - "vector_store_ids": ["string"], - "vector_stores": [ - { - "file_ids": ["string", "string", "string"], - "metadata": {}, - } - ], + with pytest.warns(DeprecationWarning): + thread_stream = await async_client.beta.threads.create_and_run( + assistant_id="assistant_id", + stream=True, + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + response_format="auto", + temperature=1, + thread={ + "messages": [ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + "metadata": {"foo": "string"}, + "tool_resources": { + "code_interpreter": {"file_ids": ["string"]}, + "file_search": { + "vector_store_ids": ["string"], + "vector_stores": [ + { + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, + } + ], + }, }, }, - "metadata": {}, - }, - tool_choice="none", - tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, - "file_search": {"vector_store_ids": ["string"]}, - }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + tool_choice="none", + tool_resources={ + "code_interpreter": {"file_ids": ["string"]}, + "file_search": {"vector_store_ids": ["string"]}, + }, + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, + }, + ) + await thread_stream.response.aclose() @parametrize async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.with_raw_response.create_and_run( + assistant_id="assistant_id", + stream=True, + ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -1215,14 +806,15 @@ async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncO @parametrize async def test_streaming_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.with_streaming_response.create_and_run( + assistant_id="assistant_id", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/threads/runs/test_steps.py b/tests/api_resources/beta/threads/runs/test_steps.py index e6108d8dad..ba44eec63d 100644 --- a/tests/api_resources/beta/threads/runs/test_steps.py +++ b/tests/api_resources/beta/threads/runs/test_steps.py @@ -12,6 
+12,8 @@ from openai.pagination import SyncCursorPage, AsyncCursorPage from openai.types.beta.threads.runs import RunStep +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -20,20 +22,35 @@ class TestSteps: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - step = client.beta.threads.runs.steps.retrieve( - "string", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + step = client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) + + assert_matches_type(RunStep, step, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + with pytest.warns(DeprecationWarning): + step = client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + ) + assert_matches_type(RunStep, step, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -42,68 +59,76 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.threads.runs.steps.with_streaming_response.retrieve( - "string", - thread_id="string", - run_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - step = response.parse() - assert_matches_type(RunStep, step, path=["response"]) + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.steps.with_streaming_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = response.parse() + assert_matches_type(RunStep, step, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="", - run_id="string", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", - run_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): - client.beta.threads.runs.steps.with_raw_response.retrieve( - "", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but 
received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="", + thread_id="thread_id", + run_id="run_id", + ) @parametrize def test_method_list(self, client: OpenAI) -> None: - step = client.beta.threads.runs.steps.list( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + step = client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - step = client.beta.threads.runs.steps.list( - "string", - thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) + with pytest.warns(DeprecationWarning): + step = client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + include=["step_details.tool_calls[*].file_search.results[*].content"], + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.steps.with_raw_response.list( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -112,52 +137,71 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.threads.runs.steps.with_streaming_response.list( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.steps.with_streaming_response.list( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - step = response.parse() - assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) + step = response.parse() + assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.list( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.steps.with_raw_response.list( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.steps.with_raw_response.list( + run_id="", + thread_id="thread_id", + ) class TestAsyncSteps: - 
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - step = await async_client.beta.threads.runs.steps.retrieve( - "string", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + step = await async_client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) + + assert_matches_type(RunStep, step, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + with pytest.warns(DeprecationWarning): + step = await async_client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + ) + assert_matches_type(RunStep, step, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -166,68 +210,76 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.steps.with_streaming_response.retrieve( - "string", - thread_id="string", - run_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - step = await response.parse() - assert_matches_type(RunStep, step, path=["response"]) + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.steps.with_streaming_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + step = await response.parse() + assert_matches_type(RunStep, step, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="", - run_id="string", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", - run_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): - await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "", - thread_id="string", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received 
''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.retrieve( + step_id="", + thread_id="thread_id", + run_id="run_id", + ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - step = await async_client.beta.threads.runs.steps.list( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + step = await async_client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - step = await async_client.beta.threads.runs.steps.list( - "string", - thread_id="string", - after="string", - before="string", - limit=0, - order="asc", - ) + with pytest.warns(DeprecationWarning): + step = await async_client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + include=["step_details.tool_calls[*].file_search.results[*].content"], + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.steps.with_raw_response.list( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -236,28 +288,30 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.steps.with_streaming_response.list( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.steps.with_streaming_response.list( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - step = await response.parse() - assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) + step = await response.parse() + assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.steps.with_raw_response.list( - "", - 
thread_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.list( + run_id="run_id", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.steps.with_raw_response.list( + run_id="", + thread_id="thread_id", + ) diff --git a/tests/api_resources/beta/threads/test_messages.py b/tests/api_resources/beta/threads/test_messages.py index b5be32a421..7f57002f27 100644 --- a/tests/api_resources/beta/threads/test_messages.py +++ b/tests/api_resources/beta/threads/test_messages.py @@ -15,6 +15,8 @@ MessageDeleted, ) +# pyright: reportDeprecated=false + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,44 +25,41 @@ class TestMessages: @parametrize def test_method_create(self, client: OpenAI) -> None: - message = client.beta.threads.messages.create( - "string", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.create( + thread_id="thread_id", + content="string", + role="user", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: - message = client.beta.threads.messages.create( - "string", - content="string", - role="user", - attachments=[ - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - ], - metadata={}, - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.create( + thread_id="thread_id", + content="string", + role="user", + attachments=[ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + metadata={"foo": "string"}, + ) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.threads.messages.with_raw_response.create( - "string", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.messages.with_raw_response.create( + thread_id="thread_id", + content="string", + role="user", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -69,42 +68,47 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.threads.messages.with_streaming_response.create( - "string", - content="string", - role="user", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.messages.with_streaming_response.create( + thread_id="thread_id", + content="string", + role="user", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(Message, message, path=["response"]) + 
message = response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_create(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.with_raw_response.create( - "", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.create( + thread_id="", + content="string", + role="user", + ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - message = client.beta.threads.messages.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.retrieve( + message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.messages.with_raw_response.retrieve( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -113,55 +117,62 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.threads.messages.with_streaming_response.retrieve( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.messages.with_streaming_response.retrieve( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(Message, message, path=["response"]) + message = response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.retrieve( + message_id="message_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - client.beta.threads.messages.with_raw_response.retrieve( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.retrieve( + message_id="", + thread_id="thread_id", + ) @parametrize def test_method_update(self, client: OpenAI) -> None: - message = client.beta.threads.messages.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.update( + 
message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: - message = client.beta.threads.messages.update( - "string", - thread_id="string", - metadata={}, - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.update( + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: - response = client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.messages.with_raw_response.update( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -170,56 +181,63 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: - with client.beta.threads.messages.with_streaming_response.update( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.messages.with_streaming_response.update( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(Message, message, path=["response"]) + message = response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_update(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.update( + message_id="message_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - client.beta.threads.messages.with_raw_response.update( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.update( + message_id="", + thread_id="thread_id", + ) @parametrize def test_method_list(self, client: OpenAI) -> None: - message = client.beta.threads.messages.list( - "string", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.list( + thread_id="thread_id", + ) + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - message = client.beta.threads.messages.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.list( + thread_id="thread_id", + after="after", + before="before", + limit=0, + order="asc", + run_id="run_id", + ) + 
assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.threads.messages.with_raw_response.list( - "string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.messages.with_raw_response.list( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -228,38 +246,43 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.threads.messages.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.messages.with_streaming_response.list( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(SyncCursorPage[Message], message, path=["response"]) + message = response.parse() + assert_matches_type(SyncCursorPage[Message], message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.with_raw_response.list( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.list( + thread_id="", + ) @parametrize def test_method_delete(self, client: OpenAI) -> None: - message = client.beta.threads.messages.delete( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = client.beta.threads.messages.delete( + message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(MessageDeleted, message, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.messages.with_raw_response.delete( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -268,76 +291,77 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.threads.messages.with_streaming_response.delete( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.messages.with_streaming_response.delete( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(MessageDeleted, message, path=["response"]) + message = response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def 
test_path_params_delete(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + message_id="message_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - client.beta.threads.messages.with_raw_response.delete( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + client.beta.threads.messages.with_raw_response.delete( + message_id="", + thread_id="thread_id", + ) class TestAsyncMessages: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.create( - "string", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.create( + thread_id="thread_id", + content="string", + role="user", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.create( - "string", - content="string", - role="user", - attachments=[ - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - ], - metadata={}, - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.create( + thread_id="thread_id", + content="string", + role="user", + attachments=[ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + metadata={"foo": "string"}, + ) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.with_raw_response.create( - "string", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.messages.with_raw_response.create( + thread_id="thread_id", + content="string", + role="user", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -346,42 +370,47 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.with_streaming_response.create( - "string", - content="string", - role="user", - ) as response: - assert not response.is_closed - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.messages.with_streaming_response.create( + thread_id="thread_id", + content="string", + role="user", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = await response.parse() - assert_matches_type(Message, message, path=["response"]) + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.create( - "", - content="string", - role="user", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.create( + thread_id="", + content="string", + role="user", + ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.retrieve( + message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.messages.with_raw_response.retrieve( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -390,55 +419,62 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.with_streaming_response.retrieve( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.messages.with_streaming_response.retrieve( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = await response.parse() - assert_matches_type(Message, message, path=["response"]) + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await 
async_client.beta.threads.messages.with_raw_response.retrieve( + message_id="message_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.retrieve( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.retrieve( + message_id="", + thread_id="thread_id", + ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.update( + message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.update( - "string", - thread_id="string", - metadata={}, - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.update( + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.messages.with_raw_response.update( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -447,56 +483,63 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.with_streaming_response.update( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.messages.with_streaming_response.update( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = await response.parse() - assert_matches_type(Message, message, path=["response"]) + message = await response.parse() + assert_matches_type(Message, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.update( + message_id="message_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - await 
async_client.beta.threads.messages.with_raw_response.update( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.update( + message_id="", + thread_id="thread_id", + ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.list( - "string", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.list( + thread_id="thread_id", + ) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - run_id="string", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.list( + thread_id="thread_id", + after="after", + before="before", + limit=0, + order="asc", + run_id="run_id", + ) + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.with_raw_response.list( - "string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.messages.with_raw_response.list( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -505,38 +548,43 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.messages.with_streaming_response.list( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = await response.parse() - assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) + message = await response.parse() + assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.list( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.list( + thread_id="", + ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - message = await async_client.beta.threads.messages.delete( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + message = await async_client.beta.threads.messages.delete( + message_id="message_id", + thread_id="thread_id", + ) + assert_matches_type(MessageDeleted, message, path=["response"]) @parametrize async def test_raw_response_delete(self, 
async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.messages.with_raw_response.delete( + message_id="message_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -545,28 +593,30 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.messages.with_streaming_response.delete( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.messages.with_streaming_response.delete( + message_id="message_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = await response.parse() - assert_matches_type(MessageDeleted, message, path=["response"]) + message = await response.parse() + assert_matches_type(MessageDeleted, message, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): - await async_client.beta.threads.messages.with_raw_response.delete( - "", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + message_id="message_id", + thread_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): + await async_client.beta.threads.messages.with_raw_response.delete( + message_id="", + thread_id="thread_id", + ) diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py index 089dd1253e..3a6b36864d 100644 --- a/tests/api_resources/beta/threads/test_runs.py +++ b/tests/api_resources/beta/threads/test_runs.py @@ -24,137 +24,63 @@ class TestRuns: @parametrize def test_method_create_overload_1(self, client: OpenAI) -> None: - run = client.beta.threads.runs.create( - "string", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: - run = client.beta.threads.runs.create( - "string", - assistant_id="string", - additional_instructions="string", - additional_messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - 
{"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="none", + response_format="auto", + stream=False, + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - stream=False, - temperature=1, - tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_create_overload_1(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -163,161 +89,89 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with 
client.beta.threads.runs.with_streaming_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = response.parse() - assert_matches_type(Run, run, path=["response"]) + run = response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_create_overload_1(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.create( + thread_id="", + assistant_id="assistant_id", + ) @parametrize def test_method_create_overload_2(self, client: OpenAI) -> None: - run_stream = client.beta.threads.runs.create( - "string", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + run_stream = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) + run_stream.response.close() @parametrize def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: - run_stream = client.beta.threads.runs.create( - "string", - assistant_id="string", - stream=True, - additional_instructions="string", - additional_messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, + with pytest.warns(DeprecationWarning): + run_stream = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + 
"file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="none", + response_format="auto", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - ], - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - temperature=1, - tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + ) + run_stream.response.close() @parametrize def test_raw_response_create_overload_2(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -325,42 +179,47 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() + stream = response.parse() + stream.close() assert cast(Any, response.is_closed) is True @parametrize def test_path_params_create_overload_2(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.create( + thread_id="", + assistant_id="assistant_id", + stream=True, + ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - run = client.beta.threads.runs.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.retrieve( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.retrieve( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" @@ -369,55 +228,62 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.retrieve( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.retrieve( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = response.parse() - assert_matches_type(Run, run, path=["response"]) + run = response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.retrieve( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.with_raw_response.retrieve( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.retrieve( + run_id="", + thread_id="thread_id", + ) @parametrize def test_method_update(self, client: OpenAI) -> None: - run = client.beta.threads.runs.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: - run = client.beta.threads.runs.update( - "string", - thread_id="string", - metadata={}, - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.update( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -426,55 +292,62 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.update( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.update( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" - run = response.parse() - assert_matches_type(Run, run, path=["response"]) + run = response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_update(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.update( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.with_raw_response.update( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.update( + run_id="", + thread_id="thread_id", + ) @parametrize def test_method_list(self, client: OpenAI) -> None: - run = client.beta.threads.runs.list( - "string", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.list( + thread_id="thread_id", + ) + assert_matches_type(SyncCursorPage[Run], run, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - run = client.beta.threads.runs.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.list( + thread_id="thread_id", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[Run], run, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.list( - "string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.list( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -483,38 +356,43 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.list( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = response.parse() - assert_matches_type(SyncCursorPage[Run], run, path=["response"]) + run = response.parse() + assert_matches_type(SyncCursorPage[Run], run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.list( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.list( + thread_id="", + ) @parametrize def test_method_cancel(self, client: OpenAI) -> None: 
- run = client.beta.threads.runs.cancel( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.cancel( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.cancel( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -523,71 +401,70 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.cancel( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.cancel( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = response.parse() - assert_matches_type(Run, run, path=["response"]) + run = response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_cancel(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.cancel( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.with_raw_response.cancel( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.cancel( + run_id="", + thread_id="thread_id", + ) @parametrize def test_method_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: - run = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) + assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: OpenAI) -> None: - run = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[ - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - ], - stream=False, - ) + with pytest.warns(DeprecationWarning): + run = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[ + { + "output": "output", + "tool_call_id": "tool_call_id", + } + ], + stream=False, + ) + assert_matches_type(Run, run, 
path=["response"]) @parametrize def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -596,53 +473,58 @@ def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> No @parametrize def test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - run = response.parse() - assert_matches_type(Run, run, path=["response"]) + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="", - tool_outputs=[{}, {}, {}], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="", + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + tool_outputs=[{}], + ) @parametrize def test_method_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: - run_stream = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + run_stream = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) + run_stream.response.close() @parametrize def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: - response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -650,175 +532,105 @@ def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> No @parametrize def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: - with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() + with pytest.warns(DeprecationWarning): + with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() assert cast(Any, response.is_closed) is True @parametrize def test_path_params_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="", - stream=True, - tool_outputs=[{}, {}, {}], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="", + stream=True, + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) class TestAsyncRuns: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", - additional_instructions="string", - additional_messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": 
"code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + "content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="none", + response_format="auto", + stream=False, + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - ], - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - stream=False, - temperature=1, - tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -827,161 +639,89 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - @parametrize async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.create( + thread_id="thread_id", + 
assistant_id="assistant_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = await response.parse() - assert_matches_type(Run, run, path=["response"]) + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_create_overload_1(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.create( + thread_id="", + assistant_id="assistant_id", + ) @parametrize async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: - run_stream = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + run_stream = await async_client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) + await run_stream.response.aclose() @parametrize async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: - run_stream = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", - stream=True, - additional_instructions="string", - additional_messages=[ - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, + with pytest.warns(DeprecationWarning): + run_stream = await async_client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", + additional_messages=[ + { + 
"content": "string", + "role": "user", + "attachments": [ + { + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } + ], + "metadata": {"foo": "string"}, + } + ], + instructions="instructions", + max_completion_tokens=256, + max_prompt_tokens=256, + metadata={"foo": "string"}, + model="string", + parallel_tool_calls=True, + reasoning_effort="none", + response_format="auto", + temperature=1, + tool_choice="none", + tools=[{"type": "code_interpreter"}], + top_p=1, + truncation_strategy={ + "type": "auto", + "last_messages": 1, }, - ], - instructions="string", - max_completion_tokens=256, - max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", - temperature=1, - tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - top_p=1, - truncation_strategy={ - "type": "auto", - "last_messages": 1, - }, - ) + ) + await run_stream.response.aclose() @parametrize async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -989,42 +729,47 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - @parametrize async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.create( + thread_id="thread_id", + assistant_id="assistant_id", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() + stream = await response.parse() + await stream.close() assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_create_overload_2(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", - stream=True, - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.create( + thread_id="", + assistant_id="assistant_id", + stream=True, + ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.retrieve( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await 
async_client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.retrieve( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1033,55 +778,62 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.retrieve( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.retrieve( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = await response.parse() - assert_matches_type(Run, run, path=["response"]) + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.retrieve( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.retrieve( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.retrieve( + run_id="", + thread_id="thread_id", + ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.update( - "string", - thread_id="string", - metadata={}, - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.update( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1090,55 +842,62 @@ 
async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.update( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.update( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = await response.parse() - assert_matches_type(Run, run, path=["response"]) + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.update( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.update( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.update( + run_id="", + thread_id="thread_id", + ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.list( - "string", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.list( + thread_id="thread_id", + ) + assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.list( - "string", - after="string", - before="string", - limit=0, - order="asc", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.list( + thread_id="thread_id", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.list( - "string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.list( + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1147,38 +906,43 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with 
async_client.beta.threads.runs.with_streaming_response.list( + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = await response.parse() - assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) + run = await response.parse() + assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.list( - "", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.list( + thread_id="", + ) @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.cancel( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.cancel( + run_id="run_id", + thread_id="thread_id", + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="string", - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.cancel( + run_id="run_id", + thread_id="thread_id", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1187,71 +951,70 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.cancel( - "string", - thread_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.cancel( + run_id="run_id", + thread_id="thread_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" - run = await response.parse() - assert_matches_type(Run, run, path=["response"]) + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="", - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.cancel( + run_id="run_id", + thread_id="", + ) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.cancel( - "", - thread_id="string", - ) + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await 
async_client.beta.threads.runs.with_raw_response.cancel( + run_id="", + thread_id="thread_id", + ) @parametrize async def test_method_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: - run = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[ - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - ], - stream=False, - ) + with pytest.warns(DeprecationWarning): + run = await async_client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[ + { + "output": "output", + "tool_call_id": "tool_call_id", + } + ], + stream=False, + ) + assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1260,53 +1023,58 @@ async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: A @parametrize async def test_streaming_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - run = await response.parse() - assert_matches_type(Run, run, path=["response"]) + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(Run, run, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="", - tool_outputs=[{}, {}, {}], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - tool_outputs=[{}, {}, {}], - ) + with 
pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="", + tool_outputs=[{}], + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + tool_outputs=[{}], + ) @parametrize async def test_method_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: - run_stream = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + run_stream = await async_client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) + await run_stream.response.aclose() @parametrize async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" stream = response.parse() @@ -1314,34 +1082,36 @@ async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: A @parametrize async def test_streaming_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() + with pytest.warns(DeprecationWarning): + async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="", - stream=True, - tool_outputs=[{}, {}, {}], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): - await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - stream=True, - tool_outputs=[{}, {}, {}], - ) + with pytest.warns(DeprecationWarning): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="run_id", + thread_id="", + stream=True, + tool_outputs=[{}], + ) + + 
with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( + run_id="", + thread_id="thread_id", + stream=True, + tool_outputs=[{}], + ) diff --git a/tests/api_resources/chat/completions/__init__.py b/tests/api_resources/chat/completions/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/chat/completions/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/chat/completions/test_messages.py b/tests/api_resources/chat/completions/test_messages.py new file mode 100644 index 0000000000..4a4267e539 --- /dev/null +++ b/tests/api_resources/chat/completions/test_messages.py @@ -0,0 +1,121 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.chat import ChatCompletionStoreMessage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestMessages: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.messages.with_raw_response.list( + completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) + + +class TestAsyncMessages: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + 
completion_id="completion_id", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.messages.with_raw_response.list( + completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1c195c4001..2511644fb1 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -9,8 +9,10 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage from openai.types.chat import ( ChatCompletion, + ChatCompletionDeleted, ) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -25,10 +27,10 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -38,61 +40,80 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning_effort="none", + 
response_format={"type": "text"}, + safety_identifier="safety-identifier-1234", + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + verbosity="low", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -102,10 +123,10 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -119,10 +140,10 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -138,10 +159,10 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) completion_stream.response.close() @@ -152,61 +173,80 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", - stream_options={"include_usage": True}, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning_effort="none", + response_format={"type": "text"}, + safety_identifier="safety-identifier-1234", + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - 
"name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + verbosity="low", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) completion_stream.response.close() @@ -216,10 +256,10 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) @@ -233,10 +273,10 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) as response: assert not response.is_closed @@ -247,9 +287,165 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + completion = client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + completion = client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + 
assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.update( + completion_id="", + metadata={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + completion = client.chat.completions.list() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + completion = client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + completion = client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.delete( + "", + ) + class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: @@ -257,10 +453,10 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - 
"role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -270,61 +466,80 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning_effort="none", + response_format={"type": "text"}, + safety_identifier="safety-identifier-1234", + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + verbosity="low", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -334,10 +549,10 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -351,10 +566,10 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -370,10 +585,10 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) await completion_stream.response.aclose() @@ -384,61 +599,80 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "ash", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": 
"string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", - stream_options={"include_usage": True}, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning_effort="none", + response_format={"type": "text"}, + safety_identifier="safety-identifier-1234", + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + verbosity="low", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) await completion_stream.response.aclose() @@ -448,10 +682,10 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) @@ -465,10 +699,10 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) as response: assert not response.is_closed @@ -478,3 +712,157 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe await stream.close() assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def 
test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.update( + completion_id="", + metadata={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def 
test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/containers/__init__.py b/tests/api_resources/containers/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/containers/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/containers/files/__init__.py b/tests/api_resources/containers/files/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/containers/files/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/containers/files/test_content.py b/tests/api_resources/containers/files/test_content.py new file mode 100644 index 0000000000..67fcdca36c --- /dev/null +++ b/tests/api_resources/containers/files/test_content.py @@ -0,0 +1,154 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import httpx +import pytest +from respx import MockRouter + +import openai._legacy_response as _legacy_response +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type + +# pyright: reportDeprecated=false + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestContent: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_method_retrieve(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + content = client.containers.files.content.retrieve( + file_id="file_id", + container_id="container_id", + ) + assert isinstance(content, _legacy_response.HttpxBinaryResponseContent) + assert content.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_raw_response_retrieve(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + + response = client.containers.files.content.with_raw_response.retrieve( + file_id="file_id", + container_id="container_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + content = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, content, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_streaming_response_retrieve(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + with client.containers.files.content.with_streaming_response.retrieve( + file_id="file_id", + container_id="container_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + content = response.parse() + assert_matches_type(bytes, content, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + client.containers.files.content.with_raw_response.retrieve( + file_id="file_id", + container_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.containers.files.content.with_raw_response.retrieve( + file_id="", + container_id="container_id", + ) + + +class TestAsyncContent: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_method_retrieve(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + content = await async_client.containers.files.content.retrieve( + file_id="file_id", + container_id="container_id", + ) + assert isinstance(content, 
_legacy_response.HttpxBinaryResponseContent) + assert content.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + + response = await async_client.containers.files.content.with_raw_response.retrieve( + file_id="file_id", + container_id="container_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + content = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, content, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/containers/container_id/files/file_id/content").mock( + return_value=httpx.Response(200, json={"foo": "bar"}) + ) + async with async_client.containers.files.content.with_streaming_response.retrieve( + file_id="file_id", + container_id="container_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + content = await response.parse() + assert_matches_type(bytes, content, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + await async_client.containers.files.content.with_raw_response.retrieve( + file_id="file_id", + container_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.containers.files.content.with_raw_response.retrieve( + file_id="", + container_id="container_id", + ) diff --git a/tests/api_resources/beta/vector_stores/test_files.py b/tests/api_resources/containers/test_files.py similarity index 51% rename from tests/api_resources/beta/vector_stores/test_files.py rename to tests/api_resources/containers/test_files.py index 58301e2d37..f9d82d005c 100644 --- a/tests/api_resources/beta/vector_stores/test_files.py +++ b/tests/api_resources/containers/test_files.py @@ -10,9 +10,10 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.vector_stores import ( - VectorStoreFile, - VectorStoreFileDeleted, +from openai.types.containers import ( + FileListResponse, + FileCreateResponse, + FileRetrieveResponse, ) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,380 +24,388 @@ class TestFiles: @parametrize def test_method_create(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.create( - "vs_abc123", - file_id="string", + file = client.containers.files.create( + container_id="container_id", ) - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file = client.containers.files.create( + container_id="container_id", + file=b"raw file contents", + file_id="file_id", + ) + 
assert_matches_type(FileCreateResponse, file, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.create( - "vs_abc123", - file_id="string", + response = client.containers.files.with_raw_response.create( + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.create( - "vs_abc123", - file_id="string", + with client.containers.files.with_streaming_response.create( + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_create(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.create( - "", - file_id="string", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + client.containers.files.with_raw_response.create( + container_id="", ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + file = client.containers.files.retrieve( + file_id="file_id", + container_id="container_id", ) - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + response = client.containers.files.with_raw_response.retrieve( + file_id="file_id", + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + with client.containers.files.with_streaming_response.retrieve( + file_id="file_id", + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="", + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + client.containers.files.with_raw_response.retrieve( + file_id="file_id", + container_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.retrieve( - "", - vector_store_id="vs_abc123", + client.containers.files.with_raw_response.retrieve( + file_id="", + container_id="container_id", ) @parametrize def test_method_list(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.list( - "string", + file = client.containers.files.list( + container_id="container_id", ) - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileListResponse], file, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.list( - "string", - after="string", - before="string", - filter="in_progress", + file = client.containers.files.list( + container_id="container_id", + after="after", limit=0, order="asc", ) - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileListResponse], file, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.list( - "string", + response = client.containers.files.with_raw_response.list( + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileListResponse], file, path=["response"]) @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.list( - "string", + with client.containers.files.with_streaming_response.list( + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileListResponse], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.list( - "", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + client.containers.files.with_raw_response.list( + container_id="", ) @parametrize def test_method_delete(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.delete( - "string", - vector_store_id="string", + file = client.containers.files.delete( + file_id="file_id", + container_id="container_id", ) - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="string", + response = client.containers.files.with_raw_response.delete( + file_id="file_id", + container_id="container_id", ) assert response.is_closed is 
True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.delete( - "string", - vector_store_id="string", + with client.containers.files.with_streaming_response.delete( + file_id="file_id", + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None assert cast(Any, response.is_closed) is True @parametrize def test_path_params_delete(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + client.containers.files.with_raw_response.delete( + file_id="file_id", + container_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.delete( - "", - vector_store_id="string", + client.containers.files.with_raw_response.delete( + file_id="", + container_id="container_id", ) class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.create( - "vs_abc123", - file_id="string", + file = await async_client.containers.files.create( + container_id="container_id", + ) + assert_matches_type(FileCreateResponse, file, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.containers.files.create( + container_id="container_id", + file=b"raw file contents", + file_id="file_id", ) - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.create( - "vs_abc123", - file_id="string", + response = await async_client.containers.files.with_raw_response.create( + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.create( - "vs_abc123", - file_id="string", + async with async_client.containers.files.with_streaming_response.create( + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileCreateResponse, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.create( - "", - file_id="string", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + await async_client.containers.files.with_raw_response.create( + container_id="", ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + file = await async_client.containers.files.retrieve( + file_id="file_id", + container_id="container_id", ) - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + response = await async_client.containers.files.with_raw_response.retrieve( + file_id="file_id", + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", + async with async_client.containers.files.with_streaming_response.retrieve( + file_id="file_id", + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) + assert_matches_type(FileRetrieveResponse, file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + await async_client.containers.files.with_raw_response.retrieve( + file_id="file_id", + container_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "", - vector_store_id="vs_abc123", + await async_client.containers.files.with_raw_response.retrieve( + file_id="", + container_id="container_id", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.list( - "string", + file = await async_client.containers.files.list( + container_id="container_id", ) - 
assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileListResponse], file, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.list( - "string", - after="string", - before="string", - filter="in_progress", + file = await async_client.containers.files.list( + container_id="container_id", + after="after", limit=0, order="asc", ) - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileListResponse], file, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.list( - "string", + response = await async_client.containers.files.with_raw_response.list( + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileListResponse], file, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.list( - "string", + async with async_client.containers.files.with_streaming_response.list( + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileListResponse], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.list( - "", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + await async_client.containers.files.with_raw_response.list( + container_id="", ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.delete( - "string", - vector_store_id="string", + file = await async_client.containers.files.delete( + file_id="file_id", + container_id="container_id", ) - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="string", + response = await async_client.containers.files.with_raw_response.delete( + file_id="file_id", + container_id="container_id", ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.delete( - "string", - vector_store_id="string", + async with 
async_client.containers.files.with_streaming_response.delete( + file_id="file_id", + container_id="container_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + assert file is None assert cast(Any, response.is_closed) is True @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="", + with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"): + await async_client.containers.files.with_raw_response.delete( + file_id="file_id", + container_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.delete( - "", - vector_store_id="string", + await async_client.containers.files.with_raw_response.delete( + file_id="", + container_id="container_id", ) diff --git a/tests/api_resources/conversations/__init__.py b/tests/api_resources/conversations/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/conversations/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/conversations/test_items.py b/tests/api_resources/conversations/test_items.py new file mode 100644 index 0000000000..0503301f16 --- /dev/null +++ b/tests/api_resources/conversations/test_items.py @@ -0,0 +1,499 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncConversationCursorPage, AsyncConversationCursorPage +from openai.types.conversations import ( + Conversation, + ConversationItem, + ConversationItemList, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + item = client.conversations.items.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + item = client.conversations.items.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + include=["file_search_call.results"], + ) + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.conversations.items.with_raw_response.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.conversations.items.with_streaming_response.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = response.parse() + assert_matches_type(ConversationItemList, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.items.with_raw_response.create( + conversation_id="", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + item = client.conversations.items.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) + assert_matches_type(ConversationItem, item, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + item = client.conversations.items.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + include=["file_search_call.results"], + ) + assert_matches_type(ConversationItem, item, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.conversations.items.with_raw_response.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(ConversationItem, item, 
path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.conversations.items.with_streaming_response.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = response.parse() + assert_matches_type(ConversationItem, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.items.with_raw_response.retrieve( + item_id="msg_abc", + conversation_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `item_id` but received ''"): + client.conversations.items.with_raw_response.retrieve( + item_id="", + conversation_id="conv_123", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + item = client.conversations.items.list( + conversation_id="conv_123", + ) + assert_matches_type(SyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + item = client.conversations.items.list( + conversation_id="conv_123", + after="after", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(SyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.conversations.items.with_raw_response.list( + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(SyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.conversations.items.with_streaming_response.list( + conversation_id="conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = response.parse() + assert_matches_type(SyncConversationCursorPage[ConversationItem], item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.items.with_raw_response.list( + conversation_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + item = client.conversations.items.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) + assert_matches_type(Conversation, item, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.conversations.items.with_raw_response.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(Conversation, item, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.conversations.items.with_streaming_response.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) as 
response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = response.parse() + assert_matches_type(Conversation, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.items.with_raw_response.delete( + item_id="msg_abc", + conversation_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `item_id` but received ''"): + client.conversations.items.with_raw_response.delete( + item_id="", + conversation_id="conv_123", + ) + + +class TestAsyncItems: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + include=["file_search_call.results"], + ) + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.items.with_raw_response.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(ConversationItemList, item, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.items.with_streaming_response.create( + conversation_id="conv_123", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = await response.parse() + assert_matches_type(ConversationItemList, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.items.with_raw_response.create( + conversation_id="", + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) + assert_matches_type(ConversationItem, item, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.retrieve( + 
item_id="msg_abc", + conversation_id="conv_123", + include=["file_search_call.results"], + ) + assert_matches_type(ConversationItem, item, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.items.with_raw_response.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(ConversationItem, item, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.items.with_streaming_response.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = await response.parse() + assert_matches_type(ConversationItem, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.items.with_raw_response.retrieve( + item_id="msg_abc", + conversation_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `item_id` but received ''"): + await async_client.conversations.items.with_raw_response.retrieve( + item_id="", + conversation_id="conv_123", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.list( + conversation_id="conv_123", + ) + assert_matches_type(AsyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.list( + conversation_id="conv_123", + after="after", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(AsyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.items.with_raw_response.list( + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(AsyncConversationCursorPage[ConversationItem], item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.items.with_streaming_response.list( + conversation_id="conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = await response.parse() + assert_matches_type(AsyncConversationCursorPage[ConversationItem], item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.items.with_raw_response.list( + conversation_id="", + ) + + @parametrize + async def 
test_method_delete(self, async_client: AsyncOpenAI) -> None: + item = await async_client.conversations.items.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) + assert_matches_type(Conversation, item, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.items.with_raw_response.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + item = response.parse() + assert_matches_type(Conversation, item, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.items.with_streaming_response.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + item = await response.parse() + assert_matches_type(Conversation, item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.items.with_raw_response.delete( + item_id="msg_abc", + conversation_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `item_id` but received ''"): + await async_client.conversations.items.with_raw_response.delete( + item_id="", + conversation_id="conv_123", + ) diff --git a/tests/api_resources/evals/__init__.py b/tests/api_resources/evals/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/evals/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/evals/runs/__init__.py b/tests/api_resources/evals/runs/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/evals/runs/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/evals/runs/test_output_items.py b/tests/api_resources/evals/runs/test_output_items.py new file mode 100644 index 0000000000..673867ac42 --- /dev/null +++ b/tests/api_resources/evals/runs/test_output_items.py @@ -0,0 +1,265 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.evals.runs import OutputItemListResponse, OutputItemRetrieveResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestOutputItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.runs.output_items.with_streaming_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"): + client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="", + eval_id="eval_id", + run_id="run_id", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + output_item = client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="fail", + ) + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = 
response.parse() + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.runs.output_items.with_streaming_response.list( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = response.parse() + assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.output_items.with_raw_response.list( + run_id="", + eval_id="eval_id", + ) + + +class TestAsyncOutputItems: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + output_item = await async_client.evals.runs.output_items.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.output_items.with_streaming_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = await response.parse() + assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="", + run_id="run_id", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.retrieve( + output_item_id="", + eval_id="eval_id", + run_id="run_id", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> 
None: + output_item = await async_client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + output_item = await async_client.evals.runs.output_items.list( + run_id="run_id", + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="fail", + ) + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + output_item = response.parse() + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.output_items.with_streaming_response.list( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + output_item = await response.parse() + assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.list( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.output_items.with_raw_response.list( + run_id="", + eval_id="eval_id", + ) diff --git a/tests/api_resources/evals/test_runs.py b/tests/api_resources/evals/test_runs.py new file mode 100644 index 0000000000..1367cb4bab --- /dev/null +++ b/tests/api_resources/evals/test_runs.py @@ -0,0 +1,591 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.evals import ( + RunListResponse, + RunCancelResponse, + RunCreateResponse, + RunDeleteResponse, + RunRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestRuns: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + run = client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + run = client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [ + { + "item": {"foo": "bar"}, + "sample": {"foo": "bar"}, + } + ], + "type": "file_content", + }, + "type": "jsonl", + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.create( + eval_id="", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + run = client.evals.runs.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.retrieve( + run_id="run_id", + 
eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.retrieve( + run_id="", + eval_id="eval_id", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + run = client.evals.runs.list( + eval_id="eval_id", + ) + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + run = client.evals.runs.list( + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="queued", + ) + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.list( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.list( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.list( + eval_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + run = client.evals.runs.delete( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.delete( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.delete( 
+ run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.delete( + run_id="", + eval_id="eval_id", + ) + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + run = client.evals.runs.cancel( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.evals.runs.with_streaming_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + client.evals.runs.with_raw_response.cancel( + run_id="", + eval_id="eval_id", + ) + + +class TestAsyncRuns: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [ + { + "item": {"foo": "bar"}, + "sample": {"foo": "bar"}, + } + ], + "type": "file_content", + }, + "type": "jsonl", + }, + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) as 
response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunCreateResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.create( + eval_id="", + data_source={ + "source": { + "content": [{"item": {"foo": "bar"}}], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.retrieve( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunRetrieveResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.retrieve( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.retrieve( + run_id="", + eval_id="eval_id", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.list( + eval_id="eval_id", + ) + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.list( + eval_id="eval_id", + after="after", + limit=0, + order="asc", + status="queued", + ) + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.list( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.list( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.list( + eval_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.delete( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.delete( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunDeleteResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.delete( + run_id="run_id", + eval_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.delete( + run_id="", + eval_id="eval_id", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + run = await async_client.evals.runs.cancel( + run_id="run_id", + eval_id="eval_id", + ) + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + run = response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.runs.with_streaming_response.cancel( + run_id="run_id", + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + run = await response.parse() + assert_matches_type(RunCancelResponse, run, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.runs.with_raw_response.cancel( + run_id="run_id", + eval_id="", + ) 
+ + with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): + await async_client.evals.runs.with_raw_response.cancel( + run_id="", + eval_id="eval_id", + ) diff --git a/tests/api_resources/fine_tuning/alpha/__init__.py b/tests/api_resources/fine_tuning/alpha/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/alpha/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/alpha/test_graders.py b/tests/api_resources/fine_tuning/alpha/test_graders.py new file mode 100644 index 0000000000..4a237114b6 --- /dev/null +++ b/tests/api_resources/fine_tuning/alpha/test_graders.py @@ -0,0 +1,285 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.fine_tuning.alpha import ( + GraderRunResponse, + GraderValidateResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestGraders: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_run(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_method_run_with_all_params(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + item={}, + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_raw_response_run(self, client: OpenAI) -> None: + response = client.fine_tuning.alpha.graders.with_raw_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + def test_streaming_response_run(self, client: OpenAI) -> None: + with client.fine_tuning.alpha.graders.with_streaming_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_validate(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, 
path=["response"]) + + @parametrize + def test_method_validate_with_all_params(self, client: OpenAI) -> None: + grader = client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + def test_raw_response_validate(self, client: OpenAI) -> None: + response = client.fine_tuning.alpha.graders.with_raw_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + def test_streaming_response_validate(self, client: OpenAI) -> None: + with client.fine_tuning.alpha.graders.with_streaming_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncGraders: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_run(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_method_run_with_all_params(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + item={}, + ) + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_raw_response_run(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.alpha.graders.with_raw_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + @parametrize + async def test_streaming_response_run(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.alpha.graders.with_streaming_response.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = await response.parse() + assert_matches_type(GraderRunResponse, grader, path=["response"]) + + assert cast(Any, 
response.is_closed) is True + + @parametrize + async def test_method_validate(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_method_validate_with_all_params(self, async_client: AsyncOpenAI) -> None: + grader = await async_client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_raw_response_validate(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.alpha.graders.with_raw_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + grader = response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + @parametrize + async def test_streaming_response_validate(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.alpha.graders.with_streaming_response.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + grader = await response.parse() + assert_matches_type(GraderValidateResponse, grader, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/fine_tuning/checkpoints/__init__.py b/tests/api_resources/fine_tuning/checkpoints/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/fine_tuning/checkpoints/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/fine_tuning/checkpoints/test_permissions.py b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py new file mode 100644 index 0000000000..9420e3a34c --- /dev/null +++ b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py @@ -0,0 +1,319 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncPage, AsyncPage +from openai.types.fine_tuning.checkpoints import ( + PermissionCreateResponse, + PermissionDeleteResponse, + PermissionRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestPermissions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="", + project_ids=["string"], + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", + limit=0, + order="ascending", + project_id="project_id", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: 
OpenAI) -> None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + permission = client.fine_tuning.checkpoints.permissions.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.fine_tuning.checkpoints.permissions.with_streaming_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"): + client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + +class TestAsyncPermissions: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await 
async_client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.create( + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + project_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create( + fine_tuned_model_checkpoint="", + project_ids=["string"], + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", + limit=0, + order="ascending", + project_id="project_id", + ) + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve( + fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(PermissionRetrieveResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve( + fine_tuned_model_checkpoint="", + ) + + @parametrize + async def 
test_method_delete(self, async_client: AsyncOpenAI) -> None: + permission = await async_client.fine_tuning.checkpoints.permissions.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + permission = response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + permission = await response.parse() + assert_matches_type(PermissionDeleteResponse, permission, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises( + ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''" + ): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB", + fine_tuned_model_checkpoint="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"): + await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete( + permission_id="", + fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd", + ) diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py index 915d5c6f63..e5b8e75499 100644 --- a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py +++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py @@ -21,15 +21,15 @@ class TestCheckpoints: @parametrize def test_method_list(self, client: OpenAI) -> None: checkpoint = client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: checkpoint = client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @@ -37,7 +37,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -48,7 +48,7 @@ 
def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: with client.fine_tuning.jobs.checkpoints.with_streaming_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -62,25 +62,27 @@ def test_streaming_response_list(self, client: OpenAI) -> None: def test_path_params_list(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "", + fine_tuning_job_id="", ) class TestAsyncCheckpoints: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @@ -88,7 +90,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -99,7 +101,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -113,5 +115,5 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "", + fine_tuning_job_id="", ) diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py index 1ff6d63b31..9d8d985e86 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -24,7 +24,7 @@ class TestJobs: @parametrize def test_method_create(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @@ -32,7 
+32,7 @@ def test_method_create(self, client: OpenAI) -> None: @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", @@ -44,30 +44,49 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: "type": "wandb", "wandb": { "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], }, + } + ], + metadata={"foo": "string"}, + method={ + "type": "supervised", + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "reinforcement": { + "grader": { + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", }, - }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "hyperparameters": { + "batch_size": "auto", + "compute_multiplier": "auto", + "eval_interval": "auto", + "eval_samples": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + "reasoning_effort": "default", }, }, - ], + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + }, seed=42, suffix="x", validation_file="file-abc123", @@ -77,7 +96,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) @@ -89,7 +108,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.fine_tuning.jobs.with_streaming_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) as response: assert not response.is_closed @@ -146,8 +165,9 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list( - after="string", + after="after", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"]) @@ -212,15 +232,15 @@ def test_path_params_cancel(self, client: OpenAI) -> None: @parametrize def test_method_list_events(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) @parametrize def test_method_list_events_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) @@ -228,7 +248,7 @@ def test_method_list_events_with_all_params(self, client: OpenAI) -> None: @parametrize def 
test_raw_response_list_events(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -239,7 +259,7 @@ def test_raw_response_list_events(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list_events(self, client: OpenAI) -> None: with client.fine_tuning.jobs.with_streaming_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -253,17 +273,95 @@ def test_streaming_response_list_events(self, client: OpenAI) -> None: def test_path_params_list_events(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): client.fine_tuning.jobs.with_raw_response.list_events( + fine_tuning_job_id="", + ) + + @parametrize + def test_method_pause(self, client: OpenAI) -> None: + job = client.fine_tuning.jobs.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_raw_response_pause(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.with_raw_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_streaming_response_pause(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_pause(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + client.fine_tuning.jobs.with_raw_response.pause( + "", + ) + + @parametrize + def test_method_resume(self, client: OpenAI) -> None: + job = client.fine_tuning.jobs.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_raw_response_resume(self, client: OpenAI) -> None: + response = client.fine_tuning.jobs.with_raw_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + def test_streaming_response_resume(self, client: OpenAI) -> None: + with client.fine_tuning.jobs.with_streaming_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_resume(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + 
client.fine_tuning.jobs.with_raw_response.resume( "", ) class TestAsyncJobs: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @@ -271,7 +369,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", @@ -283,30 +381,49 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> "type": "wandb", "wandb": { "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], }, + } + ], + metadata={"foo": "string"}, + method={ + "type": "supervised", + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "reinforcement": { + "grader": { + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", }, - }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "hyperparameters": { + "batch_size": "auto", + "compute_multiplier": "auto", + "eval_interval": "auto", + "eval_samples": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + "reasoning_effort": "default", }, }, - ], + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + }, seed=42, suffix="x", validation_file="file-abc123", @@ -316,7 +433,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) @@ -328,7 +445,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.with_streaming_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) as response: assert not response.is_closed @@ -385,8 +502,9 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list( - after="string", + after="after", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"]) @@ 
-451,15 +569,15 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_events(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) @parametrize async def test_method_list_events_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) @@ -467,7 +585,7 @@ async def test_method_list_events_with_all_params(self, async_client: AsyncOpenA @parametrize async def test_raw_response_list_events(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -478,7 +596,7 @@ async def test_raw_response_list_events(self, async_client: AsyncOpenAI) -> None @parametrize async def test_streaming_response_list_events(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.with_streaming_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -492,5 +610,81 @@ async def test_streaming_response_list_events(self, async_client: AsyncOpenAI) - async def test_path_params_list_events(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): await async_client.fine_tuning.jobs.with_raw_response.list_events( + fine_tuning_job_id="", + ) + + @parametrize + async def test_method_pause(self, async_client: AsyncOpenAI) -> None: + job = await async_client.fine_tuning.jobs.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_raw_response_pause(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_streaming_response_pause(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_pause(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.pause( + "", + ) + + @parametrize + async def test_method_resume(self, async_client: AsyncOpenAI) -> None: + job = await 
async_client.fine_tuning.jobs.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_raw_response_resume(self, async_client: AsyncOpenAI) -> None: + response = await async_client.fine_tuning.jobs.with_raw_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + @parametrize + async def test_streaming_response_resume(self, async_client: AsyncOpenAI) -> None: + async with async_client.fine_tuning.jobs.with_streaming_response.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(FineTuningJob, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_resume(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): + await async_client.fine_tuning.jobs.with_raw_response.resume( "", ) diff --git a/tests/api_resources/realtime/__init__.py b/tests/api_resources/realtime/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/realtime/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/realtime/test_calls.py b/tests/api_resources/realtime/test_calls.py new file mode 100644 index 0000000000..5495a58a4e --- /dev/null +++ b/tests/api_resources/realtime/test_calls.py @@ -0,0 +1,692 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import httpx +import pytest +from respx import MockRouter + +import openai._legacy_response as _legacy_response +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type + +# pyright: reportDeprecated=false + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestCalls: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + call = client.realtime.calls.create( + sdp="sdp", + ) + assert isinstance(call, _legacy_response.HttpxBinaryResponseContent) + assert call.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + call = client.realtime.calls.create( + sdp="sdp", + session={ + "type": "realtime", + "audio": { + "input": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "noise_reduction": {"type": "near_field"}, + "transcription": { + "language": "language", + "model": "whisper-1", + "prompt": "prompt", + }, + "turn_detection": { + "type": "server_vad", + "create_response": True, + "idle_timeout_ms": 5000, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + }, + }, + "output": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "speed": 0.25, + "voice": "ash", + }, + }, + "include": ["item.input_audio_transcription.logprobs"], + "instructions": "instructions", + "max_output_tokens": 0, + "model": "string", + "output_modalities": ["text"], + "prompt": { + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + "tool_choice": "none", + "tools": [ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + "tracing": "auto", + "truncation": "auto", + }, + ) + assert isinstance(call, _legacy_response.HttpxBinaryResponseContent) + assert call.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + + response = client.realtime.calls.with_raw_response.create( + sdp="sdp", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, call, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + with client.realtime.calls.with_streaming_response.create( + sdp="sdp", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = response.parse() + assert_matches_type(bytes, call, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def 
test_method_accept(self, client: OpenAI) -> None: + call = client.realtime.calls.accept( + call_id="call_id", + type="realtime", + ) + assert call is None + + @parametrize + def test_method_accept_with_all_params(self, client: OpenAI) -> None: + call = client.realtime.calls.accept( + call_id="call_id", + type="realtime", + audio={ + "input": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "noise_reduction": {"type": "near_field"}, + "transcription": { + "language": "language", + "model": "whisper-1", + "prompt": "prompt", + }, + "turn_detection": { + "type": "server_vad", + "create_response": True, + "idle_timeout_ms": 5000, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + }, + }, + "output": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "speed": 0.25, + "voice": "ash", + }, + }, + include=["item.input_audio_transcription.logprobs"], + instructions="instructions", + max_output_tokens=0, + model="string", + output_modalities=["text"], + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + tool_choice="none", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + tracing="auto", + truncation="auto", + ) + assert call is None + + @parametrize + def test_raw_response_accept(self, client: OpenAI) -> None: + response = client.realtime.calls.with_raw_response.accept( + call_id="call_id", + type="realtime", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + def test_streaming_response_accept(self, client: OpenAI) -> None: + with client.realtime.calls.with_streaming_response.accept( + call_id="call_id", + type="realtime", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_accept(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + client.realtime.calls.with_raw_response.accept( + call_id="", + type="realtime", + ) + + @parametrize + def test_method_hangup(self, client: OpenAI) -> None: + call = client.realtime.calls.hangup( + "call_id", + ) + assert call is None + + @parametrize + def test_raw_response_hangup(self, client: OpenAI) -> None: + response = client.realtime.calls.with_raw_response.hangup( + "call_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + def test_streaming_response_hangup(self, client: OpenAI) -> None: + with client.realtime.calls.with_streaming_response.hangup( + "call_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_hangup(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + client.realtime.calls.with_raw_response.hangup( + "", + ) + + @parametrize + def test_method_refer(self, client: OpenAI) -> None: + call = client.realtime.calls.refer( + 
call_id="call_id", + target_uri="tel:+14155550123", + ) + assert call is None + + @parametrize + def test_raw_response_refer(self, client: OpenAI) -> None: + response = client.realtime.calls.with_raw_response.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + def test_streaming_response_refer(self, client: OpenAI) -> None: + with client.realtime.calls.with_streaming_response.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_refer(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + client.realtime.calls.with_raw_response.refer( + call_id="", + target_uri="tel:+14155550123", + ) + + @parametrize + def test_method_reject(self, client: OpenAI) -> None: + call = client.realtime.calls.reject( + call_id="call_id", + ) + assert call is None + + @parametrize + def test_method_reject_with_all_params(self, client: OpenAI) -> None: + call = client.realtime.calls.reject( + call_id="call_id", + status_code=486, + ) + assert call is None + + @parametrize + def test_raw_response_reject(self, client: OpenAI) -> None: + response = client.realtime.calls.with_raw_response.reject( + call_id="call_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + def test_streaming_response_reject(self, client: OpenAI) -> None: + with client.realtime.calls.with_streaming_response.reject( + call_id="call_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_reject(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + client.realtime.calls.with_raw_response.reject( + call_id="", + ) + + +class TestAsyncCalls: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + call = await async_client.realtime.calls.create( + sdp="sdp", + ) + assert isinstance(call, _legacy_response.HttpxBinaryResponseContent) + assert call.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + call = await async_client.realtime.calls.create( + sdp="sdp", + session={ + "type": "realtime", + "audio": { + "input": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "noise_reduction": {"type": 
"near_field"}, + "transcription": { + "language": "language", + "model": "whisper-1", + "prompt": "prompt", + }, + "turn_detection": { + "type": "server_vad", + "create_response": True, + "idle_timeout_ms": 5000, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + }, + }, + "output": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "speed": 0.25, + "voice": "ash", + }, + }, + "include": ["item.input_audio_transcription.logprobs"], + "instructions": "instructions", + "max_output_tokens": 0, + "model": "string", + "output_modalities": ["text"], + "prompt": { + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + "tool_choice": "none", + "tools": [ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + "tracing": "auto", + "truncation": "auto", + }, + ) + assert isinstance(call, _legacy_response.HttpxBinaryResponseContent) + assert call.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + + response = await async_client.realtime.calls.with_raw_response.create( + sdp="sdp", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, call, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_streaming_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.post("/realtime/calls").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + async with async_client.realtime.calls.with_streaming_response.create( + sdp="sdp", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = await response.parse() + assert_matches_type(bytes, call, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_accept(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.accept( + call_id="call_id", + type="realtime", + ) + assert call is None + + @parametrize + async def test_method_accept_with_all_params(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.accept( + call_id="call_id", + type="realtime", + audio={ + "input": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "noise_reduction": {"type": "near_field"}, + "transcription": { + "language": "language", + "model": "whisper-1", + "prompt": "prompt", + }, + "turn_detection": { + "type": "server_vad", + "create_response": True, + "idle_timeout_ms": 5000, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + }, + }, + "output": { + "format": { + "rate": 24000, + "type": "audio/pcm", + }, + "speed": 0.25, + "voice": "ash", + }, + }, + include=["item.input_audio_transcription.logprobs"], + instructions="instructions", + max_output_tokens=0, + model="string", + output_modalities=["text"], + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + tool_choice="none", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + 
tracing="auto", + truncation="auto", + ) + assert call is None + + @parametrize + async def test_raw_response_accept(self, async_client: AsyncOpenAI) -> None: + response = await async_client.realtime.calls.with_raw_response.accept( + call_id="call_id", + type="realtime", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + async def test_streaming_response_accept(self, async_client: AsyncOpenAI) -> None: + async with async_client.realtime.calls.with_streaming_response.accept( + call_id="call_id", + type="realtime", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = await response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_accept(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + await async_client.realtime.calls.with_raw_response.accept( + call_id="", + type="realtime", + ) + + @parametrize + async def test_method_hangup(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.hangup( + "call_id", + ) + assert call is None + + @parametrize + async def test_raw_response_hangup(self, async_client: AsyncOpenAI) -> None: + response = await async_client.realtime.calls.with_raw_response.hangup( + "call_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + async def test_streaming_response_hangup(self, async_client: AsyncOpenAI) -> None: + async with async_client.realtime.calls.with_streaming_response.hangup( + "call_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = await response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_hangup(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + await async_client.realtime.calls.with_raw_response.hangup( + "", + ) + + @parametrize + async def test_method_refer(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) + assert call is None + + @parametrize + async def test_raw_response_refer(self, async_client: AsyncOpenAI) -> None: + response = await async_client.realtime.calls.with_raw_response.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + async def test_streaming_response_refer(self, async_client: AsyncOpenAI) -> None: + async with async_client.realtime.calls.with_streaming_response.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = await response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_refer(self, async_client: AsyncOpenAI) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + await async_client.realtime.calls.with_raw_response.refer( + call_id="", + target_uri="tel:+14155550123", + ) + + @parametrize + async def test_method_reject(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.reject( + call_id="call_id", + ) + assert call is None + + @parametrize + async def test_method_reject_with_all_params(self, async_client: AsyncOpenAI) -> None: + call = await async_client.realtime.calls.reject( + call_id="call_id", + status_code=486, + ) + assert call is None + + @parametrize + async def test_raw_response_reject(self, async_client: AsyncOpenAI) -> None: + response = await async_client.realtime.calls.with_raw_response.reject( + call_id="call_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + call = response.parse() + assert call is None + + @parametrize + async def test_streaming_response_reject(self, async_client: AsyncOpenAI) -> None: + async with async_client.realtime.calls.with_streaming_response.reject( + call_id="call_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + call = await response.parse() + assert call is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_reject(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `call_id` but received ''"): + await async_client.realtime.calls.with_raw_response.reject( + call_id="", + ) diff --git a/tests/api_resources/realtime/test_client_secrets.py b/tests/api_resources/realtime/test_client_secrets.py new file mode 100644 index 0000000000..cd15b4be52 --- /dev/null +++ b/tests/api_resources/realtime/test_client_secrets.py @@ -0,0 +1,204 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.realtime import ClientSecretCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestClientSecrets:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: OpenAI) -> None:
+        client_secret = client.realtime.client_secrets.create()
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    def test_method_create_with_all_params(self, client: OpenAI) -> None:
+        client_secret = client.realtime.client_secrets.create(
+            expires_after={
+                "anchor": "created_at",
+                "seconds": 10,
+            },
+            session={
+                "type": "realtime",
+                "audio": {
+                    "input": {
+                        "format": {
+                            "rate": 24000,
+                            "type": "audio/pcm",
+                        },
+                        "noise_reduction": {"type": "near_field"},
+                        "transcription": {
+                            "language": "language",
+                            "model": "whisper-1",
+                            "prompt": "prompt",
+                        },
+                        "turn_detection": {
+                            "type": "server_vad",
+                            "create_response": True,
+                            "idle_timeout_ms": 5000,
+                            "interrupt_response": True,
+                            "prefix_padding_ms": 0,
+                            "silence_duration_ms": 0,
+                            "threshold": 0,
+                        },
+                    },
+                    "output": {
+                        "format": {
+                            "rate": 24000,
+                            "type": "audio/pcm",
+                        },
+                        "speed": 0.25,
+                        "voice": "ash",
+                    },
+                },
+                "include": ["item.input_audio_transcription.logprobs"],
+                "instructions": "instructions",
+                "max_output_tokens": 0,
+                "model": "string",
+                "output_modalities": ["text"],
+                "prompt": {
+                    "id": "id",
+                    "variables": {"foo": "string"},
+                    "version": "version",
+                },
+                "tool_choice": "none",
+                "tools": [
+                    {
+                        "description": "description",
+                        "name": "name",
+                        "parameters": {},
+                        "type": "function",
+                    }
+                ],
+                "tracing": "auto",
+                "truncation": "auto",
+            },
+        )
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: OpenAI) -> None:
+        response = client.realtime.client_secrets.with_raw_response.create()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        client_secret = response.parse()
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.realtime.client_secrets.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            client_secret = response.parse()
+            assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncClientSecrets:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+        client_secret = await async_client.realtime.client_secrets.create()
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        client_secret = await async_client.realtime.client_secrets.create(
+            expires_after={
+                "anchor": "created_at",
+                "seconds": 10,
+            },
+            session={
+                "type": "realtime",
+                "audio": {
+                    "input": {
+                        "format": {
+                            "rate": 24000,
+                            "type": "audio/pcm",
+                        },
+                        "noise_reduction": {"type": "near_field"},
+                        "transcription": {
+                            "language": "language",
+                            "model": "whisper-1",
+                            "prompt": "prompt",
+                        },
+                        "turn_detection": {
+                            "type": "server_vad",
+                            "create_response": True,
+                            "idle_timeout_ms": 5000,
+                            "interrupt_response": True,
+                            "prefix_padding_ms": 0,
+                            "silence_duration_ms": 0,
+                            "threshold": 0,
+                        },
+                    },
+                    "output": {
+                        "format": {
+                            "rate": 24000,
+                            "type": "audio/pcm",
+                        },
+                        "speed": 0.25,
+                        "voice": "ash",
+                    },
+                },
+                "include": ["item.input_audio_transcription.logprobs"],
+                "instructions": "instructions",
+                "max_output_tokens": 0,
+                "model": "string",
+                "output_modalities": ["text"],
+                "prompt": {
+                    "id": "id",
+                    "variables": {"foo": "string"},
+                    "version": "version",
+                },
+                "tool_choice": "none",
+                "tools": [
+                    {
+                        "description": "description",
+                        "name": "name",
+                        "parameters": {},
+                        "type": "function",
+                    }
+                ],
+                "tracing": "auto",
+                "truncation": "auto",
+            },
+        )
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.realtime.client_secrets.with_raw_response.create()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        client_secret = response.parse()
+        assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.realtime.client_secrets.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            client_secret = await response.parse()
+            assert_matches_type(ClientSecretCreateResponse, client_secret, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/responses/__init__.py b/tests/api_resources/responses/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/responses/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py
new file mode 100644
index 0000000000..ed6fddf33a
--- /dev/null
+++ b/tests/api_resources/responses/test_input_items.py
@@ -0,0 +1,123 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.responses import ResponseItem + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestInputItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + after="after", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = response.parse() + assert_matches_type(SyncCursorPage[ResponseItem], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.input_items.with_raw_response.list( + response_id="", + ) + + +class TestAsyncInputItems: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + after="after", + include=["file_search_call.results"], + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, 
async_client: AsyncOpenAI) -> None: + async with async_client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = await response.parse() + assert_matches_type(AsyncCursorPage[ResponseItem], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.input_items.with_raw_response.list( + response_id="", + ) diff --git a/tests/api_resources/responses/test_input_tokens.py b/tests/api_resources/responses/test_input_tokens.py new file mode 100644 index 0000000000..d9aecc33bd --- /dev/null +++ b/tests/api_resources/responses/test_input_tokens.py @@ -0,0 +1,138 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.responses import InputTokenCountResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestInputTokens: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_count(self, client: OpenAI) -> None: + input_token = client.responses.input_tokens.count() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + def test_method_count_with_all_params(self, client: OpenAI) -> None: + input_token = client.responses.input_tokens.count( + conversation="string", + input="string", + instructions="instructions", + model="model", + parallel_tool_calls=True, + previous_response_id="resp_123", + reasoning={ + "effort": "none", + "generate_summary": "auto", + "summary": "auto", + }, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + truncation="auto", + ) + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + def test_raw_response_count(self, client: OpenAI) -> None: + response = client.responses.input_tokens.with_raw_response.count() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_token = response.parse() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + def test_streaming_response_count(self, client: OpenAI) -> None: + with client.responses.input_tokens.with_streaming_response.count() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_token = response.parse() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncInputTokens: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_count(self, async_client: AsyncOpenAI) -> 
None: + input_token = await async_client.responses.input_tokens.count() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + async def test_method_count_with_all_params(self, async_client: AsyncOpenAI) -> None: + input_token = await async_client.responses.input_tokens.count( + conversation="string", + input="string", + instructions="instructions", + model="model", + parallel_tool_calls=True, + previous_response_id="resp_123", + reasoning={ + "effort": "none", + "generate_summary": "auto", + "summary": "auto", + }, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + truncation="auto", + ) + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + async def test_raw_response_count(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.input_tokens.with_raw_response.count() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_token = response.parse() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + @parametrize + async def test_streaming_response_count(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.input_tokens.with_streaming_response.count() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_token = await response.parse() + assert_matches_type(InputTokenCountResponse, input_token, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py index 6f9b598e61..2167561348 100644 --- a/tests/api_resources/test_batches.py +++ b/tests/api_resources/test_batches.py @@ -22,8 +22,8 @@ class TestBatches: def test_method_create(self, client: OpenAI) -> None: batch = client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert_matches_type(Batch, batch, path=["response"]) @@ -31,9 +31,13 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: batch = client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", metadata={"foo": "string"}, + output_expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, ) assert_matches_type(Batch, batch, path=["response"]) @@ -41,8 +45,8 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.batches.with_raw_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert response.is_closed is True @@ -54,8 +58,8 @@ def test_raw_response_create(self, client: OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.batches.with_streaming_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) as response: assert not response.is_closed 
assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -68,14 +72,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: batch = client.batches.retrieve( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.batches.with_raw_response.retrieve( - "string", + "batch_id", ) assert response.is_closed is True @@ -86,7 +90,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.batches.with_streaming_response.retrieve( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -111,7 +115,7 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: batch = client.batches.list( - after="string", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) @@ -139,14 +143,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_cancel(self, client: OpenAI) -> None: batch = client.batches.cancel( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: response = client.batches.with_raw_response.cancel( - "string", + "batch_id", ) assert response.is_closed is True @@ -157,7 +161,7 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: with client.batches.with_streaming_response.cancel( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -176,14 +180,16 @@ def test_path_params_cancel(self, client: OpenAI) -> None: class TestAsyncBatches: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert_matches_type(Batch, batch, path=["response"]) @@ -191,9 +197,13 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", metadata={"foo": "string"}, + output_expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, ) assert_matches_type(Batch, batch, path=["response"]) @@ -201,8 +211,8 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + 
endpoint="/v1/responses", + input_file_id="input_file_id", ) assert response.is_closed is True @@ -214,8 +224,8 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -228,14 +238,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.retrieve( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.retrieve( - "string", + "batch_id", ) assert response.is_closed is True @@ -246,7 +256,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.retrieve( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -271,7 +281,7 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.list( - after="string", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) @@ -299,14 +309,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.cancel( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.cancel( - "string", + "batch_id", ) assert response.is_closed is True @@ -317,7 +327,7 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.cancel( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 69d914200f..a8fb0e59eb 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -38,10 +38,13 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, suffix="test.", temperature=1, top_p=1, @@ -98,9 +101,12 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - 
seed=-9223372036854776000, + seed=0, stop="\n", - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, suffix="test.", temperature=1, top_p=1, @@ -137,7 +143,9 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: @@ -160,10 +168,13 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, suffix="test.", temperature=1, top_p=1, @@ -220,9 +231,12 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", - stream_options={"include_usage": True}, + stream_options={ + "include_obfuscation": True, + "include_usage": True, + }, suffix="test.", temperature=1, top_p=1, diff --git a/tests/api_resources/test_containers.py b/tests/api_resources/test_containers.py new file mode 100644 index 0000000000..cf173c7fd5 --- /dev/null +++ b/tests/api_resources/test_containers.py @@ -0,0 +1,337 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import ( + ContainerListResponse, + ContainerCreateResponse, + ContainerRetrieveResponse, +) +from openai.pagination import SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestContainers: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + container = client.containers.create( + name="name", + ) + assert_matches_type(ContainerCreateResponse, container, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + container = client.containers.create( + name="name", + expires_after={ + "anchor": "last_active_at", + "minutes": 0, + }, + file_ids=["string"], + memory_limit="1g", + ) + assert_matches_type(ContainerCreateResponse, container, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.containers.with_raw_response.create( + name="name", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + container = response.parse() + assert_matches_type(ContainerCreateResponse, container, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.containers.with_streaming_response.create( + name="name", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + container = response.parse() 
+            assert_matches_type(ContainerCreateResponse, container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_retrieve(self, client: OpenAI) -> None:
+        container = client.containers.retrieve(
+            "container_id",
+        )
+        assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+    @parametrize
+    def test_raw_response_retrieve(self, client: OpenAI) -> None:
+        response = client.containers.with_raw_response.retrieve(
+            "container_id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.containers.with_streaming_response.retrieve(
+            "container_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = response.parse()
+            assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_retrieve(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"):
+            client.containers.with_raw_response.retrieve(
+                "",
+            )
+
+    @parametrize
+    def test_method_list(self, client: OpenAI) -> None:
+        container = client.containers.list()
+        assert_matches_type(SyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    def test_method_list_with_all_params(self, client: OpenAI) -> None:
+        container = client.containers.list(
+            after="after",
+            limit=0,
+            order="asc",
+        )
+        assert_matches_type(SyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    def test_raw_response_list(self, client: OpenAI) -> None:
+        response = client.containers.with_raw_response.list()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert_matches_type(SyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.containers.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = response.parse()
+            assert_matches_type(SyncCursorPage[ContainerListResponse], container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_delete(self, client: OpenAI) -> None:
+        container = client.containers.delete(
+            "container_id",
+        )
+        assert container is None
+
+    @parametrize
+    def test_raw_response_delete(self, client: OpenAI) -> None:
+        response = client.containers.with_raw_response.delete(
+            "container_id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert container is None
+
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.containers.with_streaming_response.delete(
+            "container_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = response.parse()
+            assert container is None
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_delete(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"):
+            client.containers.with_raw_response.delete(
+                "",
+            )
+
+
+class TestAsyncContainers:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.create(
+            name="name",
+        )
+        assert_matches_type(ContainerCreateResponse, container, path=["response"])
+
+    @parametrize
+    async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.create(
+            name="name",
+            expires_after={
+                "anchor": "last_active_at",
+                "minutes": 0,
+            },
+            file_ids=["string"],
+            memory_limit="1g",
+        )
+        assert_matches_type(ContainerCreateResponse, container, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.containers.with_raw_response.create(
+            name="name",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert_matches_type(ContainerCreateResponse, container, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.containers.with_streaming_response.create(
+            name="name",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = await response.parse()
+            assert_matches_type(ContainerCreateResponse, container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.retrieve(
+            "container_id",
+        )
+        assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+    @parametrize
+    async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.containers.with_raw_response.retrieve(
+            "container_id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.containers.with_streaming_response.retrieve(
+            "container_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = await response.parse()
+            assert_matches_type(ContainerRetrieveResponse, container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"):
+            await async_client.containers.with_raw_response.retrieve(
+                "",
+            )
+
+    @parametrize
+    async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.list()
+        assert_matches_type(AsyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.list(
+            after="after",
+            limit=0,
+            order="asc",
+        )
+        assert_matches_type(AsyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.containers.with_raw_response.list()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert_matches_type(AsyncCursorPage[ContainerListResponse], container, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.containers.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = await response.parse()
+            assert_matches_type(AsyncCursorPage[ContainerListResponse], container, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+        container = await async_client.containers.delete(
+            "container_id",
+        )
+        assert container is None
+
+    @parametrize
+    async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.containers.with_raw_response.delete(
+            "container_id",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        container = response.parse()
+        assert container is None
+
+    @parametrize
+    async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.containers.with_streaming_response.delete(
+            "container_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            container = await response.parse()
+            assert container is None
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `container_id` but received ''"):
+            await async_client.containers.with_raw_response.delete(
+                "",
+            )
diff --git a/tests/api_resources/test_conversations.py b/tests/api_resources/test_conversations.py
new file mode 100644
index 0000000000..d21e685a04
--- /dev/null
+++ b/tests/api_resources/test_conversations.py
@@ -0,0 +1,341 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.conversations import ( + Conversation, + ConversationDeletedResource, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestConversations: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + conversation = client.conversations.create() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + conversation = client.conversations.create( + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + metadata={"foo": "string"}, + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.conversations.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.conversations.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + conversation = client.conversations.retrieve( + "conv_123", + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.conversations.with_raw_response.retrieve( + "conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.conversations.with_streaming_response.retrieve( + "conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + conversation = client.conversations.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.conversations.with_raw_response.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) + + assert 
response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.conversations.with_streaming_response.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.with_raw_response.update( + conversation_id="", + metadata={"foo": "string"}, + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + conversation = client.conversations.delete( + "conv_123", + ) + assert_matches_type(ConversationDeletedResource, conversation, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.conversations.with_raw_response.delete( + "conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(ConversationDeletedResource, conversation, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.conversations.with_streaming_response.delete( + "conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = response.parse() + assert_matches_type(ConversationDeletedResource, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + client.conversations.with_raw_response.delete( + "", + ) + + +class TestAsyncConversations: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + conversation = await async_client.conversations.create() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + conversation = await async_client.conversations.create( + items=[ + { + "content": "string", + "role": "user", + "type": "message", + } + ], + metadata={"foo": "string"}, + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.conversations.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = await response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + conversation = await async_client.conversations.retrieve( + "conv_123", + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.with_raw_response.retrieve( + "conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.with_streaming_response.retrieve( + "conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = await response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + conversation = await async_client.conversations.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.with_raw_response.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.with_streaming_response.update( + conversation_id="conv_123", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = await response.parse() + assert_matches_type(Conversation, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.with_raw_response.update( + conversation_id="", + metadata={"foo": "string"}, + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + conversation = await async_client.conversations.delete( + "conv_123", + ) + assert_matches_type(ConversationDeletedResource, conversation, 
path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.conversations.with_raw_response.delete( + "conv_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + conversation = response.parse() + assert_matches_type(ConversationDeletedResource, conversation, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.conversations.with_streaming_response.delete( + "conv_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + conversation = await response.parse() + assert_matches_type(ConversationDeletedResource, conversation, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `conversation_id` but received ''"): + await async_client.conversations.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index e75545b4e2..ce6e213d59 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -64,7 +64,9 @@ def test_streaming_response_create(self, client: OpenAI) -> None: class TestAsyncEmbeddings: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py new file mode 100644 index 0000000000..473a4711ca --- /dev/null +++ b/tests/api_resources/test_evals.py @@ -0,0 +1,573 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import ( + EvalListResponse, + EvalCreateResponse, + EvalDeleteResponse, + EvalUpdateResponse, + EvalRetrieveResponse, +) +from openai.pagination import SyncCursorPage, AsyncCursorPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestEvals: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + eval = client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + "include_sample_schema": True, + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + eval = client.evals.retrieve( + "eval_id", + ) + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.retrieve( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, 
path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.retrieve( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + eval = client.evals.update( + eval_id="eval_id", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_method_update_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.update( + eval_id="eval_id", + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.update( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.update( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.update( + eval_id="", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + eval = client.evals.list() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + eval = client.evals.list( + after="after", + limit=0, + order="asc", + order_by="created_at", + ) + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + eval = client.evals.delete( + "eval_id", + ) + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + 
@parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.evals.with_raw_response.delete( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.evals.with_streaming_response.delete( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + client.evals.with_raw_response.delete( + "", + ) + + +class TestAsyncEvals: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + "include_sample_schema": True, + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.create( + data_source_config={ + "item_schema": {"foo": "bar"}, + "type": "custom", + }, + testing_criteria=[ + { + "input": [ + { + "content": "content", + "role": "role", + } + ], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + 
eval = await response.parse() + assert_matches_type(EvalCreateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.retrieve( + "eval_id", + ) + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.retrieve( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.retrieve( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalRetrieveResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.update( + eval_id="eval_id", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.update( + eval_id="eval_id", + metadata={"foo": "string"}, + name="name", + ) + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.update( + eval_id="eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.update( + eval_id="eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalUpdateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.update( + eval_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.list() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.list( + after="after", + limit=0, + order="asc", + order_by="created_at", + ) + 
assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + eval = await async_client.evals.delete( + "eval_id", + ) + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.evals.with_raw_response.delete( + "eval_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.evals.with_streaming_response.delete( + "eval_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvalDeleteResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"): + await async_client.evals.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index 882f0ddbe7..59b0403e73 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -13,7 +13,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import FileObject, FileDeleted -from openai.pagination import SyncPage, AsyncPage +from openai.pagination import SyncCursorPage, AsyncCursorPage # pyright: reportDeprecated=false @@ -31,6 +31,18 @@ def test_method_create(self, client: OpenAI) -> None: ) assert_matches_type(FileObject, file, path=["response"]) + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file = client.files.create( + file=b"raw file contents", + purpose="assistants", + expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, + ) + assert_matches_type(FileObject, file, path=["response"]) + @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.files.with_raw_response.create( @@ -60,14 +72,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: file = client.files.retrieve( - "string", + "file_id", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize 
def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.files.with_raw_response.retrieve( - "string", + "file_id", ) assert response.is_closed is True @@ -78,7 +90,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.files.with_streaming_response.retrieve( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -98,14 +110,17 @@ def test_path_params_retrieve(self, client: OpenAI) -> None: @parametrize def test_method_list(self, client: OpenAI) -> None: file = client.files.list() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: file = client.files.list( - purpose="string", + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: @@ -114,7 +129,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: @@ -123,21 +138,21 @@ def test_streaming_response_list(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_method_delete(self, client: OpenAI) -> None: file = client.files.delete( - "string", + "file_id", ) assert_matches_type(FileDeleted, file, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.files.with_raw_response.delete( - "string", + "file_id", ) assert response.is_closed is True @@ -148,7 +163,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.files.with_streaming_response.delete( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -168,9 +183,9 @@ def test_path_params_delete(self, client: OpenAI) -> None: @parametrize @pytest.mark.respx(base_url=base_url) def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) file = client.files.content( - "string", + "file_id", ) assert isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} @@ -178,10 +193,10 @@ def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None: @parametrize @pytest.mark.respx(base_url=base_url) def test_raw_response_content(self, client: OpenAI, 
respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.files.with_raw_response.content( - "string", + "file_id", ) assert response.is_closed is True @@ -192,9 +207,9 @@ def test_raw_response_content(self, client: OpenAI, respx_mock: MockRouter) -> N @parametrize @pytest.mark.respx(base_url=base_url) def test_streaming_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) with client.files.with_streaming_response.content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -216,7 +231,7 @@ def test_path_params_content(self, client: OpenAI) -> None: def test_method_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): file = client.files.retrieve_content( - "string", + "file_id", ) assert_matches_type(str, file, path=["response"]) @@ -225,7 +240,7 @@ def test_method_retrieve_content(self, client: OpenAI) -> None: def test_raw_response_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): response = client.files.with_raw_response.retrieve_content( - "string", + "file_id", ) assert response.is_closed is True @@ -237,7 +252,7 @@ def test_raw_response_retrieve_content(self, client: OpenAI) -> None: def test_streaming_response_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): with client.files.with_streaming_response.retrieve_content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -257,7 +272,9 @@ def test_path_params_retrieve_content(self, client: OpenAI) -> None: class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: @@ -267,6 +284,18 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: ) assert_matches_type(FileObject, file, path=["response"]) + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.files.create( + file=b"raw file contents", + purpose="assistants", + expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, + ) + assert_matches_type(FileObject, file, path=["response"]) + @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.files.with_raw_response.create( @@ -296,14 +325,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.retrieve( - "string", + "file_id", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await 
async_client.files.with_raw_response.retrieve( - "string", + "file_id", ) assert response.is_closed is True @@ -314,7 +343,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.files.with_streaming_response.retrieve( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -334,14 +363,17 @@ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.list() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.list( - purpose="string", + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @@ -350,7 +382,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @@ -359,21 +391,21 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.delete( - "string", + "file_id", ) assert_matches_type(FileDeleted, file, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.files.with_raw_response.delete( - "string", + "file_id", ) assert response.is_closed is True @@ -384,7 +416,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.files.with_streaming_response.delete( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -404,9 +436,9 @@ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: @parametrize @pytest.mark.respx(base_url=base_url) async def test_method_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) file = await async_client.files.content( - "string", + "file_id", ) assert 
isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} @@ -414,10 +446,10 @@ async def test_method_content(self, async_client: AsyncOpenAI, respx_mock: MockR @parametrize @pytest.mark.respx(base_url=base_url) async def test_raw_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = await async_client.files.with_raw_response.content( - "string", + "file_id", ) assert response.is_closed is True @@ -428,9 +460,9 @@ async def test_raw_response_content(self, async_client: AsyncOpenAI, respx_mock: @parametrize @pytest.mark.respx(base_url=base_url) async def test_streaming_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) async with async_client.files.with_streaming_response.content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -452,7 +484,7 @@ async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: async def test_method_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): file = await async_client.files.retrieve_content( - "string", + "file_id", ) assert_matches_type(str, file, path=["response"]) @@ -461,7 +493,7 @@ async def test_method_retrieve_content(self, async_client: AsyncOpenAI) -> None: async def test_raw_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): response = await async_client.files.with_raw_response.retrieve_content( - "string", + "file_id", ) assert response.is_closed is True @@ -473,7 +505,7 @@ async def test_raw_response_retrieve_content(self, async_client: AsyncOpenAI) -> async def test_streaming_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): async with async_client.files.with_streaming_response.retrieve_content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py index 2e31f3354a..99fe77d8e0 100644 --- a/tests/api_resources/test_images.py +++ b/tests/api_resources/test_images.py @@ -28,7 +28,7 @@ def test_method_create_variation(self, client: OpenAI) -> None: def test_method_create_variation_with_all_params(self, client: OpenAI) -> None: image = client.images.create_variation( image=b"raw file contents", - model="dall-e-2", + model="string", n=1, response_format="url", size="1024x1024", @@ -61,7 +61,7 @@ def test_streaming_response_create_variation(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True @parametrize - def test_method_edit(self, client: OpenAI) -> None: + def test_method_edit_overload_1(self, client: OpenAI) -> None: image = client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -69,21 +69,28 @@ def test_method_edit(self, client: OpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def 
test_method_edit_with_all_params(self, client: OpenAI) -> None: + def test_method_edit_with_all_params_overload_1(self, client: OpenAI) -> None: image = client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", + background="transparent", + input_fidelity="high", mask=b"raw file contents", - model="dall-e-2", + model="string", n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="high", response_format="url", size="1024x1024", + stream=False, user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def test_raw_response_edit(self, client: OpenAI) -> None: + def test_raw_response_edit_overload_1(self, client: OpenAI) -> None: response = client.images.with_raw_response.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -95,7 +102,7 @@ def test_raw_response_edit(self, client: OpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def test_streaming_response_edit(self, client: OpenAI) -> None: + def test_streaming_response_edit_overload_1(self, client: OpenAI) -> None: with client.images.with_streaming_response.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -109,28 +116,91 @@ def test_streaming_response_edit(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True @parametrize - def test_method_generate(self, client: OpenAI) -> None: + def test_method_edit_overload_2(self, client: OpenAI) -> None: + image_stream = client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) + image_stream.response.close() + + @parametrize + def test_method_edit_with_all_params_overload_2(self, client: OpenAI) -> None: + image_stream = client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + background="transparent", + input_fidelity="high", + mask=b"raw file contents", + model="string", + n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="high", + response_format="url", + size="1024x1024", + user="user-1234", + ) + image_stream.response.close() + + @parametrize + def test_raw_response_edit_overload_2(self, client: OpenAI) -> None: + response = client.images.with_raw_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_edit_overload_2(self, client: OpenAI) -> None: + with client.images.with_streaming_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_generate_overload_1(self, client: OpenAI) -> None: image = client.images.generate( prompt="A cute baby sea otter", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def test_method_generate_with_all_params(self, client: OpenAI) -> None: + def test_method_generate_with_all_params_overload_1(self, client: OpenAI) -> None: image = client.images.generate( prompt="A cute baby sea otter", - model="dall-e-3", + 
background="transparent", + model="string", + moderation="low", n=1, - quality="standard", + output_compression=100, + output_format="png", + partial_images=1, + quality="medium", response_format="url", size="1024x1024", + stream=False, style="vivid", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def test_raw_response_generate(self, client: OpenAI) -> None: + def test_raw_response_generate_overload_1(self, client: OpenAI) -> None: response = client.images.with_raw_response.generate( prompt="A cute baby sea otter", ) @@ -141,7 +211,7 @@ def test_raw_response_generate(self, client: OpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - def test_streaming_response_generate(self, client: OpenAI) -> None: + def test_streaming_response_generate_overload_1(self, client: OpenAI) -> None: with client.images.with_streaming_response.generate( prompt="A cute baby sea otter", ) as response: @@ -153,9 +223,64 @@ def test_streaming_response_generate(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_generate_overload_2(self, client: OpenAI) -> None: + image_stream = client.images.generate( + prompt="A cute baby sea otter", + stream=True, + ) + image_stream.response.close() + + @parametrize + def test_method_generate_with_all_params_overload_2(self, client: OpenAI) -> None: + image_stream = client.images.generate( + prompt="A cute baby sea otter", + stream=True, + background="transparent", + model="string", + moderation="low", + n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="medium", + response_format="url", + size="1024x1024", + style="vivid", + user="user-1234", + ) + image_stream.response.close() + + @parametrize + def test_raw_response_generate_overload_2(self, client: OpenAI) -> None: + response = client.images.with_raw_response.generate( + prompt="A cute baby sea otter", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_generate_overload_2(self, client: OpenAI) -> None: + with client.images.with_streaming_response.generate( + prompt="A cute baby sea otter", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + class TestAsyncImages: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_variation(self, async_client: AsyncOpenAI) -> None: @@ -168,7 +293,7 @@ async def test_method_create_variation(self, async_client: AsyncOpenAI) -> None: async def test_method_create_variation_with_all_params(self, async_client: AsyncOpenAI) -> None: image = await async_client.images.create_variation( image=b"raw file contents", - model="dall-e-2", + model="string", n=1, response_format="url", size="1024x1024", @@ -201,7 +326,7 @@ async def test_streaming_response_create_variation(self, async_client: AsyncOpen assert cast(Any, response.is_closed) is True @parametrize - async def test_method_edit(self, async_client: AsyncOpenAI) -> None: + async def 
test_method_edit_overload_1(self, async_client: AsyncOpenAI) -> None: image = await async_client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -209,21 +334,28 @@ async def test_method_edit(self, async_client: AsyncOpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_edit_with_all_params(self, async_client: AsyncOpenAI) -> None: + async def test_method_edit_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: image = await async_client.images.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", + background="transparent", + input_fidelity="high", mask=b"raw file contents", - model="dall-e-2", + model="string", n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="high", response_format="url", size="1024x1024", + stream=False, user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_raw_response_edit(self, async_client: AsyncOpenAI) -> None: + async def test_raw_response_edit_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.images.with_raw_response.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -235,7 +367,7 @@ async def test_raw_response_edit(self, async_client: AsyncOpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_streaming_response_edit(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_edit_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.images.with_streaming_response.edit( image=b"raw file contents", prompt="A cute baby sea otter wearing a beret", @@ -249,28 +381,91 @@ async def test_streaming_response_edit(self, async_client: AsyncOpenAI) -> None: assert cast(Any, response.is_closed) is True @parametrize - async def test_method_generate(self, async_client: AsyncOpenAI) -> None: + async def test_method_edit_overload_2(self, async_client: AsyncOpenAI) -> None: + image_stream = await async_client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) + await image_stream.response.aclose() + + @parametrize + async def test_method_edit_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + image_stream = await async_client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + background="transparent", + input_fidelity="high", + mask=b"raw file contents", + model="string", + n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="high", + response_format="url", + size="1024x1024", + user="user-1234", + ) + await image_stream.response.aclose() + + @parametrize + async def test_raw_response_edit_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.images.with_raw_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_edit_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.images.with_streaming_response.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + stream=True, + ) as response: + 
assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_generate_overload_1(self, async_client: AsyncOpenAI) -> None: image = await async_client.images.generate( prompt="A cute baby sea otter", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_method_generate_with_all_params(self, async_client: AsyncOpenAI) -> None: + async def test_method_generate_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: image = await async_client.images.generate( prompt="A cute baby sea otter", - model="dall-e-3", + background="transparent", + model="string", + moderation="low", n=1, - quality="standard", + output_compression=100, + output_format="png", + partial_images=1, + quality="medium", response_format="url", size="1024x1024", + stream=False, style="vivid", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_raw_response_generate(self, async_client: AsyncOpenAI) -> None: + async def test_raw_response_generate_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.images.with_raw_response.generate( prompt="A cute baby sea otter", ) @@ -281,7 +476,7 @@ async def test_raw_response_generate(self, async_client: AsyncOpenAI) -> None: assert_matches_type(ImagesResponse, image, path=["response"]) @parametrize - async def test_streaming_response_generate(self, async_client: AsyncOpenAI) -> None: + async def test_streaming_response_generate_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.images.with_streaming_response.generate( prompt="A cute baby sea otter", ) as response: @@ -292,3 +487,56 @@ async def test_streaming_response_generate(self, async_client: AsyncOpenAI) -> N assert_matches_type(ImagesResponse, image, path=["response"]) assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_generate_overload_2(self, async_client: AsyncOpenAI) -> None: + image_stream = await async_client.images.generate( + prompt="A cute baby sea otter", + stream=True, + ) + await image_stream.response.aclose() + + @parametrize + async def test_method_generate_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + image_stream = await async_client.images.generate( + prompt="A cute baby sea otter", + stream=True, + background="transparent", + model="string", + moderation="low", + n=1, + output_compression=100, + output_format="png", + partial_images=1, + quality="medium", + response_format="url", + size="1024x1024", + style="vivid", + user="user-1234", + ) + await image_stream.response.aclose() + + @parametrize + async def test_raw_response_generate_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.images.with_raw_response.generate( + prompt="A cute baby sea otter", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_generate_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.images.with_streaming_response.generate( + prompt="A cute baby sea otter", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await 
response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index 71f8e5834b..cf70871ade 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -21,14 +21,14 @@ class TestModels: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: model = client.models.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert response.is_closed is True @@ -39,7 +39,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.models.with_streaming_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -84,14 +84,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: model = client.models.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert response.is_closed is True @@ -102,7 +102,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.models.with_streaming_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -121,19 +121,21 @@ def test_path_params_delete(self, client: OpenAI) -> None: class TestAsyncModels: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: model = await async_client.models.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert response.is_closed is True @@ -144,7 +146,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.models.with_streaming_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -189,14 +191,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: model = await async_client.models.delete( - 
"ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert response.is_closed is True @@ -207,7 +209,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.models.with_streaming_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_moderations.py b/tests/api_resources/test_moderations.py index 94b9ecd31b..870c9e342f 100644 --- a/tests/api_resources/test_moderations.py +++ b/tests/api_resources/test_moderations.py @@ -28,7 +28,7 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: moderation = client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="string", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) @@ -58,7 +58,9 @@ def test_streaming_response_create(self, client: OpenAI) -> None: class TestAsyncModerations: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: @@ -71,7 +73,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: moderation = await async_client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="string", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) diff --git a/tests/api_resources/test_realtime.py b/tests/api_resources/test_realtime.py new file mode 100644 index 0000000000..2b0c7f7d8d --- /dev/null +++ b/tests/api_resources/test_realtime.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os + +import pytest + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestRealtime: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + +class TestAsyncRealtime: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py new file mode 100644 index 0000000000..bb47cac225 --- /dev/null +++ b/tests/api_resources/test_responses.py @@ -0,0 +1,785 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.responses import ( + Response, + CompactedResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestResponses: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: OpenAI) -> None: + response = client.responses.create() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: + response = client.responses.create( + background=True, + conversation="string", + include=["file_search_call.results"], + input="string", + instructions="instructions", + max_output_tokens=0, + max_tool_calls=0, + metadata={"foo": "string"}, + model="gpt-5.1", + parallel_tool_calls=True, + previous_response_id="previous_response_id", + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning={ + "effort": "none", + "generate_summary": "auto", + "summary": "auto", + }, + safety_identifier="safety-identifier-1234", + service_tier="auto", + store=True, + stream=False, + stream_options={"include_obfuscation": True}, + temperature=1, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + top_logprobs=0, + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.create() + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create() as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.create( + stream=True, + ) + response_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.create( + stream=True, + background=True, + conversation="string", + include=["file_search_call.results"], + input="string", + instructions="instructions", + max_output_tokens=0, + max_tool_calls=0, + metadata={"foo": "string"}, + model="gpt-5.1", + parallel_tool_calls=True, + previous_response_id="previous_response_id", + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning={ + "effort": 
"none", + "generate_summary": "auto", + "summary": "auto", + }, + safety_identifier="safety-identifier-1234", + service_tier="auto", + store=True, + stream_options={"include_obfuscation": True}, + temperature=1, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + top_logprobs=0, + top_p=1, + truncation="auto", + user="user-1234", + ) + response_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.responses.with_raw_response.create( + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create( + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve_overload_1(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params_overload_1(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + include_obfuscation=True, + starting_after=0, + stream=False, + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_retrieve_overload_1(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_retrieve_overload_1(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_retrieve_overload_1(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + def test_method_retrieve_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) + response_stream.response.close() + + @parametrize + def test_method_retrieve_with_all_params_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + include=["file_search_call.results"], + 
include_obfuscation=True, + starting_after=0, + ) + response_stream.response.close() + + @parametrize + def test_raw_response_retrieve_overload_2(self, client: OpenAI) -> None: + response = client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_retrieve_overload_2(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve_overload_2(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.retrieve( + response_id="", + stream=True, + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + response = client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert response is None + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.delete( + "", + ) + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + response = client.responses.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: 
OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.cancel( + "", + ) + + @parametrize + def test_method_compact(self, client: OpenAI) -> None: + response = client.responses.compact( + model="gpt-5.2", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_method_compact_with_all_params(self, client: OpenAI) -> None: + response = client.responses.compact( + model="gpt-5.2", + input="string", + instructions="instructions", + previous_response_id="resp_123", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_raw_response_compact(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.compact( + model="gpt-5.2", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_streaming_response_compact(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.compact( + model="gpt-5.2", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + +class TestAsyncResponses: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create( + background=True, + conversation="string", + include=["file_search_call.results"], + input="string", + instructions="instructions", + max_output_tokens=0, + max_tool_calls=0, + metadata={"foo": "string"}, + model="gpt-5.1", + parallel_tool_calls=True, + previous_response_id="previous_response_id", + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning={ + "effort": "none", + "generate_summary": "auto", + "summary": "auto", + }, + safety_identifier="safety-identifier-1234", + service_tier="auto", + store=True, + stream=False, + stream_options={"include_obfuscation": True}, + temperature=1, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + top_logprobs=0, + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.create() + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + 
assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create() as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.create( + stream=True, + ) + await response_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.create( + stream=True, + background=True, + conversation="string", + include=["file_search_call.results"], + input="string", + instructions="instructions", + max_output_tokens=0, + max_tool_calls=0, + metadata={"foo": "string"}, + model="gpt-5.1", + parallel_tool_calls=True, + previous_response_id="previous_response_id", + prompt={ + "id": "id", + "variables": {"foo": "string"}, + "version": "version", + }, + prompt_cache_key="prompt-cache-key-1234", + prompt_cache_retention="in-memory", + reasoning={ + "effort": "none", + "generate_summary": "auto", + "summary": "auto", + }, + safety_identifier="safety-identifier-1234", + service_tier="auto", + store=True, + stream_options={"include_obfuscation": True}, + temperature=1, + text={ + "format": {"type": "text"}, + "verbosity": "low", + }, + tool_choice="none", + tools=[ + { + "name": "name", + "parameters": {"foo": "bar"}, + "strict": True, + "type": "function", + "description": "description", + } + ], + top_logprobs=0, + top_p=1, + truncation="auto", + user="user-1234", + ) + await response_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.with_raw_response.create( + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create( + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + include_obfuscation=True, + starting_after=0, + stream=False, + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_retrieve_overload_1(self, async_client: AsyncOpenAI) -> None: + http_response = await 
async_client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_retrieve_overload_1(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + async def test_method_retrieve_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) + await response_stream.response.aclose() + + @parametrize + async def test_method_retrieve_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + include=["file_search_call.results"], + include_obfuscation=True, + starting_after=0, + ) + await response_stream.response.aclose() + + @parametrize + async def test_raw_response_retrieve_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_retrieve_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve_overload_2(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.retrieve( + response_id="", + stream=True, + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + 
response = http_response.parse() + assert response is None + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.delete( + "", + ) + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.cancel( + "", + ) + + @parametrize + async def test_method_compact(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.compact( + model="gpt-5.2", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.compact( + model="gpt-5.2", + input="string", + instructions="instructions", + previous_response_id="resp_123", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.compact( + model="gpt-5.2", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.compact( + model="gpt-5.2", + ) as http_response: + assert not http_response.is_closed + assert 
http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True diff --git a/tests/api_resources/test_uploads.py b/tests/api_resources/test_uploads.py new file mode 100644 index 0000000000..0e438a3c61 --- /dev/null +++ b/tests/api_resources/test_uploads.py @@ -0,0 +1,310 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import Upload + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestUploads: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + upload = client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + upload = client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + upload = client.uploads.cancel( + "upload_abc123", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.cancel( + "upload_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.cancel( + "upload_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a 
non-empty value for `upload_id` but received ''"): + client.uploads.with_raw_response.cancel( + "", + ) + + @parametrize + def test_method_complete(self, client: OpenAI) -> None: + upload = client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_method_complete_with_all_params(self, client: OpenAI) -> None: + upload = client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + md5="md5", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_complete(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_complete(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_complete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.uploads.with_raw_response.complete( + upload_id="", + part_ids=["string"], + ) + + +class TestAsyncUploads: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + expires_after={ + "anchor": "created_at", + "seconds": 3600, + }, + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, 
async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.cancel( + "upload_abc123", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.cancel( + "upload_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.cancel( + "upload_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.uploads.with_raw_response.cancel( + "", + ) + + @parametrize + async def test_method_complete(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_method_complete_with_all_params(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + md5="md5", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_complete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_complete(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_complete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.uploads.with_raw_response.complete( + upload_id="", + part_ids=["string"], + ) diff --git a/tests/api_resources/beta/test_vector_stores.py b/tests/api_resources/test_vector_stores.py similarity index 58% rename from tests/api_resources/beta/test_vector_stores.py rename to tests/api_resources/test_vector_stores.py index e671c96a45..cce9c52cea 100644 --- a/tests/api_resources/beta/test_vector_stores.py +++ b/tests/api_resources/test_vector_stores.py @@ -9,11 +9,12 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.pagination import SyncCursorPage, AsyncCursorPage -from 
openai.types.beta import ( +from openai.types import ( VectorStore, VectorStoreDeleted, + VectorStoreSearchResponse, ) +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,25 +24,27 @@ class TestVectorStores: @parametrize def test_method_create(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.create() + vector_store = client.vector_stores.create() assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.create( + vector_store = client.vector_stores.create( + chunking_strategy={"type": "auto"}, + description="description", expires_after={ "anchor": "last_active_at", "days": 1, }, - file_ids=["string", "string", "string"], - metadata={}, - name="string", + file_ids=["string"], + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.create() + response = client.vector_stores.with_raw_response.create() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -50,7 +53,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.create() as response: + with client.vector_stores.with_streaming_response.create() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -61,15 +64,15 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.retrieve( - "string", + vector_store = client.vector_stores.retrieve( + "vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.retrieve( - "string", + response = client.vector_stores.with_raw_response.retrieve( + "vector_store_id", ) assert response.is_closed is True @@ -79,8 +82,8 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.retrieve( - "string", + with client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -93,34 +96,34 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.retrieve( + client.vector_stores.with_raw_response.retrieve( "", ) @parametrize def test_method_update(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.update( - "string", + vector_store = client.vector_stores.update( + vector_store_id="vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) 
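# Illustrative sketch (not part of the generated diff above): the vector store
# tests here exercise three access styles on the un-prefixed client.vector_stores
# namespace. Outside the test suite that pattern looks roughly like the snippet
# below; it assumes OPENAI_API_KEY is set and "vs_abc123" is a placeholder id.
from openai import OpenAI

client = OpenAI()

# Plain call: returns the parsed VectorStore model directly.
vector_store = client.vector_stores.retrieve("vs_abc123")

# Raw variant: exposes response headers alongside the parsed body.
raw = client.vector_stores.with_raw_response.retrieve("vs_abc123")
request_id = raw.headers.get("x-request-id")
vector_store = raw.parse()

# Streaming variant: the body is only consumed inside the context manager.
with client.vector_stores.with_streaming_response.retrieve("vs_abc123") as response:
    vector_store = response.parse()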
@parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.update( - "string", + vector_store = client.vector_stores.update( + vector_store_id="vector_store_id", expires_after={ "anchor": "last_active_at", "days": 1, }, - metadata={}, - name="string", + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.update( - "string", + response = client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -130,8 +133,8 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.update( - "string", + with client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -144,20 +147,20 @@ def test_streaming_response_update(self, client: OpenAI) -> None: @parametrize def test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.update( - "", + client.vector_stores.with_raw_response.update( + vector_store_id="", ) @parametrize def test_method_list(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.list() + vector_store = client.vector_stores.list() assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.list( - after="string", - before="string", + vector_store = client.vector_stores.list( + after="after", + before="before", limit=0, order="asc", ) @@ -165,7 +168,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.list() + response = client.vector_stores.with_raw_response.list() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -174,7 +177,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.list() as response: + with client.vector_stores.with_streaming_response.list() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,15 +188,15 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.delete( - "string", + vector_store = client.vector_stores.delete( + "vector_store_id", ) assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.delete( - "string", + response = client.vector_stores.with_raw_response.delete( + "vector_store_id", ) assert response.is_closed is True @@ -203,8 +206,8 @@ def test_raw_response_delete(self, 
client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.delete( - "string", + with client.vector_stores.with_streaming_response.delete( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -217,35 +220,100 @@ def test_streaming_response_delete(self, client: OpenAI) -> None: @parametrize def test_path_params_delete(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.delete( + client.vector_stores.with_raw_response.delete( "", ) + @parametrize + def test_method_search(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_method_search_with_all_params(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + ranking_options={ + "ranker": "none", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_raw_response_search(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_streaming_response_search(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_search(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) + class TestAsyncVectorStores: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.create() + vector_store = await async_client.vector_stores.create() assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.create( + vector_store = await async_client.vector_stores.create( + chunking_strategy={"type": "auto"}, + description="description", 
expires_after={ "anchor": "last_active_at", "days": 1, }, - file_ids=["string", "string", "string"], - metadata={}, - name="string", + file_ids=["string"], + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.create() + response = await async_client.vector_stores.with_raw_response.create() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -254,7 +322,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.create() as response: + async with async_client.vector_stores.with_streaming_response.create() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -265,15 +333,15 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.retrieve( - "string", + vector_store = await async_client.vector_stores.retrieve( + "vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.retrieve( - "string", + response = await async_client.vector_stores.with_raw_response.retrieve( + "vector_store_id", ) assert response.is_closed is True @@ -283,8 +351,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.retrieve( - "string", + async with async_client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -297,34 +365,34 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.retrieve( + await async_client.vector_stores.with_raw_response.retrieve( "", ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.update( - "string", + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.update( - "string", + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", expires_after={ "anchor": "last_active_at", "days": 1, }, - metadata={}, - name="string", + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) 
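# Illustrative sketch (not part of the generated diff): the async tests around
# this point mirror the sync ones with one subtlety the diff itself shows --
# parse() on a with_raw_response result stays synchronous, while parse() on a
# with_streaming_response result is awaited. Assumes OPENAI_API_KEY is set;
# the id is a placeholder.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()

    raw = await client.vector_stores.with_raw_response.retrieve("vs_abc123")
    vector_store = raw.parse()  # not awaited

    async with client.vector_stores.with_streaming_response.retrieve("vs_abc123") as response:
        vector_store = await response.parse()  # awaited

    print(vector_store.id)


asyncio.run(main())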
@parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.update( - "string", + response = await async_client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -334,8 +402,8 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.update( - "string", + async with async_client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -348,20 +416,20 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.update( - "", + await async_client.vector_stores.with_raw_response.update( + vector_store_id="", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.list() + vector_store = await async_client.vector_stores.list() assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.list( - after="string", - before="string", + vector_store = await async_client.vector_stores.list( + after="after", + before="before", limit=0, order="asc", ) @@ -369,7 +437,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.list() + response = await async_client.vector_stores.with_raw_response.list() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -378,7 +446,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.list() as response: + async with async_client.vector_stores.with_streaming_response.list() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -389,15 +457,15 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.delete( - "string", + vector_store = await async_client.vector_stores.delete( + "vector_store_id", ) assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.delete( - "string", + response = await async_client.vector_stores.with_raw_response.delete( + "vector_store_id", ) assert response.is_closed is True @@ -407,8 +475,8 @@ async def 
test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.delete( - "string", + async with async_client.vector_stores.with_streaming_response.delete( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -421,6 +489,67 @@ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.delete( + await async_client.vector_stores.with_raw_response.delete( "", ) + + @parametrize + async def test_method_search(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_method_search_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + ranking_options={ + "ranker": "none", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_raw_response_search(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_search(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_search(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) diff --git a/tests/api_resources/test_videos.py b/tests/api_resources/test_videos.py new file mode 100644 index 0000000000..20d1baa7b5 --- /dev/null +++ b/tests/api_resources/test_videos.py @@ -0,0 +1,539 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
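# Illustrative sketch (not part of the generated diff): the new test_videos.py
# added below mocks the binary download endpoint with respx rather than hitting
# the prism mock server. A standalone version of that pattern looks roughly
# like this; the route, payload, and api_key are placeholders.
import httpx
import respx

from openai import OpenAI


@respx.mock
def test_download_content_mocked() -> None:
    base_url = "http://127.0.0.1:4010"
    respx.get(f"{base_url}/videos/video_123/content").mock(
        return_value=httpx.Response(200, json={"foo": "bar"})
    )

    client = OpenAI(base_url=base_url, api_key="test-key")
    video = client.videos.download_content(video_id="video_123")
    assert video.json() == {"foo": "bar"}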
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import httpx +import pytest +from respx import MockRouter + +import openai._legacy_response as _legacy_response +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import ( + Video, + VideoDeleteResponse, +) +from openai.pagination import SyncConversationCursorPage, AsyncConversationCursorPage + +# pyright: reportDeprecated=false + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVideos: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + video = client.videos.create( + prompt="x", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + video = client.videos.create( + prompt="x", + input_reference=b"raw file contents", + model="sora-2", + seconds="4", + size="720x1280", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.videos.with_raw_response.create( + prompt="x", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.videos.with_streaming_response.create( + prompt="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + video = client.videos.retrieve( + "video_123", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.videos.with_raw_response.retrieve( + "video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.videos.with_streaming_response.retrieve( + "video_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + client.videos.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + video = client.videos.list() + assert_matches_type(SyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + video = client.videos.list( + after="after", + limit=0, + order="asc", + ) + assert_matches_type(SyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + def 
test_raw_response_list(self, client: OpenAI) -> None: + response = client.videos.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(SyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.videos.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(SyncConversationCursorPage[Video], video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + video = client.videos.delete( + "video_123", + ) + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.videos.with_raw_response.delete( + "video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.videos.with_streaming_response.delete( + "video_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + client.videos.with_raw_response.delete( + "", + ) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_method_download_content(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + video = client.videos.download_content( + video_id="video_123", + ) + assert isinstance(video, _legacy_response.HttpxBinaryResponseContent) + assert video.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_method_download_content_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + video = client.videos.download_content( + video_id="video_123", + variant="video", + ) + assert isinstance(video, _legacy_response.HttpxBinaryResponseContent) + assert video.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_raw_response_download_content(self, client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + + response = client.videos.with_raw_response.download_content( + video_id="video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, video, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_streaming_response_download_content(self, 
client: OpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + with client.videos.with_streaming_response.download_content( + video_id="video_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(bytes, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_path_params_download_content(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + client.videos.with_raw_response.download_content( + video_id="", + ) + + @parametrize + def test_method_remix(self, client: OpenAI) -> None: + video = client.videos.remix( + video_id="video_123", + prompt="x", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_raw_response_remix(self, client: OpenAI) -> None: + response = client.videos.with_raw_response.remix( + video_id="video_123", + prompt="x", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + def test_streaming_response_remix(self, client: OpenAI) -> None: + with client.videos.with_streaming_response.remix( + video_id="video_123", + prompt="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_remix(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + client.videos.with_raw_response.remix( + video_id="", + prompt="x", + ) + + +class TestAsyncVideos: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.create( + prompt="x", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.create( + prompt="x", + input_reference=b"raw file contents", + model="sora-2", + seconds="4", + size="720x1280", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.videos.with_raw_response.create( + prompt="x", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.videos.with_streaming_response.create( + prompt="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, 
response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.retrieve( + "video_123", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.videos.with_raw_response.retrieve( + "video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.videos.with_streaming_response.retrieve( + "video_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + await async_client.videos.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.list() + assert_matches_type(AsyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.list( + after="after", + limit=0, + order="asc", + ) + assert_matches_type(AsyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.videos.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(AsyncConversationCursorPage[Video], video, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.videos.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(AsyncConversationCursorPage[Video], video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.delete( + "video_123", + ) + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.videos.with_raw_response.delete( + "video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.videos.with_streaming_response.delete( + "video_123", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(VideoDeleteResponse, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + await async_client.videos.with_raw_response.delete( + "", + ) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_method_download_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + video = await async_client.videos.download_content( + video_id="video_123", + ) + assert isinstance(video, _legacy_response.HttpxBinaryResponseContent) + assert video.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_method_download_content_with_all_params( + self, async_client: AsyncOpenAI, respx_mock: MockRouter + ) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + video = await async_client.videos.download_content( + video_id="video_123", + variant="video", + ) + assert isinstance(video, _legacy_response.HttpxBinaryResponseContent) + assert video.json() == {"foo": "bar"} + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_raw_response_download_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + + response = await async_client.videos.with_raw_response.download_content( + video_id="video_123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(_legacy_response.HttpxBinaryResponseContent, video, path=["response"]) + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_streaming_response_download_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: + respx_mock.get("/videos/video_123/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + async with async_client.videos.with_streaming_response.download_content( + video_id="video_123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(bytes, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_path_params_download_content(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + await async_client.videos.with_raw_response.download_content( + video_id="", + ) + + @parametrize + async def test_method_remix(self, async_client: AsyncOpenAI) -> None: + video = await async_client.videos.remix( + video_id="video_123", + prompt="x", + ) + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_raw_response_remix(self, async_client: AsyncOpenAI) -> None: + response = await async_client.videos.with_raw_response.remix( + video_id="video_123", + prompt="x", + ) + + assert response.is_closed is True + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + video = response.parse() + assert_matches_type(Video, video, path=["response"]) + + @parametrize + async def test_streaming_response_remix(self, async_client: AsyncOpenAI) -> None: + async with async_client.videos.with_streaming_response.remix( + video_id="video_123", + prompt="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + video = await response.parse() + assert_matches_type(Video, video, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_remix(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `video_id` but received ''"): + await async_client.videos.with_raw_response.remix( + video_id="", + prompt="x", + ) diff --git a/tests/api_resources/test_webhooks.py b/tests/api_resources/test_webhooks.py new file mode 100644 index 0000000000..543805d045 --- /dev/null +++ b/tests/api_resources/test_webhooks.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os + +import pytest + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestWebhooks: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + +class TestAsyncWebhooks: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) diff --git a/tests/api_resources/uploads/__init__.py b/tests/api_resources/uploads/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/uploads/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/uploads/test_parts.py b/tests/api_resources/uploads/test_parts.py new file mode 100644 index 0000000000..191d3a1b04 --- /dev/null +++ b/tests/api_resources/uploads/test_parts.py @@ -0,0 +1,108 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.uploads import UploadPart + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestParts: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + part = client.uploads.parts.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) + assert_matches_type(UploadPart, part, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.uploads.parts.with_raw_response.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + part = response.parse() + assert_matches_type(UploadPart, part, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.uploads.parts.with_streaming_response.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + part = response.parse() + assert_matches_type(UploadPart, part, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.uploads.parts.with_raw_response.create( + upload_id="", + data=b"raw file contents", + ) + + +class TestAsyncParts: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + part = await async_client.uploads.parts.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) + assert_matches_type(UploadPart, part, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.parts.with_raw_response.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + part = response.parse() + assert_matches_type(UploadPart, part, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.parts.with_streaming_response.create( + upload_id="upload_abc123", + data=b"raw file contents", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + part = await response.parse() + assert_matches_type(UploadPart, part, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.uploads.parts.with_raw_response.create( + upload_id="", + data=b"raw file contents", + ) diff --git 
a/tests/api_resources/vector_stores/__init__.py b/tests/api_resources/vector_stores/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/vector_stores/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py similarity index 63% rename from tests/api_resources/beta/vector_stores/test_file_batches.py rename to tests/api_resources/vector_stores/test_file_batches.py index 9854d1a138..abbefc20e9 100644 --- a/tests/api_resources/beta/vector_stores/test_file_batches.py +++ b/tests/api_resources/vector_stores/test_file_batches.py @@ -10,7 +10,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.vector_stores import ( +from openai.types.vector_stores import ( VectorStoreFile, VectorStoreFileBatch, ) @@ -23,17 +23,32 @@ class TestFileBatches: @parametrize def test_method_create(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.create( - "vs_abc123", + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + "chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.create( - "vs_abc123", - file_ids=["string"], + response = client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", ) assert response.is_closed is True @@ -43,9 +58,8 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.create( - "vs_abc123", - file_ids=["string"], + with client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -58,23 +72,22 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.create( - "", - file_ids=["string"], + client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.retrieve( - "vsfb_abc123", + file_batch = client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) 
-> None: - response = client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + response = client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) @@ -85,8 +98,8 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.retrieve( - "vsfb_abc123", + with client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) as response: assert not response.is_closed @@ -100,30 +113,30 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "", + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", vector_store_id="vs_abc123", ) @parametrize def test_method_cancel(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.cancel( - "string", - vector_store_id="string", + file_batch = client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", - vector_store_id="string", + response = client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -133,9 +146,9 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.cancel( - "string", - vector_store_id="string", + with client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -148,32 +161,32 @@ def test_streaming_response_cancel(self, client: OpenAI) -> None: @parametrize def test_path_params_cancel(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.cancel( - "", - vector_store_id="string", + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", ) @parametrize def test_method_list_files(self, client: OpenAI) -> None: - file_batch = 
client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) @parametrize def test_method_list_files_with_all_params(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", - after="string", - before="string", + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", filter="in_progress", limit=0, order="asc", @@ -182,9 +195,9 @@ def test_method_list_files_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list_files(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", - vector_store_id="string", + response = client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -194,9 +207,9 @@ def test_raw_response_list_files(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list_files(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.list_files( - "string", - vector_store_id="string", + with client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -209,34 +222,51 @@ def test_streaming_response_list_files(self, client: OpenAI) -> None: @parametrize def test_path_params_list_files(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.list_files( - "", - vector_store_id="string", + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", ) class TestAsyncFileBatches: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.create( - "vs_abc123", + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + 
"chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.create( - "vs_abc123", - file_ids=["string"], + response = await async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", ) assert response.is_closed is True @@ -246,9 +276,8 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.create( - "vs_abc123", - file_ids=["string"], + async with async_client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -261,23 +290,22 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.create( - "", - file_ids=["string"], + await async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.retrieve( - "vsfb_abc123", + file_batch = await async_client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + response = await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) @@ -288,8 +316,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.retrieve( - "vsfb_abc123", + async with async_client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) as response: assert not response.is_closed @@ -303,30 +331,30 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "", + await 
async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", vector_store_id="vs_abc123", ) @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.cancel( - "string", - vector_store_id="string", + file_batch = await async_client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", - vector_store_id="string", + response = await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -336,9 +364,9 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.cancel( - "string", - vector_store_id="string", + async with async_client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -351,32 +379,32 @@ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", + await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "", - vector_store_id="string", + await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", ) @parametrize async def test_method_list_files(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) @parametrize async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", - after="string", - before="string", + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", filter="in_progress", limit=0, order="asc", @@ -385,9 +413,9 @@ async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI @parametrize async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", - 
vector_store_id="string", + response = await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -397,9 +425,9 @@ async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.list_files( - "string", - vector_store_id="string", + async with async_client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -412,13 +440,13 @@ async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> @parametrize async def test_path_params_list_files(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "", - vector_store_id="string", + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", ) diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py new file mode 100644 index 0000000000..0778704d5d --- /dev/null +++ b/tests/api_resources/vector_stores/test_files.py @@ -0,0 +1,627 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from openai.types.vector_stores import ( + VectorStoreFile, + FileContentResponse, + VectorStoreFileDeleted, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestFiles: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + file = client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + file = client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + 
assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + file = client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + def test_method_content(self, client: OpenAI) -> None: + file = client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_raw_response_content(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_streaming_response_content(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_content(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, 
match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) + + +class TestAsyncFiles: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + 
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with 
async_client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + async def test_method_content(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_raw_response_content(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_streaming_response_content(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) diff --git a/tests/conftest.py b/tests/conftest.py index 15af57e770..408bcf76c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,27 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + from __future__ import annotations import os -import asyncio import logging from typing import TYPE_CHECKING, Iterator, AsyncIterator +import httpx import pytest +from pytest_asyncio import is_async_test -from openai import OpenAI, AsyncOpenAI +from openai import OpenAI, AsyncOpenAI, DefaultAioHttpClient +from openai._utils import is_dict if TYPE_CHECKING: - from _pytest.fixtures import FixtureRequest + from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] pytest.register_assert_rewrite("tests.utils") logging.getLogger("openai").setLevel(logging.DEBUG) -@pytest.fixture(scope="session") -def event_loop() -> Iterator[asyncio.AbstractEventLoop]: - loop = asyncio.new_event_loop() - yield loop - loop.close() +# automatically add `pytest.mark.asyncio()` to all of our async tests +# so we don't have to add that boilerplate everywhere +def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: + pytest_asyncio_tests = (item for item in items if is_async_test(item)) + session_scope_marker = pytest.mark.asyncio(loop_scope="session") + for async_test in pytest_asyncio_tests: + async_test.add_marker(session_scope_marker, append=False) + + # We skip tests that use both the aiohttp client and respx_mock as respx_mock + # doesn't support custom transports. 
+ for item in items: + if "async_client" not in item.fixturenames or "respx_mock" not in item.fixturenames: + continue + + if not hasattr(item, "callspec"): + continue + + async_client_param = item.callspec.params.get("async_client") + if is_dict(async_client_param) and async_client_param.get("http_client") == "aiohttp": + item.add_marker(pytest.mark.skip(reason="aiohttp client is not compatible with respx_mock")) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -41,9 +60,25 @@ def client(request: FixtureRequest) -> Iterator[OpenAI]: @pytest.fixture(scope="session") async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncOpenAI]: - strict = getattr(request, "param", True) - if not isinstance(strict, bool): - raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") - - async with AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=strict) as client: + param = getattr(request, "param", True) + + # defaults + strict = True + http_client: None | httpx.AsyncClient = None + + if isinstance(param, bool): + strict = param + elif is_dict(param): + strict = param.get("strict", True) + assert isinstance(strict, bool) + + http_client_type = param.get("http_client", "httpx") + if http_client_type == "aiohttp": + http_client = DefaultAioHttpClient() + else: + raise TypeError(f"Unexpected fixture parameter type {type(param)}, expected bool or dict") + + async with AsyncOpenAI( + base_url=base_url, api_key=api_key, _strict_response_validation=strict, http_client=http_client + ) as client: yield client diff --git a/tests/lib/test_azure.py b/tests/lib/test_azure.py deleted file mode 100644 index 9360b2925a..0000000000 --- a/tests/lib/test_azure.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import Union -from typing_extensions import Literal - -import pytest - -from openai._models import FinalRequestOptions -from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI - -Client = Union[AzureOpenAI, AsyncAzureOpenAI] - - -sync_client = AzureOpenAI( - api_version="2023-07-01", - api_key="example API key", - azure_endpoint="https://example-resource.azure.openai.com", -) - -async_client = AsyncAzureOpenAI( - api_version="2023-07-01", - api_key="example API key", - azure_endpoint="https://example-resource.azure.openai.com", -) - - -@pytest.mark.parametrize("client", [sync_client, async_client]) -def test_implicit_deployment_path(client: Client) -> None: - req = client._build_request( - FinalRequestOptions.construct( - method="post", - url="/chat/completions", - json_data={"model": "my-deployment-model"}, - ) - ) - assert ( - req.url - == "https://example-resource.azure.openai.com/openai/deployments/my-deployment-model/chat/completions?api-version=2023-07-01" - ) - - -@pytest.mark.parametrize( - "client,method", - [ - (sync_client, "copy"), - (sync_client, "with_options"), - (async_client, "copy"), - (async_client, "with_options"), - ], -) -def test_client_copying(client: Client, method: Literal["copy", "with_options"]) -> None: - if method == "copy": - copied = client.copy() - else: - copied = client.with_options() - - assert copied._custom_query == {"api-version": "2023-07-01"} - - -@pytest.mark.parametrize( - "client", - [sync_client, async_client], -) -def test_client_copying_override_options(client: Client) -> None: - copied = client.copy( - api_version="2022-05-01", - ) - assert copied._custom_query == {"api-version": "2022-05-01"} 
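The conftest.py rework above is what enables the new "aiohttp" test variant that appears throughout these generated tests. A minimal sketch follows (illustration only, not part of the patch; the class and test names are hypothetical) of how a test module opts in via the indirect `async_client` fixture parameters introduced there:

import pytest

from openai import AsyncOpenAI


class TestExampleResource:
    # The dict form selects the aiohttp-backed client; the bare booleans keep httpx
    # and toggle strict response validation, mirroring the fixture logic added in conftest.py.
    parametrize = pytest.mark.parametrize(
        "async_client",
        [False, True, {"http_client": "aiohttp"}],
        indirect=True,
        ids=["loose", "strict", "aiohttp"],
    )

    @parametrize
    async def test_defaults(self, async_client: AsyncOpenAI) -> None:
        # No explicit asyncio marker is needed: pytest_collection_modifyitems adds it,
        # and tests that also request respx_mock are skipped for the aiohttp variant.
        assert async_client.max_retries == 2
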
diff --git a/tests/lib/test_old_api.py b/tests/lib/test_old_api.py deleted file mode 100644 index 261b8acb94..0000000000 --- a/tests/lib/test_old_api.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest - -import openai -from openai.lib._old_api import APIRemovedInV1 - - -def test_basic_attribute_access_works() -> None: - for attr in dir(openai): - dir(getattr(openai, attr)) - - -def test_helpful_error_is_raised() -> None: - with pytest.raises(APIRemovedInV1): - openai.Completion.create() # type: ignore - - with pytest.raises(APIRemovedInV1): - openai.ChatCompletion.create() # type: ignore diff --git a/tests/test_client.py b/tests/test_client.py index c1e545e66f..563b7d0441 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,12 +4,14 @@ import gc import os +import sys import json import asyncio import inspect import tracemalloc from typing import Any, Union, cast from unittest import mock +from typing_extensions import Literal import httpx import pytest @@ -17,11 +19,21 @@ from pydantic import ValidationError from openai import OpenAI, AsyncOpenAI, APIResponseValidationError +from openai._types import Omit +from openai._utils import asyncify from openai._models import BaseModel, FinalRequestOptions -from openai._constants import RAW_RESPONSE_HEADER from openai._streaming import Stream, AsyncStream from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError -from openai._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options +from openai._base_client import ( + DEFAULT_TIMEOUT, + HTTPX_DEFAULT_TIMEOUT, + BaseClient, + OtherPlatform, + DefaultHttpxClient, + DefaultAsyncHttpxClient, + get_platform, + make_request_options, +) from .utils import update_env @@ -48,51 +60,49 @@ def _get_open_connections(client: OpenAI | AsyncOpenAI) -> int: class TestOpenAI: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - @pytest.mark.respx(base_url=base_url) - def test_raw_response(self, respx_mock: MockRouter) -> None: + def test_raw_response(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + def test_raw_response_for_binary(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, client: OpenAI) -> None: + copied = client.copy() + assert id(copied) != id(client) - copied = self.client.copy(api_key="another My API Key") + copied = client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def 
test_copy_default_options(self, client: OpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(client.timeout, httpx.Timeout) + copied = client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + assert isinstance(client.timeout, httpx.Timeout) def test_copy_default_headers(self) -> None: client = OpenAI( @@ -127,6 +137,7 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + client.close() def test_copy_default_query(self) -> None: client = OpenAI( @@ -164,13 +175,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + client.close() + + def test_copy_signature(self, client: OpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. - self.client.__init__, # type: ignore[misc] + client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -180,12 +193,13 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" - def test_copy_build_request(self) -> None: + @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") + def test_copy_build_request(self, client: OpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. 
build_request(options) @@ -242,14 +256,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + def test_request_timeout(self, client: OpenAI) -> None: + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( - FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(100.0) @@ -260,6 +272,8 @@ def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + client.close() + def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used with httpx.Client(timeout=None) as http_client: @@ -271,6 +285,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + client.close() + # no timeout given to the httpx client should not use the httpx default with httpx.Client() as http_client: client = OpenAI( @@ -281,6 +297,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + client.close() + # explicitly passing the default timeout currently results in it being ignored with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = OpenAI( @@ -291,6 +309,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + client.close() + async def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): async with httpx.AsyncClient() as http_client: @@ -302,14 +322,14 @@ async def test_invalid_http_client(self) -> None: ) def test_default_headers_option(self) -> None: - client = OpenAI( + test_client = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = OpenAI( + test_client2 = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -318,17 +338,21 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + test_client.close() + test_client2.close() + def test_validate_headers(self) -> None: client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = 
client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(OpenAIError): - client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 def test_default_query_option(self) -> None: @@ -343,14 +367,16 @@ def test_default_query_option(self) -> None: FinalRequestOptions( method="get", url="/foo", - params={"foo": "baz", "query_param": "overriden"}, + params={"foo": "baz", "query_param": "overridden"}, ) ) url = httpx.URL(request.url) - assert dict(url.params) == {"foo": "baz", "query_param": "overriden"} + assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} - def test_request_extra_json(self) -> None: - request = self.client._build_request( + client.close() + + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -361,7 +387,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -372,7 +398,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -383,8 +409,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -394,7 +420,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -405,8 +431,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -419,7 +445,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -433,7 +459,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -449,7 +475,7 @@ def test_request_extra_query(self) -> None: def test_multipart_repeating_array(self, client: OpenAI) -> None: request = client._build_request( 
FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, @@ -476,7 +502,7 @@ def test_multipart_repeating_array(self, client: OpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - def test_basic_union_response(self, respx_mock: MockRouter) -> None: + def test_basic_union_response(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model1(BaseModel): name: str @@ -485,12 +511,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + def test_union_response_different_types(self, respx_mock: MockRouter, client: OpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -501,18 +527,18 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter, client: OpenAI) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -528,7 +554,7 @@ class Model(BaseModel): ) ) - response = self.client.get("/foo", cast_to=Model) + response = client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 @@ -540,6 +566,8 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" + client.close() + def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = OpenAI(api_key=api_key, _strict_response_validation=True) @@ -567,6 +595,7 @@ def test_base_url_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -590,6 +619,7 @@ def test_base_url_no_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -613,35 +643,36 @@ def test_absolute_request_url(self, client: OpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + client.close() def test_copied_client_does_not_close_http(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + 
test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied - assert not client.is_closed() + assert not test_client.is_closed() def test_client_context_manager(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - with client as c2: - assert c2 is client + test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + def test_client_response_validation_error(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - self.client.get("/foo", cast_to=Model) + client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -650,13 +681,13 @@ def test_client_max_retries_validation(self) -> None: OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None)) @pytest.mark.respx(base_url=base_url) - def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + def test_default_stream_cls(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = self.client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) + stream = client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) assert isinstance(stream, Stream) stream.response.close() @@ -672,11 +703,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): strict_client.get("/foo", cast_to=Model) - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = client.get("/foo", cast_to=Model) + response = non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + strict_client.close() + non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -695,12 +729,13 @@ class Model(BaseModel): [3, "", 0.5], [2, "", 0.5 * 2.0], [1, "", 0.5 * 4.0], + [-1100, "", 8], # test large number potentially overflowing ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, client: OpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) @@ -708,107 +743,267 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: 
str @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - self.client.post( - "/chat/completions", - body=cast( - object, - dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", - ), - ), - cast_to=httpx.Response, - options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, - ) - - assert _get_open_connections(self.client) == 0 + client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ).__enter__() + + assert _get_open_connections(client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - self.client.post( - "/chat/completions", - body=cast( - object, - dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", - ), - ), - cast_to=httpx.Response, - options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, - ) + client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ).__enter__() + assert _get_open_connections(client) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.parametrize("failure_mode", ["status", "exception"]) + def test_retries_taken( + self, + client: OpenAI, + failures_before_success: int, + failure_mode: Literal["status", "exception"], + respx_mock: MockRouter, + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + if failure_mode == "exception": + raise RuntimeError("oops") + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) + + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success - assert _get_open_connections(self.client) == 0 + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_omit_retry_count_header( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: 
httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": Omit()}, + ) + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 -class TestAsyncOpenAI: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_overwrite_retry_count_header( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": "42"}, + ) + + assert response.http_request.headers.get("x-stainless-retry-count") == "42" + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_retries_taken_new_response_class( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Test that the proxy environment variables are set correctly + monkeypatch.setenv("HTTPS_PROXY", "https://example.org") + + client = DefaultHttpxClient() + + mounts = tuple(client._mounts.items()) + assert len(mounts) == 1 + assert mounts[0][0].pattern == "https://" + + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") + def test_default_client_creation(self) -> None: + # Ensure that the client can be initialized without any exceptions + DefaultHttpxClient( + verify=True, + cert=None, + trust_env=True, + http1=True, + http2=False, + limits=httpx.Limits(max_connections=100, max_keepalive_connections=20), + ) + + @pytest.mark.respx(base_url=base_url) + def test_follow_redirects(self, respx_mock: MockRouter, client: OpenAI) -> None: + # Test that the 
default follow_redirects=True allows following redirects + respx_mock.post("/redirect").mock( + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) + ) + respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) + + response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + @pytest.mark.respx(base_url=base_url) + def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: OpenAI) -> None: + # Test that follow_redirects=False prevents following redirects + respx_mock.post("/redirect").mock( + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) + ) + + with pytest.raises(APIStatusError) as exc_info: + client.post("/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response) + + assert exc_info.value.response.status_code == 302 + assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected" + + +class TestAsyncOpenAI: @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response(self, respx_mock: MockRouter) -> None: + async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, async_client: AsyncOpenAI) -> None: + copied = async_client.copy() + assert id(copied) != id(async_client) - copied = self.client.copy(api_key="another My API Key") + copied = async_client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert async_client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def test_copy_default_options(self, async_client: AsyncOpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = async_client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert async_client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(async_client.timeout, httpx.Timeout) + copied = async_client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + 
assert isinstance(async_client.timeout, httpx.Timeout) - def test_copy_default_headers(self) -> None: + async def test_copy_default_headers(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) @@ -841,8 +1036,9 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + await client.close() - def test_copy_default_query(self) -> None: + async def test_copy_default_query(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"foo": "bar"} ) @@ -878,13 +1074,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + await client.close() + + def test_copy_signature(self, async_client: AsyncOpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. - self.client.__init__, # type: ignore[misc] + async_client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(async_client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -894,12 +1092,13 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" - def test_copy_build_request(self) -> None: + @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") + def test_copy_build_request(self, async_client: AsyncOpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = async_client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. 
build_request(options) @@ -956,12 +1155,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - async def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + async def test_request_timeout(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( + request = async_client._build_request( FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) ) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -976,6 +1175,8 @@ async def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + await client.close() + async def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used async with httpx.AsyncClient(timeout=None) as http_client: @@ -987,6 +1188,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + await client.close() + # no timeout given to the httpx client should not use the httpx default async with httpx.AsyncClient() as http_client: client = AsyncOpenAI( @@ -997,6 +1200,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + await client.close() + # explicitly passing the default timeout currently results in it being ignored async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = AsyncOpenAI( @@ -1007,6 +1212,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + await client.close() + def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): with httpx.Client() as http_client: @@ -1017,15 +1224,15 @@ def test_invalid_http_client(self) -> None: http_client=cast(Any, http_client), ) - def test_default_headers_option(self) -> None: - client = AsyncOpenAI( + async def test_default_headers_option(self) -> None: + test_client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = AsyncOpenAI( + test_client2 = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -1034,20 +1241,24 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + await test_client.close() + await test_client2.close() + def test_validate_headers(self) 
-> None: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(OpenAIError): - client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 - def test_default_query_option(self) -> None: + async def test_default_query_option(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} ) @@ -1059,14 +1270,16 @@ def test_default_query_option(self) -> None: FinalRequestOptions( method="get", url="/foo", - params={"foo": "baz", "query_param": "overriden"}, + params={"foo": "baz", "query_param": "overridden"}, ) ) url = httpx.URL(request.url) - assert dict(url.params) == {"foo": "baz", "query_param": "overriden"} + assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} + + await client.close() - def test_request_extra_json(self) -> None: - request = self.client._build_request( + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1077,7 +1290,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1088,7 +1301,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1099,8 +1312,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1110,7 +1323,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1121,8 +1334,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1135,7 +1348,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1149,7 +1362,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority 
over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1165,7 +1378,7 @@ def test_request_extra_query(self) -> None: def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: request = async_client._build_request( FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, @@ -1192,7 +1405,7 @@ def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - async def test_basic_union_response(self, respx_mock: MockRouter) -> None: + async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model1(BaseModel): name: str @@ -1201,12 +1414,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - async def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + async def test_union_response_different_types(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -1217,18 +1430,20 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - async def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + async def test_non_application_json_content_type_for_json_data( + self, respx_mock: MockRouter, async_client: AsyncOpenAI + ) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -1244,11 +1459,11 @@ class Model(BaseModel): ) ) - response = await self.client.get("/foo", cast_to=Model) + response = await async_client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 - def test_base_url_setter(self) -> None: + async def test_base_url_setter(self) -> None: client = AsyncOpenAI( base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True ) @@ -1258,7 +1473,9 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" - def test_base_url_env(self) -> None: + await client.close() + + async def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = AsyncOpenAI(api_key=api_key, _strict_response_validation=True) assert client.base_url 
== "http://localhost:5000/from/env/" @@ -1278,7 +1495,7 @@ def test_base_url_env(self) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: + async def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1287,6 +1504,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1303,7 +1521,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: + async def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1312,6 +1530,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1328,7 +1547,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_absolute_request_url(self, client: AsyncOpenAI) -> None: + async def test_absolute_request_url(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1337,37 +1556,37 @@ def test_absolute_request_url(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + await client.close() async def test_copied_client_does_not_close_http(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied await asyncio.sleep(0.2) - assert not client.is_closed() + assert not test_client.is_closed() async def test_client_context_manager(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - async with client as c2: - assert c2 is client + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + async with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + async def test_client_response_validation_error(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - await self.client.get("/foo", cast_to=Model) + await async_client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -1378,19 +1597,17 @@ async def test_client_max_retries_validation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - 
@pytest.mark.asyncio - async def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + async def test_default_stream_cls(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = await self.client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) + stream = await async_client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) assert isinstance(stream, AsyncStream) await stream.response.aclose() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: class Model(BaseModel): name: str @@ -1402,11 +1619,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): await strict_client.get("/foo", cast_to=Model) - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = await client.get("/foo", cast_to=Model) + response = await non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + await strict_client.close() + await non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -1425,66 +1645,236 @@ class Model(BaseModel): [3, "", 0.5], [2, "", 0.5 * 2.0], [1, "", 0.5 * 4.0], + [-1100, "", 8], # test large number potentially overflowing ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - @pytest.mark.asyncio - async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + async def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, async_client: AsyncOpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) - calculated = client._calculate_retry_timeout(remaining_retries, options, headers) + calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/chat/completions").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - await self.client.post( - "/chat/completions", - body=cast( - object, - dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", - ), - ), - cast_to=httpx.Response, - options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, - ) - - assert _get_open_connections(self.client) == 0 + await async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ).__aenter__() + + assert _get_open_connections(async_client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", 
_low_retry_timeout) @pytest.mark.respx(base_url=base_url) - async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: + async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/chat/completions").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - await self.client.post( - "/chat/completions", - body=cast( - object, - dict( - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], - model="gpt-3.5-turbo", - ), - ), - cast_to=httpx.Response, - options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, + await async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ).__aenter__() + assert _get_open_connections(async_client) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + @pytest.mark.parametrize("failure_mode", ["status", "exception"]) + async def test_retries_taken( + self, + async_client: AsyncOpenAI, + failures_before_success: int, + failure_mode: Literal["status", "exception"], + respx_mock: MockRouter, + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + if failure_mode == "exception": + raise RuntimeError("oops") + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) + + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + async def test_omit_retry_count_header( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": Omit()}, + ) + + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + async def test_overwrite_retry_count_header( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> 
httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = await client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": "42"}, + ) + + assert response.http_request.headers.get("x-stainless-retry-count") == "42" + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + async def test_retries_taken_new_response_class( + self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = async_client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + async with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + + async def test_get_platform(self) -> None: + platform = await asyncify(get_platform)() + assert isinstance(platform, (str, OtherPlatform)) + + async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None: + # Test that the proxy environment variables are set correctly + monkeypatch.setenv("HTTPS_PROXY", "https://example.org") + + client = DefaultAsyncHttpxClient() + + mounts = tuple(client._mounts.items()) + assert len(mounts) == 1 + assert mounts[0][0].pattern == "https://" + + @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning") + async def test_default_client_creation(self) -> None: + # Ensure that the client can be initialized without any exceptions + DefaultAsyncHttpxClient( + verify=True, + cert=None, + trust_env=True, + http1=True, + http2=False, + limits=httpx.Limits(max_connections=100, max_keepalive_connections=20), + ) + + @pytest.mark.respx(base_url=base_url) + async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: + # Test that the default follow_redirects=True allows following redirects + respx_mock.post("/redirect").mock( + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) + ) + respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) + + response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + @pytest.mark.respx(base_url=base_url) + async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: + # Test that follow_redirects=False prevents following redirects + respx_mock.post("/redirect").mock( + return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) + ) + + with pytest.raises(APIStatusError) as exc_info: + await async_client.post( + 
"/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response ) - assert _get_open_connections(self.client) == 0 + assert exc_info.value.response.status_code == 302 + assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected" diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py index 8cf65ce94e..86a2adb1a2 100644 --- a/tests/test_deepcopy.py +++ b/tests/test_deepcopy.py @@ -41,8 +41,7 @@ def test_nested_list() -> None: assert_different_identities(obj1[1], obj2[1]) -class MyObject: - ... +class MyObject: ... def test_ignores_other_types() -> None: diff --git a/tests/test_legacy_response.py b/tests/test_legacy_response.py index 45025f81d0..4f24ce187d 100644 --- a/tests/test_legacy_response.py +++ b/tests/test_legacy_response.py @@ -1,5 +1,5 @@ import json -from typing import cast +from typing import Any, Union, cast from typing_extensions import Annotated import httpx @@ -12,8 +12,7 @@ from openai._legacy_response import LegacyAPIResponse -class PydanticModel(pydantic.BaseModel): - ... +class PydanticModel(pydantic.BaseModel): ... def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: @@ -33,6 +32,31 @@ def test_response_parse_mismatched_basemodel(client: OpenAI) -> None: response.parse(to=PydanticModel) +@pytest.mark.parametrize( + "content, expected", + [ + ("false", False), + ("true", True), + ("False", False), + ("True", True), + ("TrUe", True), + ("FalSe", False), + ], +) +def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None: + response = LegacyAPIResponse( + raw=httpx.Response(200, content=content), + client=client, + stream=False, + stream_cls=None, + cast_to=str, + options=FinalRequestOptions.construct(method="get", url="/foo"), + ) + + result = response.parse(to=bool) + assert result is expected + + def test_response_parse_custom_stream(client: OpenAI) -> None: response = LegacyAPIResponse( raw=httpx.Response(200, content=b"foo"), @@ -82,3 +106,23 @@ def test_response_parse_annotated_type(client: OpenAI) -> None: ) assert obj.foo == "hello!" 
     assert obj.bar == 2
+
+
+class OtherModel(pydantic.BaseModel):
+    a: str
+
+
+@pytest.mark.parametrize("client", [False], indirect=True)  # loose validation
+def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None:
+    response = LegacyAPIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
diff --git a/tests/test_models.py b/tests/test_models.py
index b703444248..588869ee35 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,15 +1,15 @@
 import json
-from typing import Any, Dict, List, Union, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast
 from datetime import datetime, timezone
-from typing_extensions import Literal, Annotated
+from typing_extensions import Literal, Annotated, TypeAliasType

 import pytest
 import pydantic
 from pydantic import Field

 from openai._utils import PropertyInfo
-from openai._compat import PYDANTIC_V2, parse_obj, model_dump, model_json
-from openai._models import BaseModel, construct_type
+from openai._compat import PYDANTIC_V1, parse_obj, model_dump, model_json
+from openai._models import DISCRIMINATOR_CACHE, BaseModel, construct_type


 class BasicModel(BaseModel):
@@ -245,7 +245,7 @@ class Model(BaseModel):
     assert m.foo is True

     m = Model.construct(foo="CARD_HOLDER")
-    assert m.foo is "CARD_HOLDER"
+    assert m.foo == "CARD_HOLDER"

     m = Model.construct(foo={"bar": False})
     assert isinstance(m.foo, Submodel1)
@@ -294,12 +294,12 @@ class Model(BaseModel):
     assert cast(bool, m.foo) is True

     m = Model.construct(foo={"name": 3})
-    if PYDANTIC_V2:
-        assert isinstance(m.foo, Submodel1)
-        assert m.foo.name == 3  # type: ignore
-    else:
+    if PYDANTIC_V1:
         assert isinstance(m.foo, Submodel2)
         assert m.foo.name == "3"
+    else:
+        assert isinstance(m.foo, Submodel1)
+        assert m.foo.name == 3  # type: ignore


 def test_list_of_unions() -> None:
@@ -426,10 +426,10 @@ class Model(BaseModel):

     expected = datetime(2019, 12, 27, 18, 11, 19, 117000, tzinfo=timezone.utc)

-    if PYDANTIC_V2:
-        expected_json = '{"created_at":"2019-12-27T18:11:19.117000Z"}'
-    else:
+    if PYDANTIC_V1:
         expected_json = '{"created_at": "2019-12-27T18:11:19.117000+00:00"}'
+    else:
+        expected_json = '{"created_at":"2019-12-27T18:11:19.117000Z"}'

     model = Model.construct(created_at="2019-12-27T18:11:19.117Z")
     assert model.created_at == expected
@@ -492,12 +492,15 @@ class Model(BaseModel):
         resource_id: Optional[str] = None

     m = Model.construct()
+    assert m.resource_id is None
     assert "resource_id" not in m.model_fields_set

     m = Model.construct(resource_id=None)
+    assert m.resource_id is None
     assert "resource_id" in m.model_fields_set

     m = Model.construct(resource_id="foo")
+    assert m.resource_id == "foo"
     assert "resource_id" in m.model_fields_set
@@ -520,19 +523,15 @@ class Model(BaseModel):
     assert m3.to_dict(exclude_none=True) == {}
     assert m3.to_dict(exclude_defaults=True) == {}

-    if PYDANTIC_V2:
+    class Model2(BaseModel):
+        created_at: datetime

-        class Model2(BaseModel):
-            created_at: datetime
-
-        time_str = "2024-03-21T11:39:01.275859"
-        m4 = Model2.construct(created_at=time_str)
-        assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
-        assert m4.to_dict(mode="json") == {"created_at": time_str}
-    else:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.to_dict(mode="json")
+    time_str = "2024-03-21T11:39:01.275859"
+    m4 = Model2.construct(created_at=time_str)
+    assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
+    assert m4.to_dict(mode="json") == {"created_at": time_str}

+    if PYDANTIC_V1:
         with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"):
             m.to_dict(warnings=False)
@@ -557,10 +556,7 @@ class Model(BaseModel):
     assert m3.model_dump() == {"foo": None}
     assert m3.model_dump(exclude_none=True) == {}

-    if not PYDANTIC_V2:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.model_dump(mode="json")
-
+    if PYDANTIC_V1:
         with pytest.raises(ValueError, match="round_trip is only supported in Pydantic v2"):
             m.model_dump(round_trip=True)
@@ -568,6 +564,14 @@ class Model(BaseModel):
             m.model_dump(warnings=False)


+def test_compat_method_no_error_for_warnings() -> None:
+    class Model(BaseModel):
+        foo: Optional[str]
+
+    m = Model(foo="hello")
+    assert isinstance(model_dump(m, warnings=False), dict)
+
+
 def test_to_json() -> None:
     class Model(BaseModel):
         foo: Optional[str] = Field(alias="FOO", default=None)
@@ -576,10 +580,10 @@ class Model(BaseModel):
     assert json.loads(m.to_json()) == {"FOO": "hello"}
     assert json.loads(m.to_json(use_api_names=False)) == {"foo": "hello"}

-    if PYDANTIC_V2:
-        assert m.to_json(indent=None) == '{"FOO":"hello"}'
-    else:
+    if PYDANTIC_V1:
         assert m.to_json(indent=None) == '{"FOO": "hello"}'
+    else:
+        assert m.to_json(indent=None) == '{"FOO":"hello"}'

     m2 = Model()
     assert json.loads(m2.to_json()) == {}
@@ -591,7 +595,7 @@ class Model(BaseModel):
     assert json.loads(m3.to_json()) == {"FOO": None}
     assert json.loads(m3.to_json(exclude_none=True)) == {}

-    if not PYDANTIC_V2:
+    if PYDANTIC_V1:
         with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"):
             m.to_json(warnings=False)
@@ -618,7 +622,7 @@ class Model(BaseModel):
     assert json.loads(m3.model_dump_json()) == {"foo": None}
     assert json.loads(m3.model_dump_json(exclude_none=True)) == {}

-    if not PYDANTIC_V2:
+    if PYDANTIC_V1:
         with pytest.raises(ValueError, match="round_trip is only supported in Pydantic v2"):
             m.model_dump_json(round_trip=True)
@@ -675,12 +679,12 @@ class B(BaseModel):
     )
     assert isinstance(m, A)
     assert m.type == "a"
-    if PYDANTIC_V2:
-        assert m.data == 100  # type: ignore[comparison-overlap]
-    else:
+    if PYDANTIC_V1:
         # pydantic v1 automatically converts inputs to strings
         # if the expected type is a str
         assert m.data == "100"
+    else:
+        assert m.data == 100  # type: ignore[comparison-overlap]


 def test_discriminated_unions_unknown_variant() -> None:
@@ -764,12 +768,12 @@ class B(BaseModel):
     )
     assert isinstance(m, A)
     assert m.foo_type == "a"
-    if PYDANTIC_V2:
-        assert m.data == 100  # type: ignore[comparison-overlap]
-    else:
+    if PYDANTIC_V1:
         # pydantic v1 automatically converts inputs to strings
         # if the expected type is a str
         assert m.data == "100"
+    else:
+        assert m.data == 100  # type: ignore[comparison-overlap]


 def test_discriminated_unions_overlapping_discriminators_invalid_data() -> None:
@@ -805,7 +809,7 @@ class B(BaseModel):
     UnionType = cast(Any, Union[A, B])

-    assert not hasattr(UnionType, "__discriminator__")
+    assert not DISCRIMINATOR_CACHE.get(UnionType)

     m = construct_type(
         value={"type": "b", "data": "foo"}, type_=cast(Any, Annotated[UnionType, PropertyInfo(discriminator="type")])
@@ -814,7 +818,7 @@
     assert m.type == "b"
     assert m.data == "foo"  # type: ignore[comparison-overlap]

-    discriminator = UnionType.__discriminator__
+    discriminator = DISCRIMINATOR_CACHE.get(UnionType)
     assert discriminator is not None

     m = construct_type(
@@ -826,4 +830,134 @@
     # if the discriminator details object stays the same between invocations then
     # we hit the cache
-    assert UnionType.__discriminator__ is discriminator
+    assert DISCRIMINATOR_CACHE.get(UnionType) is discriminator
+
+
+@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAliasType is not supported in Pydantic v1")
+def test_type_alias_type() -> None:
+    Alias = TypeAliasType("Alias", str)  # pyright: ignore
+
+    class Model(BaseModel):
+        alias: Alias
+        union: Union[int, Alias]
+
+    m = construct_type(value={"alias": "foo", "union": "bar"}, type_=Model)
+    assert isinstance(m, Model)
+    assert isinstance(m.alias, str)
+    assert m.alias == "foo"
+    assert isinstance(m.union, str)
+    assert m.union == "bar"
+
+
+@pytest.mark.skipif(PYDANTIC_V1, reason="TypeAliasType is not supported in Pydantic v1")
+def test_field_named_cls() -> None:
+    class Model(BaseModel):
+        cls: str
+
+    m = construct_type(value={"cls": "foo"}, type_=Model)
+    assert isinstance(m, Model)
+    assert isinstance(m.cls, str)
+
+
+def test_discriminated_union_case() -> None:
+    class A(BaseModel):
+        type: Literal["a"]
+
+        data: bool
+
+    class B(BaseModel):
+        type: Literal["b"]
+
+        data: List[Union[A, object]]
+
+    class ModelA(BaseModel):
+        type: Literal["modelA"]
+
+        data: int
+
+    class ModelB(BaseModel):
+        type: Literal["modelB"]
+
+        required: str
+
+        data: Union[A, B]
+
+    # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required`
+    m = construct_type(
+        value={"type": "modelB", "data": {"type": "a", "data": True}},
+        type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]),
+    )
+
+    assert isinstance(m, ModelB)
+
+
+def test_nested_discriminated_union() -> None:
+    class InnerType1(BaseModel):
+        type: Literal["type_1"]
+
+    class InnerModel(BaseModel):
+        inner_value: str
+
+    class InnerType2(BaseModel):
+        type: Literal["type_2"]
+        some_inner_model: InnerModel
+
+    class Type1(BaseModel):
+        base_type: Literal["base_type_1"]
+        value: Annotated[
+            Union[
+                InnerType1,
+                InnerType2,
+            ],
+            PropertyInfo(discriminator="type"),
+        ]
+
+    class Type2(BaseModel):
+        base_type: Literal["base_type_2"]
+
+    T = Annotated[
+        Union[
+            Type1,
+            Type2,
+        ],
+        PropertyInfo(discriminator="base_type"),
+    ]
+
+    model = construct_type(
+        type_=T,
+        value={
+            "base_type": "base_type_1",
+            "value": {
+                "type": "type_2",
+            },
+        },
+    )
+    assert isinstance(model, Type1)
+    assert isinstance(model.value, InnerType2)
+
+
+@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2 for now")
+def test_extra_properties() -> None:
+    class Item(BaseModel):
+        prop: int
+
+    class Model(BaseModel):
+        __pydantic_extra__: Dict[str, Item] = Field(init=False)  # pyright: ignore[reportIncompatibleVariableOverride]
+
+        other: str
+
+        if TYPE_CHECKING:
+
+            def __getattr__(self, attr: str) -> Item: ...
+
+    model = construct_type(
+        type_=Model,
+        value={
+            "a": {"prop": 1},
+            "other": "foo",
+        },
+    )
+    assert isinstance(model, Model)
+    assert model.a.prop == 1
+    assert isinstance(model.a, Item)
+    assert model.other == "foo"
diff --git a/tests/test_module_client.py b/tests/test_module_client.py
index 05b5f81111..58505eb425 100644
--- a/tests/test_module_client.py
+++ b/tests/test_module_client.py
@@ -2,8 +2,6 @@
 from __future__ import annotations

-import os as _os
-
 import httpx
 import pytest
 from httpx import URL
@@ -17,17 +15,13 @@ def reset_state() -> None:
     openai.api_key = None or "My API Key"
     openai.organization = None
     openai.project = None
+    openai.webhook_secret = None
     openai.base_url = None
     openai.timeout = DEFAULT_TIMEOUT
     openai.max_retries = DEFAULT_MAX_RETRIES
     openai.default_headers = None
     openai.default_query = None
     openai.http_client = None
-    openai.api_type = _os.environ.get("OPENAI_API_TYPE")  # type: ignore
-    openai.api_version = None
-    openai.azure_endpoint = None
-    openai.azure_ad_token = None
-    openai.azure_ad_token_provider = None


 @pytest.fixture(autouse=True)
@@ -94,90 +88,3 @@ def test_http_client_option() -> None:
     openai.http_client = new_client

     assert openai.completions._client._client is new_client
-
-
-import contextlib
-from typing import Iterator
-
-from openai.lib.azure import AzureOpenAI
-
-
-@contextlib.contextmanager
-def fresh_env() -> Iterator[None]:
-    old = _os.environ.copy()
-
-    try:
-        _os.environ.clear()
-        yield
-    finally:
-        _os.environ.update(old)
-
-
-def test_only_api_key_results_in_openai_api() -> None:
-    with fresh_env():
-        openai.api_type = None
-        openai.api_key = "example API key"
-
-        assert type(openai.completions._client).__name__ == "_ModuleClient"
-
-
-def test_azure_api_key_env_without_api_version() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-
-        with pytest.raises(
-            ValueError,
-            match=r"Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable",
-        ):
-            openai.completions._client  # noqa: B018
-
-
-def test_azure_api_key_and_version_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-
-        with pytest.raises(
-            ValueError,
-            match=r"Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable",
-        ):
-            openai.completions._client  # noqa: B018
-
-
-def test_azure_api_key_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example"
-
-        openai.completions._client  # noqa: B018
-
-        assert openai.api_type == "azure"
-
-
-def test_azure_azure_ad_token_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_AD_TOKEN"] = "example AD token"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example"
-
-        client = openai.completions._client
-        assert isinstance(client, AzureOpenAI)
-        assert client._azure_ad_token == "example AD token"
-
-
-def test_azure_azure_ad_token_provider_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example"
-        openai.azure_ad_token_provider = lambda: "token"
-
-        client = openai.completions._client
-        assert isinstance(client, AzureOpenAI)
-        assert client._azure_ad_token_provider is not None
-        assert client._azure_ad_token_provider() == "token"
diff --git a/tests/test_response.py b/tests/test_response.py
index af153b67c4..d022306440 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -1,5 +1,5 @@
 import json
-from typing import List, cast
+from typing import Any, List, Union, cast
 from typing_extensions import Annotated

 import httpx
@@ -19,16 +19,13 @@
 from openai._base_client import FinalRequestOptions


-class ConcreteBaseAPIResponse(APIResponse[bytes]):
-    ...
+class ConcreteBaseAPIResponse(APIResponse[bytes]): ...


-class ConcreteAPIResponse(APIResponse[List[str]]):
-    ...
+class ConcreteAPIResponse(APIResponse[List[str]]): ...


-class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]):
-    ...
+class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]): ...


 def test_extract_response_type_direct_classes() -> None:
@@ -56,8 +53,7 @@ def test_extract_response_type_binary_response() -> None:
     assert extract_response_type(AsyncBinaryAPIResponse) == bytes


-class PydanticModel(pydantic.BaseModel):
-    ...
+class PydanticModel(pydantic.BaseModel): ...


 def test_response_parse_mismatched_basemodel(client: OpenAI) -> None:
@@ -192,3 +188,90 @@ async def test_async_response_parse_annotated_type(async_client: AsyncOpenAI) ->
     )
     assert obj.foo == "hello!"
     assert obj.bar == 2
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = response.parse(to=bool)
+    assert result is expected
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+async def test_async_response_parse_bool(client: AsyncOpenAI, content: str, expected: bool) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = await response.parse(to=bool)
+    assert result is expected
+
+
+class OtherModel(BaseModel):
+    a: str
+
+
+@pytest.mark.parametrize("client", [False], indirect=True)  # loose validation
+def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("async_client", [False], indirect=True)  # loose validation
+async def test_async_response_parse_expect_model_union_non_json_content(async_client: AsyncOpenAI) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=async_client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = await response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 1eb6cde9d6..bece75dfc7 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -2,20 +2,20 @@
 import io
 import pathlib
-from typing import Any, List, Union, TypeVar, Iterable, Optional, cast
+from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast
 from datetime import date, datetime
 from typing_extensions import Required, Annotated, TypedDict

 import pytest

-from openai._types import Base64FileInput
+from openai._types import Base64FileInput, omit, not_given
 from openai._utils import (
     PropertyInfo,
     transform as _transform,
     parse_datetime,
     async_transform as _async_transform,
 )
-from openai._compat import PYDANTIC_V2
+from openai._compat import PYDANTIC_V1
 from openai._models import BaseModel

 _T = TypeVar("_T")
@@ -177,17 +177,32 @@ class DateDict(TypedDict, total=False):
     foo: Annotated[date, PropertyInfo(format="iso8601")]


+class DatetimeModel(BaseModel):
+    foo: datetime
+
+
+class DateModel(BaseModel):
+    foo: Optional[date]
+
+
 @parametrize
 @pytest.mark.asyncio
 async def test_iso8601_format(use_async: bool) -> None:
     dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00")
+    tz = "+00:00" if PYDANTIC_V1 else "Z"
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692+00:00"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692" + tz}  # type: ignore[comparison-overlap]

     dt = dt.replace(tzinfo=None)
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]

     assert await transform({"foo": None}, DateDict, use_async) == {"foo": None}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=None), Any, use_async) == {"foo": None}  # type: ignore
     assert await transform({"foo": date.fromisoformat("2023-02-23")}, DateDict, use_async) == {"foo": "2023-02-23"}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=date.fromisoformat("2023-02-23")), DateDict, use_async) == {
+        "foo": "2023-02-23"
+    }  # type: ignore[comparison-overlap]
@@ -282,11 +297,11 @@ async def test_pydantic_unknown_field(use_async: bool) -> None:
 @pytest.mark.asyncio
 async def test_pydantic_mismatched_types(use_async: bool) -> None:
     model = MyModel.construct(foo=True)
-    if PYDANTIC_V2:
+    if PYDANTIC_V1:
+        params = await transform(model, Any, use_async)
+    else:
         with pytest.warns(UserWarning):
             params = await transform(model, Any, use_async)
-    else:
-        params = await transform(model, Any, use_async)

     assert cast(Any, params) == {"foo": True}
@@ -294,11 +309,11 @@ async def test_pydantic_mismatched_types(use_async: bool) -> None:
 async def test_pydantic_mismatched_object_type(use_async: bool) -> None:
     model = MyModel.construct(foo=MyModel.construct(hello="world"))
-    if PYDANTIC_V2:
+    if PYDANTIC_V1:
+        params = await transform(model, Any, use_async)
+    else:
         with pytest.warns(UserWarning):
             params = await transform(model, Any, use_async)
-    else:
-        params = await transform(model, Any, use_async)

     assert cast(Any, params) == {"foo": {"hello": "world"}}
@@ -373,6 +388,15 @@ def my_iter() -> Iterable[Baz8]:
     }


+@parametrize
+@pytest.mark.asyncio
+async def test_dictionary_items(use_async: bool) -> None:
+    class DictItems(TypedDict):
+        foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")]
+
+    assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}}
+
+
 class TypedDictIterableUnionStr(TypedDict):
     foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]
@@ -408,3 +432,29 @@ async def test_base64_file_input(use_async: bool) -> None:
     assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == {
         "foo": "SGVsbG8sIHdvcmxkIQ=="
     }  # type: ignore[comparison-overlap]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_transform_skipping(use_async: bool) -> None:
+    # lists of ints are left as-is
+    data = [1, 2, 3]
+    assert await transform(data, List[int], use_async) is data
+
+    # iterables of ints are converted to a list
+    data = iter([1, 2, 3])
+    assert await transform(data, Iterable[int], use_async) == [1, 2, 3]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_strips_notgiven(use_async: bool) -> None:
+    assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"}
+    assert await transform({"foo_bar": not_given}, Foo1, use_async) == {}
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_strips_omit(use_async: bool) -> None:
+    assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"}
+    assert await transform({"foo_bar": omit}, Foo1, use_async) == {}
diff --git a/tests/test_utils/test_datetime_parse.py b/tests/test_utils/test_datetime_parse.py
new file mode 100644
index 0000000000..44c33a4ccb
--- /dev/null
+++ b/tests/test_utils/test_datetime_parse.py
@@ -0,0 +1,110 @@
+"""
+Copied from https://github.com/pydantic/pydantic/blob/v1.10.22/tests/test_datetime_parse.py
+with modifications so it works without pydantic v1 imports.
+""" + +from typing import Type, Union +from datetime import date, datetime, timezone, timedelta + +import pytest + +from openai._utils import parse_date, parse_datetime + + +def create_tz(minutes: int) -> timezone: + return timezone(timedelta(minutes=minutes)) + + +@pytest.mark.parametrize( + "value,result", + [ + # Valid inputs + ("1494012444.883309", date(2017, 5, 5)), + (b"1494012444.883309", date(2017, 5, 5)), + (1_494_012_444.883_309, date(2017, 5, 5)), + ("1494012444", date(2017, 5, 5)), + (1_494_012_444, date(2017, 5, 5)), + (0, date(1970, 1, 1)), + ("2012-04-23", date(2012, 4, 23)), + (b"2012-04-23", date(2012, 4, 23)), + ("2012-4-9", date(2012, 4, 9)), + (date(2012, 4, 9), date(2012, 4, 9)), + (datetime(2012, 4, 9, 12, 15), date(2012, 4, 9)), + # Invalid inputs + ("x20120423", ValueError), + ("2012-04-56", ValueError), + (19_999_999_999, date(2603, 10, 11)), # just before watershed + (20_000_000_001, date(1970, 8, 20)), # just after watershed + (1_549_316_052, date(2019, 2, 4)), # nowish in s + (1_549_316_052_104, date(2019, 2, 4)), # nowish in ms + (1_549_316_052_104_324, date(2019, 2, 4)), # nowish in μs + (1_549_316_052_104_324_096, date(2019, 2, 4)), # nowish in ns + ("infinity", date(9999, 12, 31)), + ("inf", date(9999, 12, 31)), + (float("inf"), date(9999, 12, 31)), + ("infinity ", date(9999, 12, 31)), + (int("1" + "0" * 100), date(9999, 12, 31)), + (1e1000, date(9999, 12, 31)), + ("-infinity", date(1, 1, 1)), + ("-inf", date(1, 1, 1)), + ("nan", ValueError), + ], +) +def test_date_parsing(value: Union[str, bytes, int, float], result: Union[date, Type[Exception]]) -> None: + if type(result) == type and issubclass(result, Exception): # pyright: ignore[reportUnnecessaryIsInstance] + with pytest.raises(result): + parse_date(value) + else: + assert parse_date(value) == result + + +@pytest.mark.parametrize( + "value,result", + [ + # Valid inputs + # values in seconds + ("1494012444.883309", datetime(2017, 5, 5, 19, 27, 24, 883_309, tzinfo=timezone.utc)), + (1_494_012_444.883_309, datetime(2017, 5, 5, 19, 27, 24, 883_309, tzinfo=timezone.utc)), + ("1494012444", datetime(2017, 5, 5, 19, 27, 24, tzinfo=timezone.utc)), + (b"1494012444", datetime(2017, 5, 5, 19, 27, 24, tzinfo=timezone.utc)), + (1_494_012_444, datetime(2017, 5, 5, 19, 27, 24, tzinfo=timezone.utc)), + # values in ms + ("1494012444000.883309", datetime(2017, 5, 5, 19, 27, 24, 883, tzinfo=timezone.utc)), + ("-1494012444000.883309", datetime(1922, 8, 29, 4, 32, 35, 999117, tzinfo=timezone.utc)), + (1_494_012_444_000, datetime(2017, 5, 5, 19, 27, 24, tzinfo=timezone.utc)), + ("2012-04-23T09:15:00", datetime(2012, 4, 23, 9, 15)), + ("2012-4-9 4:8:16", datetime(2012, 4, 9, 4, 8, 16)), + ("2012-04-23T09:15:00Z", datetime(2012, 4, 23, 9, 15, 0, 0, timezone.utc)), + ("2012-4-9 4:8:16-0320", datetime(2012, 4, 9, 4, 8, 16, 0, create_tz(-200))), + ("2012-04-23T10:20:30.400+02:30", datetime(2012, 4, 23, 10, 20, 30, 400_000, create_tz(150))), + ("2012-04-23T10:20:30.400+02", datetime(2012, 4, 23, 10, 20, 30, 400_000, create_tz(120))), + ("2012-04-23T10:20:30.400-02", datetime(2012, 4, 23, 10, 20, 30, 400_000, create_tz(-120))), + (b"2012-04-23T10:20:30.400-02", datetime(2012, 4, 23, 10, 20, 30, 400_000, create_tz(-120))), + (datetime(2017, 5, 5), datetime(2017, 5, 5)), + (0, datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)), + # Invalid inputs + ("x20120423091500", ValueError), + ("2012-04-56T09:15:90", ValueError), + ("2012-04-23T11:05:00-25:00", ValueError), + (19_999_999_999, datetime(2603, 10, 11, 11, 33, 19, 
tzinfo=timezone.utc)), # just before watershed + (20_000_000_001, datetime(1970, 8, 20, 11, 33, 20, 1000, tzinfo=timezone.utc)), # just after watershed + (1_549_316_052, datetime(2019, 2, 4, 21, 34, 12, 0, tzinfo=timezone.utc)), # nowish in s + (1_549_316_052_104, datetime(2019, 2, 4, 21, 34, 12, 104_000, tzinfo=timezone.utc)), # nowish in ms + (1_549_316_052_104_324, datetime(2019, 2, 4, 21, 34, 12, 104_324, tzinfo=timezone.utc)), # nowish in μs + (1_549_316_052_104_324_096, datetime(2019, 2, 4, 21, 34, 12, 104_324, tzinfo=timezone.utc)), # nowish in ns + ("infinity", datetime(9999, 12, 31, 23, 59, 59, 999999)), + ("inf", datetime(9999, 12, 31, 23, 59, 59, 999999)), + ("inf ", datetime(9999, 12, 31, 23, 59, 59, 999999)), + (1e50, datetime(9999, 12, 31, 23, 59, 59, 999999)), + (float("inf"), datetime(9999, 12, 31, 23, 59, 59, 999999)), + ("-infinity", datetime(1, 1, 1, 0, 0)), + ("-inf", datetime(1, 1, 1, 0, 0)), + ("nan", ValueError), + ], +) +def test_datetime_parsing(value: Union[str, bytes, int, float], result: Union[datetime, Type[Exception]]) -> None: + if type(result) == type and issubclass(result, Exception): # pyright: ignore[reportUnnecessaryIsInstance] + with pytest.raises(result): + parse_datetime(value) + else: + assert parse_datetime(value) == result diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index aedd3731ee..2da724423a 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -21,3 +21,14 @@ def test_recursive_proxy() -> None: assert dir(proxy) == [] assert type(proxy).__name__ == "RecursiveLazyProxy" assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class AlwaysErrorProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + raise RuntimeError("Mocking missing dependency") + + proxy = AlwaysErrorProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy) diff --git a/tests/test_utils/test_typing.py b/tests/test_utils/test_typing.py index 690960802a..535935b9e1 100644 --- a/tests/test_utils/test_typing.py +++ b/tests/test_utils/test_typing.py @@ -9,24 +9,19 @@ _T3 = TypeVar("_T3") -class BaseGeneric(Generic[_T]): - ... +class BaseGeneric(Generic[_T]): ... -class SubclassGeneric(BaseGeneric[_T]): - ... +class SubclassGeneric(BaseGeneric[_T]): ... -class BaseGenericMultipleTypeArgs(Generic[_T, _T2, _T3]): - ... +class BaseGenericMultipleTypeArgs(Generic[_T, _T2, _T3]): ... -class SubclassGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T, _T2, _T3]): - ... +class SubclassGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T, _T2, _T3]): ... -class SubclassDifferentOrderGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T2, _T, _T3]): - ... +class SubclassDifferentOrderGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T2, _T, _T3]): ... 
 def test_extract_type_var() -> None:
diff --git a/tests/utils.py b/tests/utils.py
index 060b99339f..a03848975c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,20 +4,22 @@
 import inspect
 import traceback
 import contextlib
-from typing import Any, TypeVar, Iterator, cast
+from typing import Any, TypeVar, Iterator, Sequence, cast
 from datetime import date, datetime
 from typing_extensions import Literal, get_args, get_origin, assert_type

-from openai._types import NoneType
+from openai._types import Omit, NoneType
 from openai._utils import (
     is_dict,
     is_list,
     is_list_type,
     is_union_type,
     extract_type_arg,
+    is_sequence_type,
     is_annotated_type,
+    is_type_alias_type,
 )
-from openai._compat import PYDANTIC_V2, field_outer_type, get_model_fields
+from openai._compat import PYDANTIC_V1, field_outer_type, get_model_fields
 from openai._models import BaseModel

 BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
@@ -26,12 +28,12 @@ def assert_matches_model(model: type[BaseModelT], value: BaseModelT, *, path: list[str]) -> bool:
     for name, field in get_model_fields(model).items():
         field_value = getattr(value, name)
-        if PYDANTIC_V2:
-            allow_none = False
-        else:
+        if PYDANTIC_V1:
             # in v1 nullability was structured differently
             # https://docs.pydantic.dev/2.0/migration/#required-optional-and-nullable-fields
             allow_none = getattr(field, "allow_none", False)
+        else:
+            allow_none = False

         assert_matches_type(
             field_outer_type(field),
@@ -51,6 +53,9 @@ def assert_matches_type(
     path: list[str],
     allow_none: bool = False,
 ) -> None:
+    if is_type_alias_type(type_):
+        type_ = type_.__value__
+
     # unwrap `Annotated[T, ...]` -> `T`
     if is_annotated_type(type_):
         type_ = extract_type_arg(type_, 0)
@@ -67,6 +72,13 @@ def assert_matches_type(
     if is_list_type(type_):
         return _assert_list_type(type_, value)

+    if is_sequence_type(type_):
+        assert isinstance(value, Sequence)
+        inner_type = get_args(type_)[0]
+        for entry in value:  # type: ignore
+            assert_type(inner_type, entry)  # type: ignore
+        return
+
     if origin == str:
         assert isinstance(value, str)
     elif origin == int:
@@ -139,11 +151,15 @@ def _assert_list_type(type_: type[object], value: object) -> None:


 @contextlib.contextmanager
-def update_env(**new_env: str) -> Iterator[None]:
+def update_env(**new_env: str | Omit) -> Iterator[None]:
     old = os.environ.copy()

     try:
-        os.environ.update(new_env)
+        for name, value in new_env.items():
+            if isinstance(value, Omit):
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value

         yield None
     finally: