Skip to content

chore(internal): support multipart data with overlapping keys #1104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions src/openai/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
RequestOptions,
ModelBuilderProtocol,
)
from ._utils import is_dict, is_given, is_mapping
from ._utils import is_dict, is_list, is_given, is_mapping
from ._compat import model_copy, model_dump
from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
from ._response import (
Expand Down Expand Up @@ -451,14 +451,18 @@ def _build_request(

headers = self._build_headers(options)
params = _merge_mappings(self._custom_query, options.params)
content_type = headers.get("Content-Type")

# If the given Content-Type header is multipart/form-data then it
# has to be removed so that httpx can generate the header with
# additional information for us as it has to be in this form
# for the server to be able to correctly parse the request:
# multipart/form-data; boundary=---abc--
if headers.get("Content-Type") == "multipart/form-data":
headers.pop("Content-Type")
if content_type is not None and content_type.startswith("multipart/form-data"):
if "boundary" not in content_type:
# only remove the header if the boundary hasn't been explicitly set
# as the caller doesn't want httpx to come up with their own boundary
headers.pop("Content-Type")

# As we are now sending multipart/form-data instead of application/json
# we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding
Expand Down Expand Up @@ -494,9 +498,25 @@ def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, o
)
serialized: dict[str, object] = {}
for key, value in items:
if key in serialized:
raise ValueError(f"Duplicate key encountered: {key}; This behaviour is not supported")
serialized[key] = value
existing = serialized.get(key)

if not existing:
serialized[key] = value
continue

# If a value has already been set for this key then that
# means we're sending data like `array[]=[1, 2, 3]` and we
# need to tell httpx that we want to send multiple values with
# the same key which is done by using a list or a tuple.
#
# Note: 2d arrays should never result in the same key at both
# levels so it's safe to assume that if the value is a list,
# it was because we changed it to be a list.
if is_list(existing):
existing.append(value)
else:
serialized[key] = [existing, value]

return serialized

def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]:
Expand Down
58 changes: 58 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,35 @@ def test_request_extra_query(self) -> None:
params = dict(request.url.params)
assert params == {"foo": "2"}

def test_multipart_repeating_array(self, client: OpenAI) -> None:
request = client._build_request(
FinalRequestOptions.construct(
method="get",
url="/foo",
headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"},
json_data={"array": ["foo", "bar"]},
files=[("foo.txt", b"hello world")],
)
)

assert request.read().split(b"\r\n") == [
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="array[]"',
b"",
b"foo",
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="array[]"',
b"",
b"bar",
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="foo.txt"; filename="upload"',
b"Content-Type: application/octet-stream",
b"",
b"hello world",
b"--6b7ba517decee4a450543ea6ae821c82--",
b"",
]

@pytest.mark.respx(base_url=base_url)
def test_basic_union_response(self, respx_mock: MockRouter) -> None:
class Model1(BaseModel):
Expand Down Expand Up @@ -1104,6 +1133,35 @@ def test_request_extra_query(self) -> None:
params = dict(request.url.params)
assert params == {"foo": "2"}

def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None:
request = async_client._build_request(
FinalRequestOptions.construct(
method="get",
url="/foo",
headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"},
json_data={"array": ["foo", "bar"]},
files=[("foo.txt", b"hello world")],
)
)

assert request.read().split(b"\r\n") == [
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="array[]"',
b"",
b"foo",
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="array[]"',
b"",
b"bar",
b"--6b7ba517decee4a450543ea6ae821c82",
b'Content-Disposition: form-data; name="foo.txt"; filename="upload"',
b"Content-Type: application/octet-stream",
b"",
b"hello world",
b"--6b7ba517decee4a450543ea6ae821c82--",
b"",
]

@pytest.mark.respx(base_url=base_url)
async def test_basic_union_response(self, respx_mock: MockRouter) -> None:
class Model1(BaseModel):
Expand Down