Skip to content

feat(api): adding chunking_strategy to polling helpers #1478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/openai/resources/beta/vector_stores/file_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,13 @@ def create_and_poll(
*,
file_ids: List[str],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Create a vector store batch and poll until all files have been processed."""
batch = self.create(
vector_store_id=vector_store_id,
file_ids=file_ids,
chunking_strategy=chunking_strategy,
)
# TODO: don't poll unless necessary??
return self.poll(
Expand Down Expand Up @@ -306,6 +308,7 @@ def upload_and_poll(
max_concurrency: int = 5,
file_ids: List[str] = [],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Uploads the given files concurrently and then creates a vector store file batch.

Expand Down Expand Up @@ -343,6 +346,7 @@ def upload_and_poll(
vector_store_id=vector_store_id,
file_ids=[*file_ids, *(f.id for f in results)],
poll_interval_ms=poll_interval_ms,
chunking_strategy=chunking_strategy,
)
return batch

Expand Down Expand Up @@ -488,11 +492,13 @@ async def create_and_poll(
*,
file_ids: List[str],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Create a vector store batch and poll until all files have been processed."""
batch = await self.create(
vector_store_id=vector_store_id,
file_ids=file_ids,
chunking_strategy=chunking_strategy,
)
# TODO: don't poll unless necessary??
return await self.poll(
Expand Down Expand Up @@ -620,6 +626,7 @@ async def upload_and_poll(
max_concurrency: int = 5,
file_ids: List[str] = [],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Uploads the given files concurrently and then creates a vector store file batch.

Expand Down Expand Up @@ -680,6 +687,7 @@ async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> No
vector_store_id=vector_store_id,
file_ids=[*file_ids, *(f.id for f in uploaded_files)],
poll_interval_ms=poll_interval_ms,
chunking_strategy=chunking_strategy,
)
return batch

Expand Down
16 changes: 12 additions & 4 deletions src/openai/resources/beta/vector_stores/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,10 @@ def create_and_poll(
*,
vector_store_id: str,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Attach a file to the given vector store and wait for it to be processed."""
self.create(vector_store_id=vector_store_id, file_id=file_id)
self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)

return self.poll(
file_id,
Expand Down Expand Up @@ -301,27 +302,30 @@ def upload(
*,
vector_store_id: str,
file: FileTypes,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Upload a file to the `files` API and then attach it to the given vector store.

Note the file will be asynchronously processed (you can use the alternative
polling helper method to wait for processing to complete).
"""
file_obj = self._client.files.create(file=file, purpose="assistants")
return self.create(vector_store_id=vector_store_id, file_id=file_obj.id)
return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)

def upload_and_poll(
    self,
    *,
    vector_store_id: str,
    file: FileTypes,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
    """Upload a file, attach it to a vector store, and poll until it is processed.

    The file is first created through the client's `files` API with
    purpose ``"assistants"``; its id is then handed to `create_and_poll`
    together with `vector_store_id`, `poll_interval_ms` and
    `chunking_strategy`, all forwarded unchanged.

    Returns whatever `create_and_poll` returns for the attached file.
    """
    # Step 1: push the raw file to the files API so it gets an id.
    uploaded = self._client.files.create(file=file, purpose="assistants")

    # Step 2: attach by id and let the polling helper do the waiting.
    attached = self.create_and_poll(
        file_id=uploaded.id,
        vector_store_id=vector_store_id,
        poll_interval_ms=poll_interval_ms,
        chunking_strategy=chunking_strategy,
    )
    return attached

Expand Down Expand Up @@ -542,9 +546,10 @@ async def create_and_poll(
*,
vector_store_id: str,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Attach a file to the given vector store and wait for it to be processed."""
await self.create(vector_store_id=vector_store_id, file_id=file_id)
await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)

return await self.poll(
file_id,
Expand Down Expand Up @@ -598,28 +603,31 @@ async def upload(
*,
vector_store_id: str,
file: FileTypes,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Upload a file to the `files` API and then attach it to the given vector store.

Note the file will be asynchronously processed (you can use the alternative
polling helper method to wait for processing to complete).
"""
file_obj = await self._client.files.create(file=file, purpose="assistants")
return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id)
return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)

async def upload_and_poll(
    self,
    *,
    vector_store_id: str,
    file: FileTypes,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
    """Upload a file, attach it to a vector store, and poll until it is processed.

    The file is first created through the async client's `files` API with
    purpose ``"assistants"``; its id is then handed to `create_and_poll`
    together with `vector_store_id`, `poll_interval_ms` and
    `chunking_strategy`, all forwarded unchanged.

    Returns whatever `create_and_poll` returns for the attached file.
    """
    # Step 1: push the raw file to the files API so it gets an id.
    uploaded = await self._client.files.create(file=file, purpose="assistants")

    # Step 2: attach by id and let the polling helper do the waiting.
    attached = await self.create_and_poll(
        file_id=uploaded.id,
        vector_store_id=vector_store_id,
        poll_interval_ms=poll_interval_ms,
        chunking_strategy=chunking_strategy,
    )
    return attached


Expand Down