Skip to content

Commit 800c162

Browse files
Feiyang1 and glasnt
authored
Update model name to gemini-embedding-001 in code snippets (#13388)
* Update model name to gemini-embedding-001 in code snippets * update to max 3072 dim * Update examples to process one input at a time * Fix indentation and unused variable * remove unused variable * correct linting issue --------- Co-authored-by: Katie McLaughlin <[email protected]>
1 parent 394b8aa commit 800c162

File tree

4 files changed

+31
-19
lines changed

4 files changed

+31
-19
lines changed

generative_ai/embeddings/batch_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def embed_text_batch() -> BatchPredictionJob:
3939
output_uri = OUTPUT_URI
4040

4141
textembedding_model = language_models.TextEmbeddingModel.from_pretrained(
42-
"textembedding-gecko@003"
42+
"gemini-embedding-001"
4343
)
4444

4545
batch_prediction_job = textembedding_model.batch_predict(

generative_ai/embeddings/code_retrieval_example.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,31 @@
1717
# [START generativeaionvertexai_embedding_code_retrieval]
1818
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
1919

20-
MODEL_NAME = "text-embedding-005"
21-
DIMENSIONALITY = 256
20+
MODEL_NAME = "gemini-embedding-001"
21+
DIMENSIONALITY = 3072
2222

2323

2424
def embed_text(
2525
texts: list[str] = ["Retrieve a function that adds two numbers"],
2626
task: str = "CODE_RETRIEVAL_QUERY",
27-
model_name: str = "text-embedding-005",
28-
dimensionality: int | None = 256,
27+
model_name: str = "gemini-embedding-001",
28+
dimensionality: int | None = 3072,
2929
) -> list[list[float]]:
3030
"""Embeds texts with a pre-trained, foundational model."""
3131
model = TextEmbeddingModel.from_pretrained(model_name)
32-
inputs = [TextEmbeddingInput(text, task) for text in texts]
3332
kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
34-
embeddings = model.get_embeddings(inputs, **kwargs)
35-
# Example response:
36-
# [[0.025890009477734566, -0.05553026497364044, 0.006374752148985863,...],
37-
return [embedding.values for embedding in embeddings]
33+
34+
embeddings = []
35+
# gemini-embedding-001 takes one input at a time
36+
for text in texts:
37+
text_input = TextEmbeddingInput(text, task)
38+
embedding = model.get_embeddings([text_input], **kwargs)
39+
print(embedding)
40+
# Example response:
41+
# [[0.006135190837085247, -0.01462465338408947, 0.004978656303137541, ...]]
42+
embeddings.append(embedding[0].values)
43+
44+
return embeddings
3845

3946

4047
if __name__ == "__main__":

generative_ai/embeddings/document_retrieval_example.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,24 @@ def embed_text() -> list[list[float]]:
2828
# A list of texts to be embedded.
2929
texts = ["banana muffins? ", "banana bread? banana muffins?"]
3030
# The dimensionality of the output embeddings.
31-
dimensionality = 256
31+
dimensionality = 3072
3232
# The task type for embedding. Check the available tasks in the model's documentation.
3333
task = "RETRIEVAL_DOCUMENT"
3434

35-
model = TextEmbeddingModel.from_pretrained("text-embedding-005")
36-
inputs = [TextEmbeddingInput(text, task) for text in texts]
35+
model = TextEmbeddingModel.from_pretrained("gemini-embedding-001")
3736
kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
38-
embeddings = model.get_embeddings(inputs, **kwargs)
3937

40-
print(embeddings)
41-
# Example response:
42-
# [[0.006135190837085247, -0.01462465338408947, 0.004978656303137541, ...], [0.1234434666, ...]],
43-
return [embedding.values for embedding in embeddings]
38+
embeddings = []
39+
# gemini-embedding-001 takes one input at a time
40+
for text in texts:
41+
text_input = TextEmbeddingInput(text, task)
42+
embedding = model.get_embeddings([text_input], **kwargs)
43+
print(embedding)
44+
# Example response:
45+
# [[0.006135190837085247, -0.01462465338408947, 0.004978656303137541, ...]]
46+
embeddings.append(embedding[0].values)
47+
48+
return embeddings
4449

4550

4651
# [END generativeaionvertexai_embedding]

generative_ai/embeddings/test_embeddings_examples.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_generate_embeddings_with_lower_dimension() -> None:
8181
@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=10)
8282
def test_text_embed_text() -> None:
8383
embeddings = document_retrieval_example.embed_text()
84-
assert [len(e) for e in embeddings] == [256, 256]
84+
assert [len(e) for e in embeddings] == [3072, 3072]
8585

8686

8787
@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=10)

0 commit comments

Comments
 (0)