Skip to content

Commit 4b8cda6

Browse files
authored
Updating mllama after strftime. (#2993)
* Updating mllama after strftime. * Town instead of village. * Forgot the integration snapshot. * Attempt to fix intel CPU. * Intel extension fix. * Workaround intel. * Moving those deps directly into pyproject. * Revert "Moving those deps directly into pyproject." This reverts commit 98c1496. * Non system uv. * Fixing the docker environment hopefully. * Missed a step. * Move workdir up a bit. * Bailing out of reproducible python env. * Triton version.
1 parent 856709d commit 4b8cda6

File tree

4 files changed

+20
-83
lines changed

4 files changed

+20
-83
lines changed

Dockerfile_intel

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -215,16 +215,9 @@ COPY server server
215215
COPY server/Makefile server/Makefile
216216
ENV UV_SYSTEM_PYTHON=1
217217
RUN cd server && \
218+
make gen-server && \
218219
pip install -U pip uv && \
219-
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
220-
. ./.venv/bin/activate && \
221-
make gen-server-raw
222-
223-
RUN cd server && \
224-
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
225-
. ./.venv/bin/activate && \
226-
pwd && \
227-
text-generation-server --help
220+
uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
228221

229222
# Install benchmarker
230223
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -238,8 +231,5 @@ ENV ATTENTION=flashdecoding-ipex
238231
ENV PREFIX_CACHING=1
239232
ENV PREFILL_CHUNKING=1
240233
ENV CUDA_GRAPHS=0
241-
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
242-
RUN chmod +x /tgi-entrypoint.sh
243-
244-
ENTRYPOINT ["/tgi-entrypoint.sh"]
234+
ENTRYPOINT ["text-generation-launcher"]
245235
CMD ["--json-output"]

integration-tests/models/__snapshots__/test_mllama/test_mllama_load.json

Lines changed: 6 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@
66
"index": 0,
77
"logprobs": null,
88
"message": {
9-
"content": "In a bustling city, a chicken named Cluck",
9+
"content": "In a small town, a chicken named Cluck",
1010
"name": null,
1111
"role": "assistant",
1212
"tool_calls": null
1313
},
1414
"usage": null
1515
}
1616
],
17-
"created": 1727773835,
17+
"created": 1738753835,
1818
"id": "",
1919
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
2020
"object": "chat.completion",
21-
"system_fingerprint": "2.4.2-dev0-native",
21+
"system_fingerprint": "3.1.1-dev0-native",
2222
"usage": {
2323
"completion_tokens": 10,
2424
"prompt_tokens": 50,
@@ -32,71 +32,19 @@
3232
"index": 0,
3333
"logprobs": null,
3434
"message": {
35-
"content": "In a world where even chickens could dream big,",
35+
"content": "In a small town, a chicken named Cluck",
3636
"name": null,
3737
"role": "assistant",
3838
"tool_calls": null
3939
},
4040
"usage": null
4141
}
4242
],
43-
"created": 1727773835,
43+
"created": 1738753835,
4444
"id": "",
4545
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
4646
"object": "chat.completion",
47-
"system_fingerprint": "2.4.2-dev0-native",
48-
"usage": {
49-
"completion_tokens": 10,
50-
"prompt_tokens": 50,
51-
"total_tokens": 60
52-
}
53-
},
54-
{
55-
"choices": [
56-
{
57-
"finish_reason": "length",
58-
"index": 0,
59-
"logprobs": null,
60-
"message": {
61-
"content": "In a world where even chickens could dream big,",
62-
"name": null,
63-
"role": "assistant",
64-
"tool_calls": null
65-
},
66-
"usage": null
67-
}
68-
],
69-
"created": 1727773835,
70-
"id": "",
71-
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
72-
"object": "chat.completion",
73-
"system_fingerprint": "2.4.2-dev0-native",
74-
"usage": {
75-
"completion_tokens": 10,
76-
"prompt_tokens": 50,
77-
"total_tokens": 60
78-
}
79-
},
80-
{
81-
"choices": [
82-
{
83-
"finish_reason": "length",
84-
"index": 0,
85-
"logprobs": null,
86-
"message": {
87-
"content": "In a world where even chickens could dream big,",
88-
"name": null,
89-
"role": "assistant",
90-
"tool_calls": null
91-
},
92-
"usage": null
93-
}
94-
],
95-
"created": 1727773835,
96-
"id": "",
97-
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
98-
"object": "chat.completion",
99-
"system_fingerprint": "2.4.2-dev0-native",
47+
"system_fingerprint": "3.1.1-dev0-native",
10048
"usage": {
10149
"completion_tokens": 10,
10250
"prompt_tokens": 50,

integration-tests/models/__snapshots__/test_mllama/test_mllama_simpl.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
"index": 0,
66
"logprobs": null,
77
"message": {
8-
"content": "In a bustling city, a chicken named Cluck",
8+
"content": "In a small town, a chicken named Cluck",
99
"name": null,
1010
"role": "assistant",
1111
"tool_calls": null
1212
},
1313
"usage": null
1414
}
1515
],
16-
"created": 1727556016,
16+
"created": 1738753833,
1717
"id": "",
1818
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
1919
"object": "chat.completion",
20-
"system_fingerprint": "2.4.2-dev0-native",
20+
"system_fingerprint": "3.1.1-dev0-native",
2121
"usage": {
2222
"completion_tokens": 10,
2323
"prompt_tokens": 50,

integration-tests/models/test_mllama.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
4747
"total_tokens": 60,
4848
}
4949
assert (
50-
response.choices[0].message.content
51-
== "In a bustling city, a chicken named Cluck"
50+
response.choices[0].message.content == "In a small town, a chicken named Cluck"
5251
)
5352
assert response == response_snapshot
5453

@@ -84,12 +83,12 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
8483
]
8584
responses = await asyncio.gather(*futures)
8685

87-
_ = [response.choices[0].message.content for response in responses]
86+
generated_texts = [response.choices[0].message.content for response in responses]
8887

8988
# XXX: TODO: Fix this test.
90-
# assert generated_texts[0] == "In a bustling city, a chicken named Cluck"
91-
# assert len(generated_texts) == 4
92-
# assert generated_texts, all(
93-
# [text == generated_texts[0] for text in generated_texts]
94-
# )
95-
# assert responses == response_snapshot
89+
assert generated_texts[0] == "In a small town, a chicken named Cluck"
90+
assert len(generated_texts) == 2
91+
assert generated_texts, all(
92+
[text == generated_texts[0] for text in generated_texts]
93+
)
94+
assert responses == response_snapshot

0 commit comments

Comments (0)