def encode_pooling_bytes(
    pooling_outputs: list[PoolingRequestOutput],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> tuple[list[bytes], list[dict[str, Any]], dict[str, Any]]:
    """Encode pooling outputs as binary chunks with per-chunk metadata.

    Every entry's ``outputs.data`` is converted with ``tensor2binary`` using
    the requested ``embed_dtype`` and ``endianness``. The chunks are kept as
    separate list elements (they are not joined here); the metadata records
    where each chunk would sit if the chunks were laid out back to back in
    input order.

    Args:
        pooling_outputs: Outputs to encode, processed in order.
        embed_dtype: Passed through to ``tensor2binary`` and echoed in each
            metadata item.
        endianness: Passed through to ``tensor2binary`` and echoed in each
            metadata item.

    Returns:
        A ``(body, items, usage)`` tuple:

        * ``body`` - one binary chunk per output, in input order.
        * ``items`` - one dict per chunk (dictionary form of MetadataItem)
          with ``index``, ``embed_dtype``, ``endianness``, ``start``,
          ``end`` and ``shape``; ``end - start`` equals the chunk length.
        * ``usage`` - dictionary form of UsageInfo, where ``prompt_tokens``
          and ``total_tokens`` are both the summed prompt token count.
    """
    chunks: list[bytes] = []
    metadata: list[dict[str, Any]] = []
    cursor = 0  # running byte offset of the next chunk
    prompt_token_total = 0

    for position, pooled in enumerate(pooling_outputs):
        tensor = pooled.outputs.data
        payload = tensor2binary(
            tensor=tensor,
            embed_dtype=embed_dtype,
            endianness=endianness,
        )
        next_cursor = cursor + len(payload)

        chunks.append(payload)
        # Dictionary form of MetadataItem
        metadata.append(
            {
                "index": position,
                "embed_dtype": embed_dtype,
                "endianness": endianness,
                "start": cursor,
                "end": next_cursor,
                "shape": tensor.shape,
            }
        )

        prompt_token_total += len(pooled.prompt_token_ids)
        cursor = next_cursor

    # Dictionary form of UsageInfo; only prompt tokens are counted here, so
    # the total equals the prompt count.
    usage = {
        "prompt_tokens": prompt_token_total,
        "total_tokens": prompt_token_total,
    }
    return chunks, metadata, usage