# Docs for v2 can be found by changing the above selector ^
import os

from together import Together

# The client reads the API key from the TOGETHER_API_KEY environment variable;
# avoid hard-coding credentials in example code.
client = Together(
    api_key=os.environ.get("TOGETHER_API_KEY"),
)

# Create a dedicated serving endpoint. The endpoint starts automatically
# after creation; autoscaling is bounded by min_replicas/max_replicas.
endpoint = client.endpoints.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    hardware="1x_nvidia_a100_80gb_sxm",
    min_replicas=2,
    max_replicas=5,
)
print(endpoint.id)
{
"object": "endpoint",
"id": "endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
"name": "devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1",
"display_name": "My Llama3 70b endpoint",
"model": "meta-llama/Llama-3-8b-chat-hf",
"hardware": "1x_nvidia_a100_80gb_sxm",
"type": "dedicated",
"owner": "devuser",
"state": "STARTED",
"autoscaling": {
"min_replicas": 123,
"max_replicas": 123
},
"created_at": "2025-02-04T10:43:55.405Z"
}

Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements.
# Docs for v2 can be found by changing the above selector ^
import os

from together import Together

# The client reads the API key from the TOGETHER_API_KEY environment variable;
# avoid hard-coding credentials in example code.
client = Together(
    api_key=os.environ.get("TOGETHER_API_KEY"),
)

# Create a dedicated serving endpoint. The endpoint starts automatically
# after creation; autoscaling is bounded by min_replicas/max_replicas.
endpoint = client.endpoints.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    hardware="1x_nvidia_a100_80gb_sxm",
    min_replicas=2,
    max_replicas=5,
)
print(endpoint.id)
{
"object": "endpoint",
"id": "endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
"name": "devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1",
"display_name": "My Llama3 70b endpoint",
"model": "meta-llama/Llama-3-8b-chat-hf",
"hardware": "1x_nvidia_a100_80gb_sxm",
"type": "dedicated",
"owner": "devuser",
"state": "STARTED",
"autoscaling": {
"min_replicas": 123,
"max_replicas": 123
},
"created_at": "2025-02-04T10:43:55.405Z"
}

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.
The model to deploy on this endpoint
The hardware configuration to use for this endpoint
A human-readable name for the endpoint
Whether to disable the prompt cache for this endpoint
Whether to disable speculative decoding for this endpoint
The desired state of the endpoint
Allowed values: STARTED, STOPPED. Default: "STARTED"
The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit, or set to 0 to disable automatic timeout.
60
Create the endpoint in a specified availability zone (e.g., us-central-4b)
200
Details about a dedicated endpoint deployment
The type of object
endpoint "endpoint"
Unique identifier for the endpoint
"endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7"
System name for the endpoint
"devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1"
Human-readable name for the endpoint
"My Llama3 70b endpoint"
The model deployed on this endpoint
"meta-llama/Llama-3-8b-chat-hf"
The hardware configuration used for this endpoint
"1x_nvidia_a100_80gb_sxm"
The type of endpoint
dedicated "dedicated"
The owner of this endpoint
"devuser"
Current state of the endpoint
Allowed values: PENDING, STARTING, STARTED, STOPPING, STOPPED, ERROR. Example: "STARTED"
Timestamp when the endpoint was created
"2025-02-04T10:43:55.405Z"
Was this page helpful?