Get GPU cluster by cluster ID

Together AI SDK (v2)

from together import Together
client = Together()

cluster = client.beta.clusters.retrieve("cluster_id")
print(cluster)

{
  "cluster_id": "<string>",
  "cluster_type": "KUBERNETES",
  "region": "<string>",
  "gpu_type": "H100_SXM",
  "cluster_name": "<string>",
  "volumes": [
    {
      "volume_id": "<string>",
      "volume_name": "<string>",
      "size_tib": 123,
      "status": "<string>"
    }
  ],
  "status": "Ready",
  "control_plane_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "memory_gib": 123,
      "network": "<string>",
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ]
    }
  ],
  "gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>"
      },
      "slurm_worker_hostname": "<string>"
    }
  ],
  "kube_config": "<string>",
  "num_gpus": 123,
  "cuda_version": "<string>",
  "nvidia_driver_version": "<string>",
  "project_id": "<string>",
  "num_cpu_workers": 123,
  "phase_transitions": [
    {
      "transition_time": "2023-11-07T05:31:56Z"
    }
  ],
  "desired_preemptible_gpus": 123,
  "allocated_preemptible_gpus": 123,
  "billing_type": "RESERVED",
  "add_ons": [
    {
      "name": "<string>",
      "add_on_type": "<string>",
      "config": {
        "dashboard": {
          "enabled": true
        },
        "ingress": {
          "enabled": true
        }
      },
      "state": {
        "dashboard": {},
        "ingress": {}
      }
    }
  ],
  "duration_hours": 123,
  "slurm_shm_size_gib": 123,
  "capacity_pool_id": "<string>",
  "reservation_start_time": "2023-11-07T05:31:56Z",
  "reservation_end_time": "2023-11-07T05:31:56Z",
  "install_traefik": true,
  "created_at": "2023-11-07T05:31:56Z",
  "oidc_config": {
    "issuer_url": "<string>",
    "client_id": "<string>",
    "username_claim": "<string>",
    "username_prefix": "<string>",
    "group_claim": "<string>",
    "group_prefix": "<string>",
    "ca_cert": "<string>"
  },
  "cluster_config": {
    "kubernetes_dashboard_enabled": true,
    "jumphost_enabled": true,
    "slurm_startup_scripts": {
      "worker_prolog": "<string>",
      "worker_epilog": "<string>",
      "controller_prolog": "<string>",
      "controller_epilog": "<string>",
      "login_init_script": "<string>",
      "nodeset_init_script": "<string>",
      "extra_slurm_conf": "<string>"
    },
    "ingress": {
      "enabled": true
    },
    "observability": {
      "enabled": true
    },
    "gpu_operator_version": "<string>"
  }
}

GET /compute/clusters/{cluster_id}

Together AI SDK (v2)

from together import Together
client = Together()

cluster = client.beta.clusters.retrieve("cluster_id")
print(cluster)

{
  "cluster_id": "<string>",
  "cluster_type": "KUBERNETES",
  "region": "<string>",
  "gpu_type": "H100_SXM",
  "cluster_name": "<string>",
  "volumes": [
    {
      "volume_id": "<string>",
      "volume_name": "<string>",
      "size_tib": 123,
      "status": "<string>"
    }
  ],
  "status": "Ready",
  "control_plane_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "memory_gib": 123,
      "network": "<string>",
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ]
    }
  ],
  "gpu_worker_nodes": [
    {
      "node_id": "<string>",
      "status": "<string>",
      "host_name": "<string>",
      "num_cpu_cores": 123,
      "num_gpus": 123,
      "memory_gib": 123,
      "networks": [
        "<string>"
      ],
      "phase_transitions": [
        {
          "transition_time": "2023-11-07T05:31:56Z"
        }
      ],
      "instance_id": "<string>",
      "latest_remediation": {
        "id": "<string>",
        "cluster_id": "<string>",
        "instance_id": "<string>",
        "reason": "<string>",
        "active_health_check_run_id": "<string>",
        "passive_health_check_event_id": "<string>",
        "requested_by": "<string>",
        "create_time": "2023-11-07T05:31:56Z",
        "reviewed_by": "<string>",
        "review_time": "2023-11-07T05:31:56Z",
        "review_comment": "<string>",
        "start_time": "2023-11-07T05:31:56Z",
        "end_time": "2023-11-07T05:31:56Z",
        "error_message": "<string>",
        "update_time": "2023-11-07T05:31:56Z",
        "instance_name": "<string>"
      },
      "slurm_worker_hostname": "<string>"
    }
  ],
  "kube_config": "<string>",
  "num_gpus": 123,
  "cuda_version": "<string>",
  "nvidia_driver_version": "<string>",
  "project_id": "<string>",
  "num_cpu_workers": 123,
  "phase_transitions": [
    {
      "transition_time": "2023-11-07T05:31:56Z"
    }
  ],
  "desired_preemptible_gpus": 123,
  "allocated_preemptible_gpus": 123,
  "billing_type": "RESERVED",
  "add_ons": [
    {
      "name": "<string>",
      "add_on_type": "<string>",
      "config": {
        "dashboard": {
          "enabled": true
        },
        "ingress": {
          "enabled": true
        }
      },
      "state": {
        "dashboard": {},
        "ingress": {}
      }
    }
  ],
  "duration_hours": 123,
  "slurm_shm_size_gib": 123,
  "capacity_pool_id": "<string>",
  "reservation_start_time": "2023-11-07T05:31:56Z",
  "reservation_end_time": "2023-11-07T05:31:56Z",
  "install_traefik": true,
  "created_at": "2023-11-07T05:31:56Z",
  "oidc_config": {
    "issuer_url": "<string>",
    "client_id": "<string>",
    "username_claim": "<string>",
    "username_prefix": "<string>",
    "group_claim": "<string>",
    "group_prefix": "<string>",
    "ca_cert": "<string>"
  },
  "cluster_config": {
    "kubernetes_dashboard_enabled": true,
    "jumphost_enabled": true,
    "slurm_startup_scripts": {
      "worker_prolog": "<string>",
      "worker_epilog": "<string>",
      "controller_prolog": "<string>",
      "controller_epilog": "<string>",
      "login_init_script": "<string>",
      "nodeset_init_script": "<string>",
      "extra_slurm_conf": "<string>"
    },
    "ingress": {
      "enabled": true
    },
    "observability": {
      "enabled": true
    },
    "gpu_operator_version": "<string>"
  }
}

Authorizations

Authorization

string

header

default:default

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

cluster_id

string

required

The ID of the cluster to retrieve

Response

200 - application/json

cluster_id

string

required

cluster_type

enum<string>

required

Type of cluster.

Available options:

KUBERNETES,

SLURM

region

string

required

gpu_type

enum<string>

required

Available options:

H100_SXM,

H200_SXM,

RTX_6000_PCI,

L40_PCIE,

B200_SXM,

H100_SXM_INF

cluster_name

string

required

volumes

object[]

required

Show child attributes

status

enum<string>

required

Current status of the GPU cluster.

Available options:

WaitingForControlPlaneNodes,

WaitingForDataPlaneNodes,

WaitingForSubnet,

WaitingForSharedVolume,

InstallingDrivers,

RunningAcceptanceTests,

Paused,

OnDemandComputePaused,

Ready,

Degraded,

Deleting

control_plane_nodes

object[]

required

Show child attributes

gpu_worker_nodes

object[]

required

Show child attributes

kube_config

string

required

num_gpus

integer<int32>

required

cuda_version

string

required

nvidia_driver_version

string

required

project_id

string

required

num_cpu_workers

integer<int32>

required

Number of CPU-only worker nodes in the cluster.

phase_transitions

object[]

required

Cluster-level phase transition history.

Show child attributes

desired_preemptible_gpus

integer<int32>

required

Customer's requested number of preemptible GPUs. Set on cluster create or update; persists until changed.

allocated_preemptible_gpus

integer<int32>

required

Actual number of preemptible GPUs currently allocated to the cluster. Updated asynchronously by the fulfillment and reclamation workers; may be less than desired_preemptible_gpus when capacity is constrained.

billing_type

enum<string>

required

Billing type for the cluster (RESERVED, ON_DEMAND, or SCHEDULED_CAPACITY).

Available options:

RESERVED,

ON_DEMAND,

SCHEDULED_CAPACITY

add_ons

object[]

required

Enabled add-ons on this cluster. Only add-ons with enabled=true in their config are returned.

Show child attributes

duration_hours

integer

slurm_shm_size_gib

integer

capacity_pool_id

string

reservation_start_time

string<date-time>

reservation_end_time

string<date-time>

install_traefik

boolean

created_at

string<date-time>

oidc_config

object

Show child attributes

cluster_config

object

Show child attributes

List all GPU clusters

Update a GPU cluster

⌘I