> ## Documentation Index
> Fetch the complete documentation index at: https://docs.together.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Create a GPU cluster

> Create an Instant Cluster on Together's high-performance GPU clusters.
With features like on-demand scaling, long-lived resizable high-bandwidth shared DC-local storage,
Kubernetes and Slurm cluster flavors, a REST API, and Terraform support,
you can run workloads flexibly without complex infrastructure management.


## OpenAPI

````yaml POST /compute/clusters
# OpenAPI document header: spec version, API metadata, servers, and the
# default security requirement.
openapi: 3.1.0
info:
  title: Together APIs
  description: The Together REST API. See https://docs.together.ai for more details.
  version: 2.0.0
  termsOfService: https://www.together.ai/terms-of-service
  contact:
    name: Together Support
    url: https://www.together.ai/contact
  license:
    name: MIT
    url: https://github.com/togethercomputer/openapi/blob/main/LICENSE
# Base URLs that the operation paths below are resolved against.
servers:
  - url: https://api.together.ai/v1
    description: Default environment for APIs
  - url: https://api-inference.together.ai/v2
    description: Optimized environment for inference
# Global security requirement: the bearerAuth scheme, with no scopes.
security:
  - bearerAuth: []
# Operations exposed by this page: POST /compute/clusters creates a GPU cluster.
paths:
  /compute/clusters:
    # Takes a GPUClusterCreateRequest body and returns a GPUClusterInfo on 200.
    post:
      tags:
        - GPUClusterService
      summary: Create a GPU cluster
      description: >
        Create an Instant Cluster on Together's high-performance GPU clusters.

        With features like on-demand scaling, long-lived resizable
        high-bandwidth shared DC-local storage,

        Kubernetes and Slurm cluster flavors, a REST API, and Terraform support,

        you can run workloads flexibly without complex infrastructure
        management.
      operationId: GPUClusterService_Create
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GPUClusterCreateRequest'
        required: true
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClusterInfo'
      # SDK and CLI samples. Parameter names must match the properties of
      # GPUClusterCreateRequest (for example the required billing_type field).
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together

            client = Together()

            response = client.beta.clusters.create(
              cluster_name="my-gpu-cluster",
              region="us-central-8",
              gpu_type="H100_SXM",
              num_gpus=8,
              nvidia_driver_version="560",
              cuda_version="12.6",
              billing_type="ON_DEMAND",
            )

            print(response.cluster_id)
        - lang: TypeScript
          label: Together AI SDK (v2)
          source: |
            import Together from "together-ai";

            const client = new Together();

            const response = await client.beta.clusters.create({
              cluster_name: "my-gpu-cluster",
              region: "us-central-8",
              gpu_type: "H100_SXM",
              num_gpus: 8,
              nvidia_driver_version: "560",
              cuda_version: "12.6",
              billing_type: "ON_DEMAND",
            });

            console.log(response.cluster_id)
        - lang: JavaScript
          label: Together AI SDK (v2)
          source: |
            import Together from "together-ai";

            const client = new Together();

            const response = await client.beta.clusters.create({
              cluster_name: "my-gpu-cluster",
              region: "us-central-8",
              gpu_type: "H100_SXM",
              num_gpus: 8,
              nvidia_driver_version: "560",
              cuda_version: "12.6",
              billing_type: "ON_DEMAND",
            });

            console.log(response.cluster_id)
        - lang: Shell
          label: CLI
          source: |
            tg beta clusters create \
              --name my-gpu-cluster \
              --region us-central-8 \
              --gpu-type H100_SXM \
              --cluster-type KUBERNETES \
              --num-gpus 8 \
              --billing-type ON_DEMAND \
              --nvidia-driver-version 560 \
              --cuda-version 12.6
components:
  schemas:
    # Request body for POST /compute/clusters. The `required` list below names
    # the mandatory fields; every other property is optional.
    GPUClusterCreateRequest:
      description: GPU Cluster create request
      required:
        - region
        - gpu_type
        - num_gpus
        - cluster_name
        - cuda_version
        - nvidia_driver_version
        - billing_type
      type: object
      properties:
        cluster_type:
          description: Type of cluster to create.
          type: string
          enum:
            - KUBERNETES
            - SLURM
        region:
          description: >-
            Region to create the GPU cluster in. Usable regions can be found
            from `client.clusters.list_regions()`
          type: string
        gpu_type:
          description: Type of GPU to use in the cluster
          type: string
          enum:
            - H100_SXM
            - H200_SXM
            - RTX_6000_PCI
            - L40_PCIE
            - B200_SXM
            - H100_SXM_INF
        num_gpus:
          description: >-
            Number of GPUs to allocate in the cluster. This must be a multiple
            of 8. For example, 8, 16 or 24
          type: integer
        cluster_name:
          description: Name of the GPU cluster.
          type: string
        duration_days:
          x-stainless-terraform-configurability: computed
          description: Duration in days to keep the cluster running.
          type: integer
        shared_volume:
          $ref: '#/components/schemas/GPUClustersSharedVolumeCreateRequest'
          x-stainless-terraform-configurability: computed
          description: >-
            Inline configuration to create a shared volume with the cluster
            creation.
        volume_id:
          description: ID of an existing volume to use with the cluster creation.
          type: string
        billing_type:
          description: >
            RESERVED billing types allow you to specify the duration of the
            cluster reservation via the duration_days field.

            ON_DEMAND billing types will give you ownership of the cluster until
            you delete it.

            SCHEDULED_CAPACITY billing types allow you to reserve capacity for a
            scheduled time window. You must specify the reservation_start_time
            and reservation_end_time with this request.
          x-stainless-terraform-configurability: computed
          type: string
          enum:
            - RESERVED
            - ON_DEMAND
            - SCHEDULED_CAPACITY
        auto_scaled:
          deprecated: true
          type: boolean
          default: false
          description: >-
            Whether GPU cluster should be auto-scaled based on the workload. By
            default, it is not auto-scaled.
        auto_scale_max_gpus:
          # NOTE(review): the description refers to the deprecated auto_scaled
          # flag; confirm whether the requirement also applies to auto_scale.
          type: integer
          description: >-
            Maximum number of GPUs to which the cluster can be auto-scaled up.
            This field is required if auto_scaled is true.
        slurm_shm_size_gib:
          type: integer
          description: >-
            Shared memory size in GiB for Slurm cluster. This field is required
            if cluster_type is SLURM.
        capacity_pool_id:
          type: string
          description: >-
            ID of the capacity pool to use for the cluster. This field is
            optional and only applicable if the cluster is created from a
            capacity pool.
        reservation_start_time:
          type: string
          description: >-
            Reservation start time of the cluster. This field is required for
            SCHEDULED_CAPACITY billing to specify the reservation start time
            for the cluster. If not provided, the cluster provisions
            immediately.
          format: date-time
        reservation_end_time:
          type: string
          description: >-
            Reservation end time of the cluster. This field is required for
            SCHEDULED_CAPACITY billing to specify the reservation end time for
            the cluster.
          format: date-time
        install_traefik:
          type: boolean
          default: false
          description: >-
            Whether to install Traefik ingress controller in the cluster. This
            field is only applicable for Kubernetes clusters and is false by
            default.
        cuda_version:
          type: string
          description: CUDA version for this cluster. For example, 12.5
        nvidia_driver_version:
          type: string
          description: >-
            Nvidia driver version for this cluster. For example, 550. Only some
            combinations of cuda_version and nvidia_driver_version are
            supported.
        slurm_image:
          type: string
          description: Custom Slurm image for Slurm clusters.
        oidc_config:
          $ref: '#/components/schemas/OIDCConfig'
        project_id:
          type: string
          description: >-
            Project ID for the cluster. If not set, the project from the request
            context is used.
        acceptance_tests_params:
          $ref: '#/components/schemas/AcceptanceTestsParams'
        cluster_config:
          $ref: '#/components/schemas/InstanceClusterConfig'
        num_capacity_pool_gpus:
          type: integer
          description: >-
            Number of GPUs to allocate from the capacity pool. Must be a
            multiple of 8 and not exceed num_gpus.
        auto_scale:
          type: boolean
          description: >-
            Whether to enable auto-scaling for the cluster. If true, the cluster
            will automatically scale the number of GPU worker nodes between
            num_gpus and auto_scale_max_gpus based on the workload.
        num_preemptible_gpus:
          type: integer
          description: >-
            Number of preemptible GPUs to request alongside on-demand capacity.
            Must be a multiple of 8. Preemptible nodes are cheaper but may be
            reclaimed when on-demand capacity is needed elsewhere; the system
            fulfills this asynchronously and surfaces the actual count in
            allocated_preemptible_gpus.
        num_reserved_gpus:
          type: integer
          description: >-
            Number of prepaid (PLG) reserved GPUs for this cluster. When omitted
            for RESERVED billing on create, the server defaults this to
            num_gpus.
        add_ons:
          type: array
          items:
            $ref: '#/components/schemas/AddOnCreateRequest'
          description: Add-ons to enable on the cluster at creation time.
    # Response body describing a GPU cluster (returned by POST
    # /compute/clusters). Enum-valued fields declare `type: string` explicitly,
    # matching the same fields in GPUClusterCreateRequest.
    GPUClusterInfo:
      type: object
      required:
        - cluster_id
        - cluster_type
        - region
        - gpu_type
        - cluster_name
        - cuda_version
        - nvidia_driver_version
        - volumes
        - status
        - control_plane_nodes
        - gpu_worker_nodes
        - kube_config
        - num_gpus
        - project_id
        - num_cpu_workers
        - phase_transitions
        - desired_preemptible_gpus
        - allocated_preemptible_gpus
        - num_capacity_pool_gpus
        - num_reserved_gpus
        - billing_type
        - add_ons
      properties:
        cluster_id:
          type: string
        cluster_type:
          description: Type of cluster.
          type: string
          enum:
            - KUBERNETES
            - SLURM
        region:
          type: string
        gpu_type:
          type: string
          enum:
            - H100_SXM
            - H200_SXM
            - RTX_6000_PCI
            - L40_PCIE
            - B200_SXM
            - H100_SXM_INF
        cluster_name:
          type: string
        duration_hours:
          type: integer
        volumes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterVolume'
        status:
          description: Current status of the GPU cluster.
          type: string
          enum:
            - WaitingForControlPlaneNodes
            - WaitingForDataPlaneNodes
            - WaitingForSubnet
            - WaitingForSharedVolume
            - InstallingDrivers
            - RunningAcceptanceTests
            - Paused
            - OnDemandComputePaused
            - Ready
            - Degraded
            - Deleting
        control_plane_nodes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterControlPlaneNode'
        gpu_worker_nodes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterGPUWorkerNode'
        kube_config:
          type: string
        num_gpus:
          type: integer
        slurm_shm_size_gib:
          type: integer
        capacity_pool_id:
          type: string
        reservation_start_time:
          type: string
          format: date-time
        reservation_end_time:
          type: string
          format: date-time
        install_traefik:
          type: boolean
        cuda_version:
          type: string
        nvidia_driver_version:
          type: string
        created_at:
          type: string
          format: date-time
        oidc_config:
          $ref: '#/components/schemas/OIDCConfig'
        project_id:
          type: string
        cluster_config:
          $ref: '#/components/schemas/InstanceClusterConfig'
        num_cpu_workers:
          type: integer
          description: Number of CPU-only worker nodes in the cluster.
        phase_transitions:
          type: array
          items:
            $ref: '#/components/schemas/ClusterPhaseTransition'
          description: Cluster-level phase transition history.
        desired_preemptible_gpus:
          type: integer
          description: >-
            Customer's requested number of preemptible GPUs. Set on cluster
            create or update; persists until changed.
        allocated_preemptible_gpus:
          type: integer
          description: >-
            Actual number of preemptible GPUs currently allocated to the
            cluster. Updated asynchronously by the fulfillment and reclamation
            workers; may be less than desired_preemptible_gpus when capacity is
            constrained.
        billing_type:
          enum:
            - RESERVED
            - ON_DEMAND
            - SCHEDULED_CAPACITY
          type: string
          description: >-
            Billing type for the cluster (RESERVED, ON_DEMAND, or
            SCHEDULED_CAPACITY).
        add_ons:
          type: array
          items:
            $ref: '#/components/schemas/AddOnInfo'
          description: >-
            Enabled add-ons on this cluster. Only add-ons with enabled=true in
            their config are returned.
        machine_cluster_id:
          type: string
          description: ID of the machine cluster backing this GPU cluster.
        first_ready_at:
          type: string
          description: Timestamp when the cluster first reached the Ready phase.
          format: date-time
        is_in_substrate:
          type: boolean
          description: Whether the cluster is managed inside a substrate environment.
        control_plane_ready:
          type: boolean
          description: Whether the control plane is currently ready.
        ums_project_id:
          type: string
          description: UMS project ID associated with this cluster.
        ums_org_id:
          type: string
          description: UMS organization ID associated with this cluster.
        os_image:
          type: string
          description: Data-volume image name for GPU worker nodes.
        nvidia_driver_version_id:
          type: string
          description: >-
            Internal NVIDIA version ID for this cluster's driver and CUDA
            combination.
        num_capacity_pool_gpus:
          type: integer
          description: >-
            Number of GPUs to draw from a capacity pool. A component of the
            overall num_gpus, alongside num_reserved_gpus.
          format: int32
        num_reserved_gpus:
          type: integer
          description: >-
            Number of prepaid reserved GPUs for this cluster. A component of the
            overall num_gpus, alongside num_capacity_pool_gpus.
          format: int32
        deleted_gpu_worker_nodes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterGPUWorkerNode'
          description: >-
            GPU worker nodes retained after they left the live data plane. These
            are separate from gpu_worker_nodes and must not be counted as live
            capacity.
        node_lifecycle_events:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterNodeLifecycleEvent'
          description: >-
            Recent node lifecycle events such as scale-up, scale-down, and
            preemption. Combine these with live and deleted node lists to render
            the cluster timeline.
    # Inline shared-volume definition referenced by
    # GPUClusterCreateRequest.shared_volume. volume_name, size_tib and region
    # are required.
    GPUClustersSharedVolumeCreateRequest:
      type: object
      required:
        - volume_name
        - size_tib
        - region
      properties:
        volume_name:
          type: string
          description: User provided name of the volume.
        size_tib:
          type: integer
          description: Volume size in whole tebibytes (TiB).
        region:
          type: string
          description: >-
            Region name. Usable regions can be found from
            `clusters.list_regions()`
        is_lifecycle_independent:
          type: boolean
          description: >-
            When true, the shared volume is not deleted when the cluster is
            decommissioned.
        project_id:
          type: string
          description: >-
            Project ID that will own the volume. When omitted, the caller's
            default project is used.
    # OIDC authentication settings, referenced as oidc_config by both
    # GPUClusterCreateRequest and GPUClusterInfo. Every field except ca_cert
    # is required.
    OIDCConfig:
      type: object
      required:
        - issuer_url
        - client_id
        - username_claim
        - username_prefix
        - group_claim
        - group_prefix
      properties:
        issuer_url:
          type: string
          description: >-
            OIDC issuer URL for authentication. For example,
            https://accounts.google.com
        client_id:
          type: string
          description: OIDC client ID for authentication.
        username_claim:
          type: string
          description: JWT claim to use as the username. For example, 'sub' or 'email'
        username_prefix:
          type: string
          description: >-
            Prefix to add to the username claim to form the final username. For
            example, 'oidc:'
        group_claim:
          type: string
          description: JWT claim to use for user groups. For example, 'groups'
        group_prefix:
          type: string
          description: >-
            Prefix to add to the group claim to form the final group name. For
            example, 'oidc:'
        ca_cert:
          type: string
          description: >-
            CA certificate in PEM format to validate the OIDC issuer's TLS
            certificate. This field is optional but recommended if the issuer
            uses a private CA or self-signed certificate.
    # GPU acceptance test options, referenced by
    # GPUClusterCreateRequest.acceptance_tests_params. No field is required;
    # the *_skipped booleans turn off individual tests.
    AcceptanceTestsParams:
      type: object
      properties:
        enabled:
          type: boolean
          description: Whether to run GPU acceptance tests during cluster bring-up.
        dcgm_diag_level:
          enum:
            - DCGM_DIAG_LEVEL_SHORT
            - DCGM_DIAG_LEVEL_MEDIUM
            - DCGM_DIAG_LEVEL_LONG
            - DCGM_DIAG_LEVEL_EXTENDED
          type: string
          description: >-
            DCGM diagnostic depth. SHORT = readiness; MEDIUM = default; LONG =
            system validation; EXTENDED = memtest. An omitted value selects
            MEDIUM when enabled.
        gpu_burn_duration:
          type: integer
          description: GPU burn duration in seconds; 0 means use the default when enabled.
        nccl_single_node_skipped:
          type: boolean
          description: Skip NCCL single-node acceptance test.
        gpu_burn_skipped:
          type: boolean
          description: Skip GPU burn acceptance test.
        dcgm_diag_skipped:
          type: boolean
          description: Skip DCGM diagnostics acceptance test.
        nccl_multi_node_skipped:
          type: boolean
          description: Skip NCCL multi-node acceptance test.
        storage_skipped:
          type: boolean
          description: Skip storage-performance acceptance test.
      description: >-
        AcceptanceTestsParams groups all GPU acceptance test options when
        enabled is true.
    # Cluster-level configuration, referenced as cluster_config by both
    # GPUClusterCreateRequest and GPUClusterInfo. Only load_balancer is
    # required.
    InstanceClusterConfig:
      type: object
      required:
        - load_balancer
      properties:
        load_balancer:
          enum:
            - NONE
            - TRAEFIK
            - NGINX
            - ISTIO
          type: string
        kubernetes_dashboard_enabled:
          type: boolean
        jumphost_enabled:
          type: boolean
        slurm_startup_scripts:
          $ref: '#/components/schemas/SlurmStartupScripts'
        ingress:
          $ref: '#/components/schemas/ClusterIngressConfig'
        observability:
          $ref: '#/components/schemas/ObservabilityConfig'
        gpu_operator_version:
          type: string
          description: >-
            NVIDIA GPU Operator chart/version for the tenant cluster (e.g.
            v24.6.2). When omitted, a service default is applied.
        network_operator_version:
          type: string
          description: >-
            NVIDIA Network Operator chart/version for the tenant cluster (e.g.
            v24.7.0). When omitted, a service default is applied.
        ssh_ca_enabled:
          type: boolean
          description: >-
            Whether this cluster uses a per-cluster SSH certificate authority
            for OIDC-signed SSH access.
    # One add-on to enable at cluster creation; item type of
    # GPUClusterCreateRequest.add_ons. name and add_on_type are required.
    # NOTE(review): the valid add_on_type values are listed only in the
    # description text, not enforced with an enum - confirm this is intended.
    AddOnCreateRequest:
      type: object
      required:
        - name
        - add_on_type
      properties:
        name:
          type: string
          description: Human-readable name for this add-on instance.
        add_on_type:
          type: string
          description: 'Type of add-on. Valid values: ''dashboard'', ''ingress'', ''torchpass''.'
        config:
          $ref: '#/components/schemas/AddOnConfig'
    # Volume entry returned in GPUClusterInfo.volumes. All four fields are
    # required.
    GPUClusterVolume:
      type: object
      required:
        - volume_id
        - volume_name
        - size_tib
        - status
      properties:
        volume_id:
          description: ID of the volume.
          type: string
        volume_name:
          type: string
          description: User provided name of the volume.
        size_tib:
          type: integer
          description: Size of the volume in TiB.
        status:
          type: string
          description: Current status of the volume.
    # Control plane node entry returned in GPUClusterInfo.control_plane_nodes.
    # Note that `network` here is a single string, whereas
    # GPUClusterGPUWorkerNode exposes a `networks` array.
    GPUClusterControlPlaneNode:
      type: object
      required:
        - node_id
        - status
        - host_name
        - num_cpu_cores
        - memory_gib
        - network
        - phase_transitions
      properties:
        node_id:
          type: string
        status:
          type: string
        host_name:
          type: string
        num_cpu_cores:
          type: integer
        memory_gib:
          type: number
        network:
          type: string
        phase_transitions:
          type: array
          items:
            $ref: '#/components/schemas/NodePhaseTransition'
          description: Phase transition history for this control plane node.
        public_ipv4:
          type: string
          description: Public IPv4 address of the control plane node.
    # GPU worker node entry. Used as the item type of both
    # GPUClusterInfo.gpu_worker_nodes and
    # GPUClusterInfo.deleted_gpu_worker_nodes.
    GPUClusterGPUWorkerNode:
      type: object
      required:
        - node_id
        - status
        - host_name
        - num_cpu_cores
        - num_gpus
        - memory_gib
        - networks
        - phase_transitions
      properties:
        node_id:
          type: string
        status:
          type: string
        host_name:
          type: string
        num_cpu_cores:
          type: integer
        num_gpus:
          type: integer
        memory_gib:
          type: number
        networks:
          type: array
          items:
            type: string
        instance_id:
          type: string
        latest_remediation:
          $ref: '#/components/schemas/Remediation'
        slurm_worker_hostname:
          type: string
        phase_transitions:
          type: array
          items:
            $ref: '#/components/schemas/NodePhaseTransition'
          description: Phase transition history for this GPU worker node.
        marked_for_deletion:
          type: boolean
          description: Whether this node is marked for deletion by the operator.
        public_ipv4:
          type: string
          description: Public IPv4 address of the GPU worker node.
        ib_hca_type:
          type: string
          description: InfiniBand HCA type.
        ib_hca_count:
          type: integer
          description: Number of InfiniBand HCAs.
        nvswitch_count:
          type: integer
          description: Number of NVSwitches.
        nvswitch_type:
          type: string
          description: NVSwitch type.
        ephemeral_storage:
          type: string
          description: Ephemeral storage size, such as 1Ti.
        auto_remediation_enabled:
          type: boolean
          description: Whether auto-remediation is enabled for this node's instance.
        deleted_at:
          type: string
          description: >-
            Timestamp when the node left the live data plane. Only set for
            deleted_gpu_worker_nodes.
          format: date-time
    # One entry of GPUClusterInfo.phase_transitions: a cluster phase and the
    # timestamp of the transition. Both fields are required.
    ClusterPhaseTransition:
      type: object
      required:
        - phase
        - transition_time
      properties:
        phase:
          enum:
            - CLUSTER_PHASE_QUEUED
            - CLUSTER_PHASE_SCHEDULED
            - CLUSTER_PHASE_WAITING_FOR_CONTROL_PLANE_NODES
            - CLUSTER_PHASE_WAITING_FOR_DATA_PLANE_NODES
            - CLUSTER_PHASE_WAITING_FOR_SUBNET
            - CLUSTER_PHASE_WAITING_FOR_SHARED_VOLUME
            - CLUSTER_PHASE_WAITING_FOR_AUTO_SCALER
            - CLUSTER_PHASE_INSTALLING_DRIVERS
            - CLUSTER_PHASE_RUNNING_ACCEPTANCE_TESTS
            - CLUSTER_PHASE_ACCEPTANCE_TESTS_FAILED
            - CLUSTER_PHASE_RUNNING_NCCL_TESTS
            - CLUSTER_PHASE_NCCL_TESTS_FAILED
            - CLUSTER_PHASE_READY
            - CLUSTER_PHASE_PAUSED
            - CLUSTER_PHASE_ON_DEMAND_COMPUTE_PAUSED
            - CLUSTER_PHASE_DEGRADED
            - CLUSTER_PHASE_DELETING
          type: string
          description: Cluster phase.
        transition_time:
          type: string
          description: Timestamp when the phase transition occurred.
          format: date-time
    # Add-on entry returned in GPUClusterInfo.add_ons. All four fields are
    # required; config and state are defined by the AddOnConfig and AddOnState
    # schemas.
    AddOnInfo:
      type: object
      required:
        - name
        - add_on_type
        - config
        - state
      properties:
        name:
          type: string
        add_on_type:
          type: string
        config:
          $ref: '#/components/schemas/AddOnConfig'
        state:
          $ref: '#/components/schemas/AddOnState'
      description: AddOnInfo is returned in cluster responses and add-on CRUD operations.
    # Item type of GPUClusterInfo.node_lifecycle_events. All four fields are
    # required.
    GPUClusterNodeLifecycleEvent:
      type: object
      description: Node lifecycle event included in a GPU cluster timeline.
      required:
        - node_id
        - reason
        - message
        - timestamp
      properties:
        node_id:
          type: string
          description: Tenant node name this lifecycle event applies to.
        reason:
          type: string
          description: >-
            Lifecycle event reason, for example TogetherScaledUp,
            TogetherScaledDown, or TogetherPreempted.
        message:
          type: string
          description: Human-readable lifecycle event message.
        timestamp:
          type: string
          description: Event timestamp.
          format: date-time
    # Slurm script settings referenced by
    # InstanceClusterConfig.slurm_startup_scripts. Every field is an optional
    # string.
    SlurmStartupScripts:
      type: object
      properties:
        worker_prolog:
          type: string
          description: Slurm worker node prolog script.
        worker_epilog:
          type: string
          description: Slurm worker node epilog script.
        controller_prolog:
          type: string
          description: Slurm controller prolog script.
        controller_epilog:
          type: string
          description: Slurm controller epilog script.
        login_init_script:
          type: string
          description: Script run on Slurm login node init.
        nodeset_init_script:
          type: string
          description: Script run on Slurm nodeset init.
        extra_slurm_conf:
          type: string
          description: Additional slurm.conf fragments.
      description: >-
        SlurmStartupScripts carries optional Slurm lifecycle scripts
        (prolog/epilog, init, extra conf).
    # Ingress toggle referenced by InstanceClusterConfig.ingress; a single
    # optional boolean.
    ClusterIngressConfig:
      type: object
      properties:
        enabled:
          type: boolean
    # Observability toggle referenced by InstanceClusterConfig.observability;
    # a single optional boolean.
    ObservabilityConfig:
      type: object
      properties:
        enabled:
          type: boolean
    # Add-on configuration referenced by AddOnCreateRequest.config and
    # AddOnInfo.config. It has one optional sub-config per add-on
    # (dashboard, ingress, torchpass).
    AddOnConfig:
      type: object
      properties:
        dashboard:
          $ref: '#/components/schemas/DashboardConfig'
        ingress:
          $ref: '#/components/schemas/IngressConfig'
        torchpass:
          $ref: '#/components/schemas/TorchpassConfig'
          description: Configuration for the Model Aware TorchPass add-on.
      description: Configuration for a cluster add-on.
    # Item type of the phase_transitions arrays on GPUClusterControlPlaneNode
    # and GPUClusterGPUWorkerNode: a node phase and the timestamp of the
    # transition. Both fields are required.
    NodePhaseTransition:
      type: object
      required:
        - phase
        - transition_time
      properties:
        phase:
          enum:
            - NODE_PHASE_PENDING
            - NODE_PHASE_SCHEDULING
            - NODE_PHASE_BOOTING
            - NODE_PHASE_BOOTSTRAPPING
            - NODE_PHASE_RUNNING
            - NODE_PHASE_SUCCEEDED
            - NODE_PHASE_FAILED
            - NODE_PHASE_PAUSED
          type: string
          description: Node phase.
        transition_time:
          type: string
          description: Timestamp when the phase transition occurred.
          format: date-time
    Remediation:
      # Schema for one remediation request raised against a single instance.
      # Key order inside every property is: description, type, format, enum,
      # items, readOnly.
      description: >-
        Remediation represents a node remediation request for an instance.

        An instance can have multiple remediations over time (e.g., failed
        attempts followed by retries).
      type: object
      required:
        - id
        - cluster_id
        - instance_id
        - mode
        - trigger
        - state
      properties:
        # Identifiers; all three are marked readOnly.
        id:
          type: string
          readOnly: true
        cluster_id:
          type: string
          readOnly: true
        instance_id:
          type: string
          readOnly: true
        # `mode` is the only required property that is not marked readOnly.
        mode:
          description: >
            Remediation mode specifies how the remediation should be performed.


            - `REMEDIATION_MODE_VM_ONLY`: Deletes the VM and provisions a new
            one on any available host.

            - `REMEDIATION_MODE_HOST_AWARE`: Cordons the host, deletes the VM,
            and provisions a new one on a different host.

            - `REMEDIATION_MODE_EVICT_WITHOUT_REPLACEMENT`: Evicts the VM
            without provisioning a replacement.

            - `REMEDIATION_MODE_REBOOT_VM`: Reboots the VM in place.

            - `REMEDIATION_MODE_HOST_POWER_CYCLE`: Cordons and power-cycles the
            bare-metal host while preserving host and node identity.
          type: string
          enum:
            - REMEDIATION_MODE_VM_ONLY
            - REMEDIATION_MODE_HOST_AWARE
            - REMEDIATION_MODE_EVICT_WITHOUT_REPLACEMENT
            - REMEDIATION_MODE_REBOOT_VM
            - REMEDIATION_MODE_HOST_POWER_CYCLE
        trigger:
          description: >
            RemediationTrigger specifies how the remediation was triggered.


            - `REMEDIATION_TRIGGER_MANUAL`: A user-initiated remediation (either
            via web UI or API call).

            - `REMEDIATION_TRIGGER_AUTOMATED`: A system-initiated remediation
            that requires approval.
          type: string
          enum:
            - REMEDIATION_TRIGGER_MANUAL
            - REMEDIATION_TRIGGER_AUTOMATED
          readOnly: true
        state:
          description: >
            RemediationState represents the lifecycle state of a remediation.


            - `PENDING_APPROVAL`: Awaiting approval before processing can begin.

            - `PENDING`: Approved and queued for processing.

            - `RUNNING`: Actively being processed.

            - `SUCCEEDED`: Successfully completed.

            - `FAILED`: Failed with an error.

            - `CANCELLED`: Cancelled by user or system.

            - `AUTO_RESOLVED`: The underlying issue was automatically resolved
            before processing.

            - `QUARANTINING`: Cordoning or preparing the host before
            remediation.

            - `QUARANTINED`: Host has been cordoned or isolated for remediation.
          type: string
          enum:
            - PENDING_APPROVAL
            - PENDING
            - RUNNING
            - SUCCEEDED
            - FAILED
            - CANCELLED
            - AUTO_RESOLVED
            - QUARANTINING
            - QUARANTINED
          readOnly: true
        # `reason` is the only optional property that is not marked readOnly.
        reason:
          description: User-provided reason for the remediation.
          type: string
        # Health-check identifiers that link the remediation to its cause.
        active_health_check_run_id:
          description: Active health check run ID (UUID) that triggered this remediation.
          type: string
          readOnly: true
        passive_health_check_event_id:
          description: Passive health check event ID that triggered this remediation.
          type: string
          readOnly: true
        # Request and review audit fields.
        requested_by:
          description: Who requested the remediation.
          type: string
          readOnly: true
        create_time:
          description: When the remediation was created.
          type: string
          format: date-time
          readOnly: true
        reviewed_by:
          description: Who reviewed the remediation.
          type: string
          readOnly: true
        review_time:
          description: When the remediation was reviewed.
          type: string
          format: date-time
          readOnly: true
        review_comment:
          description: Review comment.
          type: string
          readOnly: true
        # Processing timeline and outcome.
        start_time:
          description: When processing started.
          type: string
          format: date-time
          readOnly: true
        end_time:
          description: When the remediation completed.
          type: string
          format: date-time
          readOnly: true
        error_message:
          description: Error message if the remediation failed.
          type: string
          readOnly: true
        update_time:
          description: When the remediation was last updated.
          type: string
          format: date-time
          readOnly: true
        instance_name:
          description: Display name of the targeted instance.
          type: string
          readOnly: true
        linked_alerts:
          description: >-
            Passive health check alerts linked to this remediation, including
            resolved alerts.
          type: array
          items:
            $ref: '#/components/schemas/PassiveHealthCheckAlert'
          readOnly: true
    AddOnState:
      # Container with one optional entry per add-on; each entry points at
      # that add-on's own state schema.
      description: State for a cluster add-on.
      type: object
      properties:
        dashboard:
          $ref: '#/components/schemas/DashboardState'
        ingress:
          $ref: '#/components/schemas/IngressState'
        torchpass:
          description: State for the Model Aware TorchPass add-on.
          $ref: '#/components/schemas/TorchpassState'
    DashboardConfig:
      # Descriptions added so this schema is documented like its sibling
      # TorchpassConfig.
      # NOTE(review): wording is inferred from AddOnState/TorchpassConfig;
      # confirm the product-facing name of the dashboard add-on.
      type: object
      properties:
        enabled:
          type: boolean
          description: Whether to enable the dashboard add-on.
      description: Configuration for the dashboard cluster add-on.
    IngressConfig:
      # Descriptions added so this schema is documented like its sibling
      # TorchpassConfig.
      # NOTE(review): wording is inferred from AddOnState/TorchpassConfig;
      # confirm the product-facing name of the ingress add-on.
      type: object
      properties:
        enabled:
          type: boolean
          description: Whether to enable the ingress add-on.
      description: Configuration for the ingress cluster add-on.
    TorchpassConfig:
      # Single boolean switch for the TorchPass add-on.
      description: Configuration for the Model Aware TorchPass cluster add-on.
      type: object
      properties:
        enabled:
          description: Whether to enable the Model Aware TorchPass add-on.
          type: boolean
    PassiveHealthCheckAlert:
      # Alert record surfaced by the health check API. Every property is
      # marked readOnly.
      description: Passive health check alert returned by the health check API.
      type: object
      required:
        - passive_health_check_alert_id
        - cluster_id
        - target_vm
        - alert_name
        - severity
        - annotations
        - started_at
      properties:
        # Identity of the alert and the resources it refers to.
        passive_health_check_alert_id:
          description: Primary key UUID for the passive health check alert.
          type: string
          readOnly: true
        instance_id:
          description: >-
            Resolved instance UUID. Empty until the alert is joined to an
            instance.
          type: string
          readOnly: true
        cluster_id:
          description: Cluster UUID the alert was raised against.
          type: string
          readOnly: true
        target_vm:
          description: VM name extracted from the Alertmanager labels.
          type: string
          readOnly: true
        # Alert content as received from Alertmanager.
        alert_name:
          description: Alertmanager alert name.
          type: string
          readOnly: true
        severity:
          description: Canonical severity tier for the alert.
          type: string
          enum:
            - PHC_SEVERITY_INFO
            - PHC_SEVERITY_WARNING
            - PHC_SEVERITY_CRITICAL
          readOnly: true
        annotations:
          description: Alertmanager annotations as key-value strings.
          type: object
          additionalProperties:
            type: string
          readOnly: true
        # Firing window; `resolved_at` is not in the required list.
        started_at:
          description: Time when the underlying alert first fired.
          type: string
          format: date-time
          readOnly: true
        resolved_at:
          description: >-
            Time when the underlying alert resolved. Empty while the alert is
            firing.
          type: string
          format: date-time
          readOnly: true
        node_remediation_intent_id:
          description: Remediation intent UUID attached to this alert, if any.
          type: string
          readOnly: true
    DashboardState:
      # Empty object schema: declares no properties and forbids additional
      # ones. Description added so it is documented like TorchpassState.
      # NOTE(review): wording mirrors TorchpassState; confirm the
      # product-facing name of the dashboard add-on.
      type: object
      properties: {}
      additionalProperties: false
      x-stainless-empty-object: true
      description: Runtime state for the dashboard cluster add-on.
    IngressState:
      # Empty object schema: declares no properties and forbids additional
      # ones. Description added so it is documented like TorchpassState.
      # NOTE(review): wording mirrors TorchpassState; confirm the
      # product-facing name of the ingress add-on.
      type: object
      properties: {}
      additionalProperties: false
      x-stainless-empty-object: true
      description: Runtime state for the ingress cluster add-on.
    TorchpassState:
      # Empty object schema: declares no properties and forbids additional
      # ones.
      description: Runtime state for the Model Aware TorchPass cluster add-on.
      type: object
      properties: {}
      additionalProperties: false
      x-stainless-empty-object: true
  securitySchemes:
    # HTTP bearer authentication; this is the scheme named by the document's
    # top-level `security` requirement.
    bearerAuth:
      scheme: bearer
      type: http
      # Vendor extensions (x-*), reproduced unchanged.
      x-default: default
      x-bearer-format: bearer

````