> ## Documentation Index
> Fetch the complete documentation index at: https://docs.together.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Create a new deployment

> Create a new deployment with specified configuration



## OpenAPI

````yaml POST /deployments
openapi: 3.1.0
info:
  title: Together APIs
  description: The Together REST API. See https://docs.together.ai for more details.
  version: 2.0.0
  termsOfService: https://www.together.ai/terms-of-service
  contact:
    name: Together Support
    url: https://www.together.ai/contact
  license:
    name: MIT
    url: https://github.com/togethercomputer/openapi/blob/main/LICENSE
servers:
  - url: https://api.together.ai/v1
security:
  - bearerAuth: []
paths:
  /deployments:
    post:
      tags:
        - Deployments
      summary: Create a new deployment
      description: Create a new deployment with specified configuration
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateDeploymentRequest'
        description: Deployment configuration
        required: true
      responses:
        '200':
          description: Deployment created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.deploy(
              name="my-deployment",
              gpu_type="h100-80gb",
              image="registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            )
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.deploy({
              name: "my-deployment",
              gpu_type: "h100-80gb",
              image: "registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            });
            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.deploy({
              name: "my-deployment",
              gpu_type: "h100-80gb",
              image: "registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            });
            console.log(deployment);
components:
  schemas:
    CreateDeploymentRequest:
      properties:
        args:
          description: >-
            Args overrides the container's CMD. Provide as an array of arguments
            (e.g., ["python", "app.py"])
          items:
            type: string
          type: array
        autoscaling:
          description: >-
            Autoscaling configuration. Example: {"metric":
            "QueueBacklogPerWorker", "target": 1.01} to scale based on queue
            backlog. Omit or set to null to disable autoscaling
          oneOf:
            - $ref: '#/components/schemas/HTTPAutoscalingConfig'
            - $ref: '#/components/schemas/QueueAutoscalingConfig'
            - $ref: '#/components/schemas/CustomMetricAutoscalingConfig'
        command:
          description: >-
            Command overrides the container's ENTRYPOINT. Provide as an array
            (e.g., ["/bin/sh", "-c"])
          items:
            type: string
          type: array
        cpu:
          description: >-
            CPU is the number of CPU cores to allocate per container instance
            (e.g., 0.1 = 100 milli cores)
          minimum: 0.1
          type: number
        description:
          description: >-
            Description is an optional human-readable description of your
            deployment
          type: string
        environment_variables:
          description: >-
            EnvironmentVariables is a list of environment variables to set in
            the container. Each must have a name and either a value or
            value_from_secret
          items:
            $ref: '#/components/schemas/EnvironmentVariable'
          type: array
        gpu_count:
          description: >-
            GPUCount is the number of GPUs to allocate per container instance.
            Defaults to 0 if not specified
          type: integer
        gpu_type:
          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
          enum:
            - h100-80gb
            - h100-40gb-mig
            - b200-192gb
          type: string
        health_check_path:
          description: >-
            HealthCheckPath is the HTTP path for health checks (e.g.,
            "/health"). If set, the platform checks this endpoint to determine
            container health.
          type: string
        image:
          description: Image is the container image to deploy from registry.together.ai.
          type: string
        max_replicas:
          description: >-
            MaxReplicas is the maximum number of container instances. Defaults
            to MinReplicas if not set.
          type: integer
        memory:
          description: >-
            Memory is the amount of RAM to allocate per container instance in
            GiB (e.g., 0.5 = 512MiB)
          maximum: 1000
          type: number
        min_replicas:
          description: >-
            MinReplicas is the minimum number of container instances to run.
            Defaults to 1 if not specified
          type: integer
        name:
          description: >-
            Name is the unique identifier for your deployment. Must contain only
            alphanumeric characters, underscores, or hyphens (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        port:
          description: >-
            Port is the container port your application listens on (e.g., 8080
            for web servers). Required if your application serves traffic
          maximum: 65535
          minimum: 1
          type: integer
        storage:
          description: >-
            Storage is the amount of ephemeral disk storage to allocate per
            container instance (e.g., 10 = 10GiB)
          maximum: 400
          type: integer
        termination_grace_period_seconds:
          description: >-
            TerminationGracePeriodSeconds is the time in seconds to wait for
            graceful shutdown before forcefully terminating the replica
          type: integer
        volumes:
          description: >-
            Volumes is a list of volume mounts to attach to the container. Each
            mount must reference an existing volume by name
          items:
            $ref: '#/components/schemas/VolumeMount'
          type: array
      required:
        - gpu_type
        - image
        - name
      type: object
    DeploymentResponseItem:
      properties:
        args:
          description: Args are the arguments passed to the container's command
          items:
            type: string
          type: array
        autoscaling:
          description: >-
            Autoscaling contains autoscaling configuration parameters for this
            deployment. Omitted when autoscaling is disabled (nil)
          oneOf:
            - $ref: '#/components/schemas/HTTPAutoscalingConfig'
            - $ref: '#/components/schemas/QueueAutoscalingConfig'
            - $ref: '#/components/schemas/CustomMetricAutoscalingConfig'
        command:
          description: Command is the entrypoint command run in the container
          items:
            type: string
          type: array
        cpu:
          description: >-
            CPU is the amount of CPU resource allocated to each replica in cores
            (fractional value is allowed)
          type: number
        created_at:
          description: CreatedAt is the ISO8601 timestamp when this deployment was created
          type: string
          format: date-time
        description:
          description: >-
            Description provides a human-readable explanation of the
            deployment's purpose or content
          type: string
        desired_replicas:
          description: >-
            DesiredReplicas is the number of replicas that the orchestrator is
            targeting
          type: integer
        environment_variables:
          description: >-
            EnvironmentVariables is a list of environment variables set in the
            container
          items:
            $ref: '#/components/schemas/EnvironmentVariable'
          type: array
        gpu_count:
          description: >-
            GPUCount is the number of GPUs allocated to each replica in this
            deployment
          type: integer
        gpu_type:
          description: >-
            GPUType specifies the type of GPU requested (if any) for this
            deployment
          enum:
            - h100-80gb
            - h100-40gb-mig
            - b200-192gb
          type: string
        health_check_path:
          description: >-
            HealthCheckPath is the HTTP path used for health checks of the
            application
          type: string
        id:
          description: ID is the unique identifier of the deployment
          type: string
        image:
          description: Image specifies the container image used for this deployment
          type: string
        max_replicas:
          description: >-
            MaxReplicas is the maximum number of replicas to run for this
            deployment
          type: integer
        memory:
          description: >-
            Memory is the amount of memory allocated to each replica in GiB
            (fractional value is allowed)
          type: number
        min_replicas:
          description: >-
            MinReplicas is the minimum number of replicas to run for this
            deployment
          type: integer
        name:
          description: Name is the name of the deployment
          type: string
        object:
          description: The object type, which is always `deployment`.
          const: deployment
        port:
          description: Port is the container port that the deployment exposes
          type: integer
        ready_replicas:
          description: >-
            ReadyReplicas is the current number of replicas that are in the
            Ready state
          type: integer
        replica_events:
          additionalProperties:
            $ref: '#/components/schemas/ReplicaEvent'
          description: >-
            ReplicaEvents is a mapping of replica names or IDs to their status
            events
          type: object
        status:
          allOf:
            - $ref: '#/components/schemas/DeploymentStatus'
          description: >-
            Status represents the overall status of the deployment (e.g.,
            Updating, Scaling, Ready, Failed)
          enum:
            - Updating
            - Scaling
            - Ready
            - Failed
        storage:
          description: >-
            Storage is the amount of storage (in MB or units as defined by the
            platform) allocated to each replica
          type: integer
        updated_at:
          description: >-
            UpdatedAt is the ISO8601 timestamp when this deployment was last
            updated
          type: string
          format: date-time
        volumes:
          description: Volumes is a list of volume mounts for this deployment
          items:
            $ref: '#/components/schemas/VolumeMount'
          type: array
      type: object
    HTTPAutoscalingConfig:
      description: >-
        Autoscaling config for HTTPTotalRequests and HTTPAvgRequestDuration
        metrics
      properties:
        metric:
          description: Metric must be HTTPTotalRequests or HTTPAvgRequestDuration
          enum:
            - HTTPTotalRequests
            - HTTPAvgRequestDuration
          example: HTTPTotalRequests
          type: string
        target:
          description: >-
            Target is the threshold value. Default: 100 for HTTPTotalRequests,
            500 (ms) for HTTPAvgRequestDuration
          example: 100
          type: number
        time_interval_minutes:
          description: 'TimeIntervalMinutes is the rate window in minutes. Default: 10'
          example: 10
          type: integer
      type: object
    QueueAutoscalingConfig:
      description: Autoscaling config for QueueBacklogPerWorker metric
      properties:
        metric:
          description: Metric must be QueueBacklogPerWorker
          enum:
            - QueueBacklogPerWorker
          example: QueueBacklogPerWorker
          type: string
        model:
          description: >-
            Model overrides the model name for queue status lookup. Defaults to
            the deployment app name
          type: string
        target:
          description: 'Target is the threshold value. Default: 1.01'
          example: 1.01
          type: number
      type: object
    CustomMetricAutoscalingConfig:
      description: Autoscaling config for CustomMetric metric
      properties:
        custom_metric_name:
          description: >-
            CustomMetricName is the Prometheus metric name. Required. Must match
            [a-zA-Z_:][a-zA-Z0-9_:]*
          example: my_custom_metric
          type: string
        metric:
          description: Metric must be CustomMetric
          enum:
            - CustomMetric
          example: CustomMetric
          type: string
        target:
          description: 'Target is the threshold value. Default: 500'
          example: 500
          type: number
      type: object
    EnvironmentVariable:
      properties:
        name:
          description: >-
            Name is the environment variable name (e.g., "DATABASE_URL"). Must
            start with a letter or underscore, followed by letters, numbers, or
            underscores
          type: string
        value:
          description: >-
            Value is the plain text value for the environment variable. Use this
            for non-sensitive values. Either Value or ValueFromSecret must be
            set, but not both
          type: string
        value_from_secret:
          description: >-
            ValueFromSecret references a secret by name or ID to use as the
            value. Use this for sensitive values like API keys or passwords.
            Either Value or ValueFromSecret must be set, but not both
          type: string
      required:
        - name
      type: object
    VolumeMount:
      properties:
        mount_path:
          description: >-
            MountPath is the path in the container where the volume mounts
            (e.g., "/data").
          type: string
        name:
          description: >-
            Name is the name of the volume to mount. Must reference an existing
            volume by name or ID
          type: string
        version:
          description: >-
            Version is the volume version to mount. On create, defaults to the
            latest version. On update, defaults to the currently mounted
            version.
          type: integer
      required:
        - mount_path
        - name
      type: object
    ReplicaEvent:
      properties:
        image:
          description: Image is the container image used for this replica
          type: string
        replica_ready_since:
          description: >-
            ReplicaReadySince is the timestamp when the replica became ready to
            serve traffic
          type: string
        replica_status:
          description: >-
            ReplicaStatus is the current status of the replica (e.g., "Running",
            "Waiting", "Terminated")
          type: string
        replica_status_message:
          description: >-
            ReplicaStatusMessage provides a human-readable message explaining
            the replica's status
          type: string
        replica_status_reason:
          description: >-
            ReplicaStatusReason provides a brief machine-readable reason for the
            replica's status
          type: string
        revision_id:
          description: >-
            RevisionID is the deployment revision ID associated with this
            replica
          type: string
        volume_preload_completed_at:
          description: >-
            VolumePreloadCompletedAt is the timestamp when the volume preload
            completed
          type: string
        volume_preload_started_at:
          description: >-
            VolumePreloadStartedAt is the timestamp when the volume preload
            started
          type: string
        volume_preload_status:
          description: >-
            VolumePreloadStatus is the status of the volume preload (e.g.,
            "InProgress", "Completed", "Failed")
          type: string
      type: object
    DeploymentStatus:
      enum:
        - Updating
        - Scaling
        - Ready
        - Failed
      type: string
      x-enum-varnames:
        - DeploymentStatusUpdating
        - DeploymentStatusScaling
        - DeploymentStatusReady
        - DeploymentStatusFailed
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      x-bearer-format: bearer
      x-default: default

````