Skip to content

Schema for MACHINE configuration

The schema to configure machines to run processes.

Schema:

### Schema for YAML configuration to run CLI script

### ANCHOR: Configure remote machine
# Shared rule set, anchored as `common` so that other top-level keys in this
# file can reuse it through the `*common` alias.
common_schema: &common
  type: dict
  schema:
    # Machine configuration. Accepts all dpdispatcher parameters:
    # https://docs.deepmodeling.com/projects/dpdispatcher/en/latest/machine.html
    machine:
      required: true
      type: dict
      # Keys not listed below are accepted, so any other dpdispatcher
      # machine parameter passes validation.
      allow_unknown: true
      schema:
        # Batch system type.
        # Choices: 'Bash', 'Slurm', 'OpenPBS', 'SGE', 'TORQUE', 'LSF', 'OpenAPI'.
        # See more: https://docs.deepmodeling.com/projects/dpdispatcher/en/latest/batch.html
        batch_type:
          type: string
        # Context type. Choices: 'Local', 'SSH', 'HDFS', 'OpenAPI'.
        # See more: https://docs.deepmodeling.com/projects/dpdispatcher/en/latest/context.html
        context_type:
          type: string
        # Remote root directory.
        remote_root:
          type: string
        # Remote profile (connection settings).
        remote_profile:
          type: dict
          schema:
            # Hostname.
            hostname:
              type: string
            # Username.
            username:
              type: string
            # Password.
            password:
              type: string
            # Port.
            port:
              type: integer
            # Timeout.
            timeout:
              type: integer

    # Resources on the remote machine, defined based on the batch system.
    # Accepts all dpdispatcher parameters:
    # https://docs.deepmodeling.com/projects/dpdispatcher/en/latest/resources.html
    resources:
      required: true
      type: dict
      # Keys not listed below are accepted, so any other dpdispatcher
      # resources parameter passes validation.
      allow_unknown: true
      schema:
        # Number of nodes.
        number_node:
          type: integer
        # Number of CPUs per node.
        cpu_per_node:
          type: integer
        # Number of GPUs per node.
        gpu_per_node:
          type: integer
        # list[str] of custom flags.
        # Only the container type is checked here, not the item type.
        custom_flags:
          type: list
        # list[str] of modules.
        module_list:
          type: list
        # list[str] of sources.
        source_list:
          type: list
        # Environment variables.
        envs:
          type: dict

    # Command to execute on the remote machine.
    command:
      type: string
    # Maximum number of jobs in one submission to the cluster.
    # Documented default is 5; no `default` rule is declared in this schema.
    job_limit:
      type: integer
    # Ratio of total jobs to run on this machine. If not set, all jobs are
    # distributed equally between all machines.
    work_load_ratio:
      type: float


### ANCHOR: Configure remote machine to run training.
# Reuses the rule set anchored as `common`.
train: *common

### ANCHOR: Configure remote machine to run MD simulation.
# Reuses the rule set anchored as `common`.
md: *common

### ANCHOR: Configure remote machine to run DFT calculation.
# Reuses the rule set anchored as `common`.
dft: *common