Skip to content

How to create multiple Clusters using a single manifest file?

In scenarios where a single cluster is insufficient to handle the query load or specific performance issues arise, multiple Minerva clusters can be created. The following example demonstrates a typical configuration for a multi-cluster setup:

Manifest file

# Cluster 1: Minerva cluster resource named "minervab".
- version: v1
  name: minervab
  type: cluster
  description: the default minerva cluster b
  tags:
    - cluster
    - minerva
  cluster:
    nodeSelector:
      "dataos.io/purpose": "query"
    toleration: query
    # NOTE(review): "api-key" looks like a placeholder value; confirm before applying.
    runAsApiKey: api-key
    minerva:
      replicas: 2
      # Requests are set equal to limits for both CPU and memory.
      resources:
        limits:
          cpu: 2000m
          memory: 4Gi
        requests:
          cpu: 2000m
          memory: 4Gi
      debug:
        logLevel: INFO
        trinoLogLevel: ERROR
      # Depots attached to this cluster, each with its own connector properties.
      depots:
        - address: dataos://icebase:default
          properties:
            iceberg.file-format: PARQUET
            iceberg.compression-codec: GZIP
            hive.config.resources: "/usr/trino/etc/catalog/core-site.xml"
            hive.parquet.use-column-names: "true"
        - address: dataos://filebase:default
          properties:
            hive.config.resources: "/usr/trino/etc/catalog/core-site.xml"
            hive.parquet.use-column-names: "true"
        - address: dataos://kafka:default
          properties:
            kafka.empty-field-strategy: "ADD_DUMMY"
            kafka.table-description-supplier: "confluent"
            kafka.default-schema: "default"
            kafka.confluent-subjects-cache-refresh-interval: "5s"
            kafka.confluent-schema-registry-url: "http://schema-registry.caretaker.svc.cluster.local:8081"
      # Catalog entries. URL, PORT, DB, USERNAME and PASSWORD are placeholders
      # to be replaced with real connection details.
      catalogs:
        - name: redshift
          type: redshift
          properties:
            connection-url: "jdbc:redshift://URL:PORT/DB"
            connection-user: "USERNAME"
            connection-password: "PASSWORD"
        - name: oracle
          type: oracle
          properties:
            connection-url: "jdbc:oracle:thin:@URL:PORT/DB"
            connection-user: "USERNAME"
            connection-password: "PASSWORD"
        - name: cache
          type: memory
          properties:
            memory.max-data-per-node: "128MB"

# Cluster 2: Minerva cluster resource named "minervabc".
- version: v1
  name: minervabc
  type: cluster
  description: the default minerva cluster c
  tags:
    - cluster
    - minerva
  cluster:
    nodeSelector:
      'dataos.io/purpose': 'query'
    toleration: query
    runAsApiKey: api-key
    minerva:
      replicas: 2
      # CPU and memory requests match their limits.
      resources:
        limits:
          cpu: 2000m
          memory: 4Gi
        requests:
          cpu: 2000m
          memory: 4Gi
      debug:
        logLevel: INFO
        trinoLogLevel: ERROR
      # Two depots are attached here: icebase and filebase.
      depots:
        - address: dataos://icebase:default
          properties:
            iceberg.file-format: PARQUET
            iceberg.compression-codec: GZIP
            hive.config.resources: '/usr/trino/etc/catalog/core-site.xml'
            hive.parquet.use-column-names: 'true'
        - address: dataos://filebase:default
          properties:
            hive.config.resources: '/usr/trino/etc/catalog/core-site.xml'
            hive.parquet.use-column-names: 'true'
      # Catalog list; the connection values below are placeholders
      # (URL, PORT, DB, USERNAME, PASSWORD).
      catalogs:
        - name: redshift
          type: redshift
          properties:
            connection-url: 'jdbc:redshift://URL:PORT/DB'
            connection-user: 'USERNAME'
            connection-password: 'PASSWORD'
        - name: oracle
          type: oracle
          properties:
            connection-url: 'jdbc:oracle:thin:@URL:PORT/DB'
            connection-user: 'USERNAME'
            connection-password: 'PASSWORD'
        - name: cache
          type: memory
          properties:
            memory.max-data-per-node: '128MB'