Skip to content

How to run profiling using Soda Stack

# Resource metadata: identifies this manifest as a `workflow` resource named
# `profile-soda` and tags it for profiling.
name: profile-soda
version: v1
type: workflow
tags:
  - profile
description: This job profiles data
# Workflow body: a single DAG job that passes two datasets to the Soda stack.
# Each dataset entry lists its checks and a profile section selecting columns.
workflow:
  dag:
    - name: sample-profile-soda-01
      title: Sample profile data
      spec:
        stack: soda+python:1.0
        soda:
          # Dataset 1: every active check carries a `category` attribute,
          # and the engine is named explicitly.
          - dataset: dataos://icebase:retail/customer
            checks:
              - row_count between 10 and 1000:
                  attributes:
                    category: Accuracy
              - missing_count(birth_date) = 0:
                  attributes:
                    category: Completeness
              # Disabled check, kept for reference:
              # - invalid_percent(phone) < 1 %:
              #     valid format: phone number
              - invalid_count(number_cars_owned) = 0:
                  valid min: 1
                  valid max: 6
                  attributes:
                    category: Validity
              - duplicate_count(phone) = 0:
                  attributes:
                    category: Uniqueness
            profile:
              # "*" presumably selects every column - confirm against the
              # Soda stack documentation.
              columns:
                - "*"
            engine: minerva
          # Dataset 2: same checks (with the phone-format check enabled), but
          # without category attributes and with no `engine` key.
          # NOTE(review): confirm that omitting `engine` here is intentional.
          - dataset: dataos://icebase:retail/customer_360
            checks:
              - row_count between 10 and 1000
              - missing_count(birth_date) = 0
              - invalid_percent(phone) < 1 %:
                  valid format: phone number
              - invalid_count(number_cars_owned) = 0:
                  valid min: 1
                  valid max: 6
              - duplicate_count(phone) = 0
            profile:
              columns:
                - "*"