Example YAML
Published 10 February 2025
Below is an example of a typical workflow YAML file. It includes steps for masking and subsetting, starting from a backup file, but could easily be modified to subset from a live database.
# Example workflow: restore a backup, create an empty copy of its schema,
# subset data into that copy, classify and mask it, then export the result
# as a new backup file.
#
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been flattened. In particular, the `output` and
# `output-file-path` keys are placed under `with`, next to the other step
# parameters - confirm against the workflow schema.
#
# Placeholder forms used in this file:
#   ${env.NAME}         presumably supplied by the environment - confirm
#   ${var.NAME}         a value from `vars`, or one stored by an earlier step
#   ${connection.NAME}  a connection from `connections`, or one stored by
#                       an earlier step
#   =var.NAME / =connection.NAME
#                       appears to store the step's result under NAME so
#                       later steps can reference it (e.g. schema-script-file)
version: 1

vars:
  # Path to the original backup file
  backup-path: ${env.backup-path}
  # Name of the database to restore the original backup to
  restored-database-name: ${env.restored-database-name}
  # Name of the database to subset to
  # (must be different from restored-database-name)
  subset-database-name: ${env.subset-database-name}
  # Path to where you want the final backup to be saved
  output-backup-path: /tmp/${env.output-backup-file-name}
  # Should we overwrite the existing backup file in this location?
  overwrite: true

connections:
  # Your server's connection string (needs SA permissions)
  server:
    ado: ${env.connection-string}
    ado-engine: ${env.connection-engine}  # SqlServer or similar

jobs:
  job1:
    steps:
      # Restore the backup to the server
      - uses: deploy-backup
        with:
          backup-path: ${var.backup-path}
          connection: ${connection.server}
          database-name: ${var.restored-database-name}
          # NOTE(review): presumably marks the database as temporary - confirm
          ephemeral: true
          # Referenced later as ${connection.restored-backup}
          output: =connection.restored-backup

      # Create a script that will create an empty schema of the database
      - uses: create-schema-script
        with:
          connection: ${connection.server}
          database-name: ${var.restored-database-name}
          # Referenced later as ${var.schema-script-file}
          output: =var.schema-script-file

      # Deploy the schema to the server as a new database
      - uses: deploy-from-script
        with:
          connection: ${connection.server}
          database-name: ${var.subset-database-name}
          ephemeral: true
          script-file: ${var.schema-script-file}
          # Referenced later as ${connection.subset-database}
          output: =connection.subset-database

      # Subset the data from the restored database to the new database
      - uses: subset
        with:
          source-connection: ${connection.restored-backup}
          target-connection: ${connection.subset-database}
          # Not referenced by any later step in this example
          output-file-path: =var.subsetting-file

      # Classify the data
      - uses: classify
        with:
          connection: ${connection.subset-database}
          # Consumed by the map step below
          output-file-path: =var.classification-file

      # Map classification data for masking
      - uses: map
        with:
          connection: ${connection.subset-database}
          classification-file-path: ${var.classification-file}
          # Consumed by the mask step below
          output-file-path: =var.masking-file

      # Mask the data
      - uses: mask
        with:
          connection: ${connection.subset-database}
          input-masking-file-path: ${var.masking-file}

      # Export the masked and subset data to a new backup file
      - uses: export-backup
        with:
          backup-path: ${var.output-backup-path}
          connection: ${connection.server}
          database-name: ${var.subset-database-name}
          overwrite: ${var.overwrite}
          # Not referenced by any later step in this example
          output-file-path: =var.backup-file