SQL Data Generator 3

Example Python scripts

The page is a list of example IronPython scripts you can use to generate data with the Python script generator. You can use these examples as starting points for creating your own scripts.

Book titles (SQL)

This script generates SQL Server book titles (eg SQL Server Hardware, SQL Server Statistics, Inside the SQL Server Query Optimizer):

# Read an XML file
# Use the CLR XML libraries
import clr
clr.AddReference("System.Xml")
from System.Xml.XPath import XPathDocument, XPathNavigator
# A sample XML file 
filename = r"BookStore.xml"
def main(config):
    filepath = config["config_path"] + "\\" + filename
    return list(books(filepath, column_size=config["column_size"]))
def books(filepath, column_size=50):
    # Locate all titles of books in this document
    doc = XPathDocument(filepath)
    nav = doc.CreateNavigator()
    expr = nav.Compile("/bookstore/book/title")
    titles = nav.Select(expr)
    for title in titles:
        # Truncate titles to the column size before yield
        yield str(title)[:column_size]

Credit card numbers (VISA style)

This script generates valid VISA style credit card numbers (eg 4289911008500748):

# Generate VISA-style numbers with a Luhn checksum
import clr
clr.AddReference("System")
from System import Random
random = Random()
def choice(l):
    return l[random.Next(len(l))]
def completed_number(prefix, length):
    # Given a prefix and a desired length, fill in the number up
    # to the desired length, using Luhn to compute the checksum
    ccnumber = list(prefix)
    # Generate digits
    for _ in range(length - len(prefix) - 1):
    	ccnumber.append(choice('0123456789'))
    # Calculate sum 
    sum = pos = 0
    reversedCCnumber = list(reversed(ccnumber))
    while pos < length - 1:
        odd = int(reversedCCnumber[pos]) * 2
        if odd > 9: odd -= 9
        if pos != (length - 2): sum += int(reversedCCnumber[pos+1])
        sum += odd
        pos += 2
    # Calculate check digit
    checkdigit = ((sum / 10 + 1) * 10 - sum) % 10
    ccnumber.append(str(checkdigit))
    
    return ''.join(ccnumber)
def main(config):
    for _ in range(config["n_rows"]):
        # VISA numbers start with a 4
        yield completed_number('4', 16)

Initials

This scripts generates lowercase initials (eg lg, pg, cs):

# Combinatoric generator
# The Python iteration and combinatorics library
import itertools
lowercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".lower()
def main(config):
    return list(combinations())
def combinations():
    # Iterate over all pairs of letters in `lowercase`
    for pair in itertools.combinations(lowercase, 2):
        # and yield them as a string
        yield "".join(pair)

ISO 639-3

This script uses a URL to generate ISO 639-3 language codes (eg afa, alg, art):

# Make an HTTP request and read the output
import clr
clr.AddReference("System.Net")
from System.Net import HttpWebRequest
clr.AddReference("System")
from System.IO import StreamReader
# geonames list of iso-laguages 
url = "http://download.geonames.org/export/dump/iso-languagecodes.txt"
def main(config):
    return list(read_wiki(column_size=config["column_size"]))
def read_wiki(column_size=50):
    # Create a CLR web request object
    http = HttpWebRequest.Create(url)
    http.Timeout = 5000
    http.UserAgent = "Red Gate SQL Data Generator"
    
    # Read the response with a CLR StreamReader
    response = http.GetResponse()
    responseStream = StreamReader(response.GetResponseStream())
    html = responseStream.ReadToEnd()
    
    # Yield all lines that start with a *,
    # truncated to the column width
    for line in html.splitlines():
        if not line.startswith("ISO"):
            items = line.split('\t')
            yield items[0]  #ISO 639-3
            #yield items[1]  #ISO 639-2
            #yield items[2]  #ISO 639-1
            #yield items[3]  #Language name

Numbered labels

This script generates numbered labels (eg Label 42, Label 43, Label 44):

# Specify a string stem as constant and append an incrementing number to it
def main(config):
    return list(Sequence(config["n_rows"])) # The max number of rows available
def Sequence(max):
    for i in range(1, max + 1): # Modify the range start and end points in order to offset the row number
        yield "Label {0}".format(i) # Format the output string here

US cities

This script generates US city names (eg New York, Los Angeles, Chicago):

# Open a file and filter the results
filename = r"cities.txt"
def main(config):
    filepath = config["config_path"] + "\\" + filename
    return list(cities(filepath))
def cities(filepath):
    with open(filepath) as cities:
        for city in cities:
            if city.startswith(tuple("ABDCEFG")):
                yield city.strip()

Years of service

This script uses a date of birth column to generate years of service numbers (eg 1, 23, 5):

To use this script, on line 5, replace InsertColumnName with the name of a column in your table that contains dates of birth.

# This example takes a column as a date of birth
# Then work out a random number of years service
# This if this is > now the person is still working at this company
def main(config):
    dateOfBirth = InsertColumnName
    workingAge = 18
    age = (DateTime.Now - dateOfBirth).TotalDays/365 
    numberofYearsWorking = age - workingAge
    #Initialize Randomness only on the first row
    if (config["row_count"] == 0):
        # could use python random number generator
        # and store the value in the config
        session.SetupNormalGenerator(False, config["seed"], 1.0, 50, 10, 0.2)
    
    if (numberofYearsWorking > 0) :
        yearsOfService = session.GetNextRandomNumber()
        if (yearsOfService > numberofYearsWorking) :           
            #Person still working at this company
            return numberofYearsWorking
        else:
            return yearsOfService
    return 0 #Too young

 

 


Didn't find what you were looking for?