Example Python scripts
Published 12 December 2013
The page is a list of example IronPython scripts you can use to generate data with the Python script generator. You can use these examples as starting points for creating your own scripts.
Book titles (SQL)
This script generates SQL Server book titles (eg SQL Server Hardware, SQL Server Statistics, Inside the SQL Server Query Optimizer):
- # Read an XML file
- # Use the CLR XML libraries
- import clr
- clr.AddReference("System.Xml")
- from System.Xml.XPath import XPathDocument, XPathNavigator
- # A sample XML file
- filename = r"BookStore.xml"
- def main(config):
- filepath = config["config_path"] + "\\" + filename
- return list(books(filepath, column_size=config["column_size"]))
- def books(filepath, column_size=50):
- # Locate all titles of books in this document
- doc = XPathDocument(filepath)
- nav = doc.CreateNavigator()
- expr = nav.Compile("/bookstore/book/title")
- titles = nav.Select(expr)
- for title in titles:
- # Truncate titles to the column size before yield
- yield str(title)[:column_size]
Credit card numbers (VISA style)
This script generates valid VISA style credit card numbers (eg 4289911008500748):
- # Generate VISA-style numbers with a Luhn checksum
- import clr
- clr.AddReference("System")
- from System import Random
- random = Random()
- def choice(l):
- return l[random.Next(len(l))]
- def completed_number(prefix, length):
- # Given a prefix and a desired length, fill in the number up
- # to the desired length, using Luhn to compute the checksum
- ccnumber = list(prefix)
- # Generate digits
- for _ in range(length - len(prefix) - 1):
- ccnumber.append(choice('0123456789'))
- # Calculate sum
- sum = pos = 0
- reversedCCnumber = list(reversed(ccnumber))
- while pos < length - 1:
- odd = int(reversedCCnumber[pos]) * 2
- if odd > 9: odd -= 9
- if pos != (length - 2): sum += int(reversedCCnumber[pos+1])
- sum += odd
- pos += 2
- # Calculate check digit
- checkdigit = ((sum / 10 + 1) * 10 - sum) % 10
- ccnumber.append(str(checkdigit))
- return ''.join(ccnumber)
- def main(config):
- for _ in range(config["n_rows"]):
- # VISA numbers start with a 4
- yield completed_number('4', 16)
Initials
This scripts generates lowercase initials (eg lg, pg, cs):
- # Combinatoric generator
- # The Python iteration and combinatorics library
- import itertools
- lowercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".lower()
- def main(config):
- return list(combinations())
- def combinations():
- # Iterate over all pairs of letters in `lowercase`
- for pair in itertools.combinations(lowercase, 2):
- # and yield them as a string
- yield "".join(pair)
ISO 639-3
This script uses a URL to generate ISO 639-3 language codes (eg afa, alg, art):
- # Make an HTTP request and read the output
- import clr
- clr.AddReference("System.Net")
- from System.Net import HttpWebRequest
- clr.AddReference("System")
- from System.IO import StreamReader
- # geonames list of iso-laguages
- url = "http://download.geonames.org/export/dump/iso-languagecodes.txt"
- def main(config):
- return list(read_wiki(column_size=config["column_size"]))
- def read_wiki(column_size=50):
- # Create a CLR web request object
- http = HttpWebRequest.Create(url)
- http.Timeout = 5000
- http.UserAgent = "Red Gate SQL Data Generator"
- # Read the response with a CLR StreamReader
- response = http.GetResponse()
- responseStream = StreamReader(response.GetResponseStream())
- html = responseStream.ReadToEnd()
- # Yield all lines that start with a *,
- # truncated to the column width
- for line in html.splitlines():
- if not line.startswith("ISO"):
- items = line.split('\t')
- yield items[0] #ISO 639-3
- #yield items[1] #ISO 639-2
- #yield items[2] #ISO 639-1
- #yield items[3] #Language name
Numbered labels
This script generates numbered labels (eg Label 42, Label 43, Label 44):
- # Specify a string stem as constant and append an incrementing number to it
- def main(config):
- return list(Sequence(config["n_rows"])) # The max number of rows available
- def Sequence(max):
- for i in range(1, max + 1): # Modify the range start and end points in order to offset the row number
- yield "Label {0}".format(i) # Format the output string here
US cities
This script generates US city names (eg New York, Los Angeles, Chicago):
- # Open a file and filter the results
- filename = r"cities.txt"
- def main(config):
- filepath = config["config_path"] + "\\" + filename
- return list(cities(filepath))
- def cities(filepath):
- with open(filepath) as cities:
- for city in cities:
- if city.startswith(tuple("ABDCEFG")):
- yield city.strip()
Years of service
This script uses a date of birth column to generate years of service numbers (eg 1, 23, 5):
To use this script, on line 5, replace InsertColumnName with the name of a column in your table that contains dates of birth.
- # This example takes a column as a date of birth
- # Then work out a random number of years service
- # This if this is > now the person is still working at this company
- def main(config):
- dateOfBirth = InsertColumnName
- workingAge = 18
- age = (DateTime.Now - dateOfBirth).TotalDays/365
- numberofYearsWorking = age - workingAge
- #Initialize Randomness only on the first row
- if (config["row_count"] == 0):
- # could use python random number generator
- # and store the value in the config
- session.SetupNormalGenerator(False, config["seed"], 1.0, 50, 10, 0.2)
- if (numberofYearsWorking > 0) :
- yearsOfService = session.GetNextRandomNumber()
- if (yearsOfService > numberofYearsWorking) :
- #Person still working at this company
- return numberofYearsWorking
- else:
- return yearsOfService
- return 0 #Too young