Merge git repositories

We have more than 20 Git repositories for the same project, but it causes lots of headaches for building. We’ve decided to merge most of them into a single one, keeping the file history when possible.

This script uses newren/git-filter-repo and as such needs Python 3.

It’s done in 3 steps:

clone the local repositories from d:\dev\xxx to d:\devnew\xxx using git-filter-repo with source and target parameters
create a new repository at d:\devnew\merged
merge from d:\devnew\xxx to d:\devnew\merged\xxx using git merge --allow-unrelated-histories

$repos = @(
    "Build",
    "Deploy",
    "External",
    "API",
    "Database",
    "Server",
    "Services",
    "UI",
    "Utils"
)

$currentLocation = "D:\Dev"
$newLocation = "D:\DevNew"

# removed:
# - check that git and python3 are installed and on the PATH
# - cleanup previous merges
# - check that repositories are clean
# - logging/write-host

$total = $repos.Length
$nb = 0

foreach ($repo in $repos) {
	$nb++
	Write-Host "Copy of $repo ($nb/$total)" -ForegroundColor Blue

	# checkout all remote branches in local
	git -C $currentLocation\$repo fetch -p
	$remoteBranches = (git -C $currentLocation\$repo branch -l -r).Split("`n").Trim().Replace("origin/","") | where { -not $_.StartsWith("HEAD ->") }
	$localBranches = (git -C $currentLocation\$repo branch -l).Split("`n").Replace("*","").Trim()
	foreach ($remoteBranch in $remoteBranches) {
		if (-not ($remoteBranch -in $localBranches)) {	
			git -C $currentLocation\$repo branch $remoteBranch "origin/$remoteBranch"
		}
		else {
			git -C $currentLocation\$repo rebase "origin/$remoteBranch" $remoteBranch
		}
	}

	# re-checkout dev
	git -C $currentLocation\$repo checkout dev
	git -C $currentLocation\$repo fetch
	git -C $currentLocation\$repo rebase
	
	# https://stackoverflow.com/a/14728706/
	git -C $currentLocation\$repo -c gc.reflogExpire=0 -c gc.reflogExpireUnreachable=0 -c gc.rerereresolved=0 -c gc.rerereunresolved=0 -c gc.pruneExpire=now gc

	# initialize new repo
	git init $newLocation\$repo
	
	# define ignored files and folders in history (old data/tests/mistakes you won't ever need again)
	$ignores = @()
	elseif ($repo -eq "Build") {
		$ignores = @("installers", "xunit", "opencover", "reportgenerator")
	}
	elseif ($repo -eq "Server") {
		$ignores = @("TestProject", "OldProject")
	}
	
	# build filter string
	$filterExpression = "python git-filter-repo --source $currentLocation\$repo --target $newLocation\$repo --path-rename :$repo/ --path packages --path trunk --path-glob *.suo"
	foreach ($ignore in $ignores) {
		if ($ignore.IndexOf("*") -gt -1) {
			$filterExpression += " --path-glob $ignore"
		}
		else {
			$filterExpression += " --path $ignore"
		}
	}
	
	# move tags/* and branches/* to $repo/tags/* and $repo/branches/*
	$filterExpression += " --invert-paths --mailmap $PSScriptRoot\mailmap.txt --refname-callback 'return refname.replace(`"refs/heads/`", `"refs/heads/$repo/`")replace(`"refs/tags/`", `"refs/tags/$repo/`")'"
	
	# copy the git history while filtering the repo
	Invoke-Expression $filterExpression
}

# create merged repo
git init $newLocation\Merged
git -C $newLocation\Merged checkout -b dev

# initialize LFS for binary files
git -C $newLocation\Merged lfs install
git -C $newLocation\Merged lfs track "*.dll"
git -C $newLocation\Merged lfs track "*.exe"

# operations on new repositories
$nb = 0
foreach ($repo in $repos) {
	$nb++
	Write-Host "Starts merging $repo into $newLocation\Merged ($nb/$total)" -ForegroundColor Blue

	# get list of imported branches and tags
	$branches = (git -C $newLocation\$repo branch -l).Split("`n").Replace("*","").Trim() | where { $_.StartsWith($repo) }
	$tags = $(git -C $newLocation\$repo tag -l).Split()

	# add remote
	git -C $newLocation\Merged remote add "$($repo)_remote" $newLocation\$repo
	git -C $newLocation\Merged fetch "$($repo)_remote"
	
	# create branches
	foreach ($branch in $branches) {
		git -C $newLocation\Merged branch $branch "$($repo)_remote/$branch"
	}
	
	# merge imported repo into merged repo
	git -C $newLocation\Merged checkout dev
	git -C $newLocation\Merged merge --allow-unrelated-histories "$repo/dev"

	git -C $newLocation\Merged remote remove "$($repo)_remote"
}

git -C $newLocation\Merged -c gc.reflogExpire=0 -c gc.reflogExpireUnreachable=0 -c gc.rerereresolved=0 -c gc.rerereunresolved=0 -c gc.pruneExpire=now gc

$EndDate = Get-Date

$duration = New-Timespan -Start $StartDate -End $EndDate
Write-Host "====================== DONE IN $($duration.minutes) mn ======================" -ForegroundColor Green

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

$repos = @(

"Build",

"Deploy",

"External",

"API",

"Database",

"Server",

"Services",

"UI",

"Utils"

)

$currentLocation = "D:\Dev"

$newLocation = "D:\DevNew"

# removed:

# - check that git and python3 are installed and on the PATH

# - cleanup previous merges

# - check that repositories are clean

# - logging/write-host

$total = $repos.Length

$nb = 0

foreach ($repo in $repos) {

$nb++

Write-Host "Copy of $repo ($nb/$total)" -ForegroundColor Blue

# checkout all remote branches in local

git -C $currentLocation\$repo fetch -p

$remoteBranches = (git -C $currentLocation\$repo branch -l -r).Split("`n").Trim().Replace("origin/","") | where { -not $_.StartsWith("HEAD ->") }

$localBranches = (git -C $currentLocation\$repo branch -l).Split("`n").Replace("*","").Trim()

foreach ($remoteBranch in $remoteBranches) {

if (-not ($remoteBranch -in $localBranches)) {

git -C $currentLocation\$repo branch $remoteBranch "origin/$remoteBranch"

}

else {

git -C $currentLocation\$repo rebase "origin/$remoteBranch" $remoteBranch

}

# re-checkout dev

git -C $currentLocation\$repo checkout dev

git -C $currentLocation\$repo fetch

git -C $currentLocation\$repo rebase

# https://stackoverflow.com/a/14728706/

git -C $currentLocation\$repo -c gc.reflogExpire=0 -c gc.reflogExpireUnreachable=0 -c gc.rerereresolved=0 -c gc.rerereunresolved=0 -c gc.pruneExpire=now gc

# initialize new repo

git init $newLocation\$repo

# define ignored files and folders in history (old data/tests/mistakes you won't ever need again)

$ignores = @()

elseif ($repo -eq "Build") {

$ignores = @("installers", "xunit", "opencover", "reportgenerator")

}

elseif ($repo -eq "Server") {

$ignores = @("TestProject", "OldProject")

}

# build filter string

$filterExpression = "python git-filter-repo --source $currentLocation\$repo --target $newLocation\$repo --path-rename :$repo/ --path packages --path trunk --path-glob *.suo"

foreach ($ignore in $ignores) {

if ($ignore.IndexOf("*") -gt -1) {

$filterExpression += " --path-glob $ignore"

}

else {

$filterExpression += " --path $ignore"

}

# move tags/* and branches/* to $repo/tags/* and $repo/branches/*

$filterExpression += " --invert-paths --mailmap $PSScriptRoot\mailmap.txt --refname-callback 'return refname.replace(`"refs/heads/`", `"refs/heads/$repo/`")replace(`"refs/tags/`", `"refs/tags/$repo/`")'"

# copy the git history while filtering the repo

Invoke-Expression $filterExpression

}

# create merged repo

git init $newLocation\Merged

git -C $newLocation\Merged checkout -b dev

# initialize LFS for binary files

git -C $newLocation\Merged lfs install

git -C $newLocation\Merged lfs track "*.dll"

git -C $newLocation\Merged lfs track "*.exe"

# operations on new repositories

$nb = 0

foreach ($repo in $repos) {

$nb++

Write-Host "Starts merging $repo into $newLocation\Merged ($nb/$total)" -ForegroundColor Blue

# get list of imported branches and tags

$branches = (git -C $newLocation\$repo branch -l).Split("`n").Replace("*","").Trim() | where { $_.StartsWith($repo) }

$tags = $(git -C $newLocation\$repo tag -l).Split()

# add remote

git -C $newLocation\Merged remote add "$($repo)_remote" $newLocation\$repo

git -C $newLocation\Merged fetch "$($repo)_remote"

# create branches

foreach ($branch in $branches) {

git -C $newLocation\Merged branch $branch "$($repo)_remote/$branch"

}

# merge imported repo into merged repo

git -C $newLocation\Merged checkout dev

git -C $newLocation\Merged merge --allow-unrelated-histories "$repo/dev"

git -C $newLocation\Merged remote remove "$($repo)_remote"

}

git -C $newLocation\Merged -c gc.reflogExpire=0 -c gc.reflogExpireUnreachable=0 -c gc.rerereresolved=0 -c gc.rerereunresolved=0 -c gc.pruneExpire=now gc

$EndDate = Get-Date

$duration = New-Timespan -Start $StartDate -End $EndDate

Write-Host "====================== DONE IN $($duration.minutes) mn ======================" -ForegroundColor Green

I’ve removed a lot of code for brievity, so maybe it won’t work out of the box, but you should get the general idea.

Next up: pushing the new repository, then migrate the developers workstations, for which I wrote another script:

# removed: checks and logs

$oldRootPath = "D:\Dev\"
$newRootPath = "D:\Dev\Mezzo\"

Install-Module -Name 'IISAdministration' -Force -ErrorAction Stop
Import-Module IISAdministration

# get sites list
$sites = @()
Get-IISSite | % {
	$sitepath = $_.Applications.VirtualDirectories.PhysicalPath

	if ($sitepath -is [array]) {
		$sitepath = $sitepath[0]
	}

	$sites += @{
		PhysicalPath = $sitepath
		Name = $_.Name
	}
}

# iterate over sites
foreach ($site in $sites) {
	if (-not $site.PhysicalPath.ToLower().StartsWith($oldRootPath.Tolower())) {
		continue
	}
	
	$sitename = $site.Name
	$sitepath = $site.PhysicalPath

	$relativePhysicalPath = $sitepath.Substring($oldRootPath.Length)
	$skip = $false
	$virtualDirectoryNames = @()
	switch ($relativePhysicalPath.ToLower()) {
		"ui\users" {
			$virtualDirectoryNames = @("js")
		}
		"ui\admin" {
			$virtualDirectoryNames = @("js")
		}
	}
	
	$newPhysicalPath = "$newRootPath$relativePhysicalPath"

	$manager = Get-IISServerManager
	$rootsite = $manager.Sites[$sitename].Applications["/"].VirtualDirectories["/"]
	$rootsite.PhysicalPath = $newPhysicalPath
	
	foreach ($vdirname in $virtualDirectoryNames) {
		$vdirname = "/" + ($vdirname -replace "\\", "/")

		$vdir = $manager.Sites[$sitename].Applications["/"].VirtualDirectories[$vdirname]
		if ($vdir -eq $null) {
			continue
		}

		$relativePhysicalPath = $vdir.Attributes["physicalPath"].Value.Substring($oldRootPath.Length)
		$newPhysicalPath = "$newRootPath$relativePhysicalPath"
		$vdir.PhysicalPath = $newPhysicalPath
	}
	
	$manager.CommitChanges()
}

# restart IIS
& iisreset

# removed: checks and logs

$oldRootPath = "D:\Dev\"

$newRootPath = "D:\Dev\Mezzo\"

Install-Module -Name 'IISAdministration' -Force -ErrorAction Stop

Import-Module IISAdministration

# get sites list

$sites = @()

Get-IISSite | % {

$sitepath = $_.Applications.VirtualDirectories.PhysicalPath

if ($sitepath -is [array]) {

$sitepath = $sitepath[0]

}

$sites += @{

PhysicalPath = $sitepath

Name = $_.Name

}

# iterate over sites

foreach ($site in $sites) {

if (-not $site.PhysicalPath.ToLower().StartsWith($oldRootPath.Tolower())) {

continue

}

$sitename = $site.Name

$sitepath = $site.PhysicalPath

$relativePhysicalPath = $sitepath.Substring($oldRootPath.Length)

$skip = $false

$virtualDirectoryNames = @()

switch ($relativePhysicalPath.ToLower()) {

"ui\users" {

$virtualDirectoryNames = @("js")

}

"ui\admin" {

$virtualDirectoryNames = @("js")

}

$newPhysicalPath = "$newRootPath$relativePhysicalPath"

$manager = Get-IISServerManager

$rootsite = $manager.Sites[$sitename].Applications["/"].VirtualDirectories["/"]

$rootsite.PhysicalPath = $newPhysicalPath

foreach ($vdirname in $virtualDirectoryNames) {

$vdirname = "/" + ($vdirname -replace "\\", "/")

$vdir = $manager.Sites[$sitename].Applications["/"].VirtualDirectories[$vdirname]

if ($vdir -eq $null) {

continue

}

$relativePhysicalPath = $vdir.Attributes["physicalPath"].Value.Substring($oldRootPath.Length)

$newPhysicalPath = "$newRootPath$relativePhysicalPath"

$vdir.PhysicalPath = $newPhysicalPath

}

$manager.CommitChanges()

}

# restart IIS

& iisreset

Next up: migrating Jenkins jobs. With more than 170 jobs, doing it by hand is a real chore. Fortunately, you can also automate it.

# removed: variables, checks, etc

# - generate an API token in your Jenkins profile
# - create a credentials.txt file, write your login like: `login:tokenapi`
# - call: certutil -encode credentials.txt credentials.asc
# - remove the lines `BEGIN CERTIFICATE` and `END CERTIFICATE`
$credFile = Get-Content ".\credentials.asc"
if ($credFile -contains "-----BEGIN CERTIFICATE-----") {
	Write-Host "Remove the 'BEGIN CERTIFICATE' and 'END CERTIFICATE' lines from credentials.asc"
	Exit 1
}

$Credentials = $credFile.Trim()
$authHeader = @{Authorization = ('Basic ' + $Credentials) }

# list projects
$projects = Invoke-RestMethod -Method "GET" -Uri "$Url/api/json" -Headers $authHeader -ContentType "application/json; charset=utf-8"

# for each project, get the config.xml file and save it here
foreach ($job in $projects.jobs) {
    $name = $job.name
    $configUrl = "$Url/job/$name/config.xml"

    Invoke-RestMethod -Method "GET" -Uri $configUrl -Headers $authHeader -ContentType "text/xml" | Out-File "jobs\$name.xml"
}

# removed: variables, checks, etc

# - generate an API token in your Jenkins profile

# - create a credentials.txt file, write your login like: `login:tokenapi`

# - call: certutil -encode credentials.txt credentials.asc

# - remove the lines `BEGIN CERTIFICATE` and `END CERTIFICATE`

$credFile = Get-Content ".\credentials.asc"

if ($credFile -contains "-----BEGIN CERTIFICATE-----") {

Write-Host "Remove the 'BEGIN CERTIFICATE' and 'END CERTIFICATE' lines from credentials.asc"

Exit 1

}

$Credentials = $credFile.Trim()

$authHeader = @{Authorization = ('Basic ' + $Credentials) }

# list projects

$projects = Invoke-RestMethod -Method "GET" -Uri "$Url/api/json" -Headers $authHeader -ContentType "application/json; charset=utf-8"

# for each project, get the config.xml file and save it here

foreach ($job in $projects.jobs) {

$name = $job.name

$configUrl = "$Url/job/$name/config.xml"

Invoke-RestMethod -Method "GET" -Uri $configUrl -Headers $authHeader -ContentType "text/xml" | Out-File "jobs\$name.xml"

}

Now we can version the Jenkins configs. Then we edit, and update them.

# removed: variables, checks, etc

$credFile = Get-Content ".\credentials.asc"
if ($credFile -contains "-----BEGIN CERTIFICATE-----") {
	Write-Host "Remove the 'BEGIN CERTIFICATE' and 'END CERTIFICATE' lines from credentials.asc"
	Exit 1
}

$Credentials = $credFile.Trim()
$authHeader = @{Authorization = ('Basic ' + $Credentials) }

$configUrl = "$Url/job/$JobName/config.xml"

# push $localConfigFile to $Url
Invoke-RestMethod -Method "POST" -Uri $configUrl -Headers $authHeader -ContentType "application/xml" -InFile $localConfigFile

# removed: variables, checks, etc

$credFile = Get-Content ".\credentials.asc"

if ($credFile -contains "-----BEGIN CERTIFICATE-----") {

Write-Host "Remove the 'BEGIN CERTIFICATE' and 'END CERTIFICATE' lines from credentials.asc"

Exit 1

}

$Credentials = $credFile.Trim()

$authHeader = @{Authorization = ('Basic ' + $Credentials) }

$configUrl = "$Url/job/$JobName/config.xml"

# push $localConfigFile to $Url

Invoke-RestMethod -Method "POST" -Uri $configUrl -Headers $authHeader -ContentType "application/xml" -InFile $localConfigFile

Powershell for data mining Access databases into SQL Server

I need to do some data extraction from several hundreds of Access databases extracted from zip files. The extracted data will allow us to do some statistics on our customer’s behavior.
In order to do that, I decided to use Powershell, because it has all the features I need, bundled in one neat language.

The scaffolding for the scripts uses PSake for the task launcher, Pester for Powershell unit tests, and a few Nuget packages (including PSake).

In a vendor folder, add the Nuget exe, as well as a packages.config file listing the necessary Nuget packages. I have added PSake, and NUnitOrange to transform the Pester results to a nice HTML report.
I’m not using the Pester Nuget package because it includes the Pester unit tests, and some of them fail, which breaks my build (in addition to adding a thousand tests I don’t care about). Instead, I directly include the Pester scripts, and I cleaned up the samples and tests.

Since the script needs to read Access databases using some sort of ADODB or OLEDB provider, you will have to install either the Access 2007 or Access 2010 provider. If you’re like me (with Office x86 already installed), you won’t be able to install the Access 2010 x64 provider, which will trigger “The Jet/ACE OLEDB provider is not registered on the local machine” errors. So you will have to run the x86 version of Powershell. Which means you will have to use the x86 version of the SQLPS module (make sure you download the x86 version). Don’t worry, the SQL 2012 version of SQLPS is compatible with older SQL Servers (at least 2008 R2).

The batch bootstrapper is inspired by the bootstrapper from Pester:

@echo off

rem Install nuget packages
%~dp0vendor\nuget.exe Install %~dp0vendor\packages.config -o %~dp0packages

SET INPUT=%~dp0input
SET OUTPUT=%~dp0output
IF NOT '%1'=='' SET INPUT=%1
IF NOT '%2'=='' SET OUTPUT=%2

rem Detect 32 or 64 bits Windows: http://stackoverflow.com/a/24590583/6776
SET PSPATH=powershell
reg Query "HKLM\Hardware\Description\System\CentralProcessor\0" | find /i "x86" > NUL && set OS=32BIT || set OS=64BIT
if %OS%==64BIT SET PSPATH=%SystemRoot%\syswow64\WindowsPowerShell\v1.0\powershell.exe

rem See: https://github.com/pester/Pester/blob/master/Build.bat
%PSPATH% -NoProfile -ExecutionPolicy Bypass -Command ^
    "$psakeDir = ([array](dir %~dp0packages\psake.*))[-1] ; Import-Module $psakeDir\tools\psake.psm1 ; Invoke-psake %~dp0tasks.ps1 Mining -properties @{'in'='%INPUT%';'out'='%OUTPUT%'}"

@echo off

rem Install nuget packages

%~dp0vendor\nuget.exe Install %~dp0vendor\packages.config -o %~dp0packages

SET INPUT=%~dp0input

SET OUTPUT=%~dp0output

IF NOT '%1'=='' SET INPUT=%1

IF NOT '%2'=='' SET OUTPUT=%2

rem Detect 32 or 64 bits Windows: http://stackoverflow.com/a/24590583/6776

SET PSPATH=powershell

reg Query "HKLM\Hardware\Description\System\CentralProcessor\0" | find /i "x86" > NUL && set OS=32BIT || set OS=64BIT

if %OS%==64BIT SET PSPATH=%SystemRoot%\syswow64\WindowsPowerShell\v1.0\powershell.exe

rem See: https://github.com/pester/Pester/blob/master/Build.bat

%PSPATH% -NoProfile -ExecutionPolicy Bypass -Command ^

"$psakeDir = ([array](dir %~dp0packages\psake.*))[-1] ; Import-Module $psakeDir\tools\psake.psm1 ; Invoke-psake %~dp0tasks.ps1 Mining -properties @{'in'='%INPUT%';'out'='%OUTPUT%'}"

The tasks in tasks.ps1 are pretty standard and minimalist, so that the maximum is tested in modules via Pester.

Reading Access databases is as simple as using OleDb, old-school style:

function Read-AccessDataSet (
  [string] $file,
  [string] $query
)
{
    $conn = New-Object System.Data.OleDb.OleDbConnection "Provider=Microsoft.Jet.OLEDB.4.0; Data Source=$file"
    $conn.Open()
    $cmd  = New-Object System.Data.OleDb.OleDbCommand($query, $conn)

    $adapter = New-Object System.Data.OleDb.OleDbDataAdapter $cmd
    $dataset = New-Object System.Data.DataSet
    $adapter.Fill($dataset) | Out-Null # returns the number of rows

    $conn.Close()

    return $dataset
}

function Read-AccessDataSet (

[string] $file,

[string] $query

)

{

$conn = New-Object System.Data.OleDb.OleDbConnection "Provider=Microsoft.Jet.OLEDB.4.0; Data Source=$file"

$conn.Open()

$cmd = New-Object System.Data.OleDb.OleDbCommand($query, $conn)

$adapter = New-Object System.Data.OleDb.OleDbDataAdapter $cmd

$dataset = New-Object System.Data.DataSet

$adapter.Fill($dataset) | Out-Null # returns the number of rows

$conn.Close()

return $dataset

}

And the corresponding Pester tests, using actual Access test databases:

$here = Split-Path -Parent $MyInvocation.MyCommand.Path

Import-Module "$here\db.psm1" -Force

Describe "database module" {
    $in = Resolve-Path "$here\..\tests\input"

    Context "reading Access DB" {
        It "should read multiple Access rows" {
            (Read-AccessDataSet -file "$in\input.dat" -query "SELECT * FROM Users").Tables[0].Rows.Count | Should Be 10
        }
    }
}

$here = Split-Path -Parent $MyInvocation.MyCommand.Path

Import-Module "$here\db.psm1" -Force

Describe "database module" {

$in = Resolve-Path "$here\..\tests\input"

Context "reading Access DB" {

It "should read multiple Access rows" {

(Read-AccessDataSet -file "$in\input.dat" -query "SELECT * FROM Users").Tables[0].Rows.Count | Should Be 10

}

Executing SQL scripts uses the much more powerful Invoke-SqlCmd cmdlet from the SQLPS module. The most is that it returns a Powershell object, so I can do something like this:

$users = Invoke-SqlCommand -query "SELECT * FROM users"

foreach ($user in $users) {
  write "id: $($user.id) ; name: $($user.name)"
}

$users = Invoke-SqlCommand -query "SELECT * FROM users"

foreach ($user in $users) {

write "id: $($user.id) ; name: $($user.name)"

}

Unit testing your Powershell scripts using Pester

In order to stabilize our middle office, we need to test it. Not that it’s buggy, but hey, testing is good.

We have a huge 500-lines script that processes PDF files through a bunch of programs. We need to test a few things:

Every step and each piece of the code must be working
The configuration files must be properly read
The proper programs must be run in the proper order
The processed PDFs must look like what we’re expecting

To do that, we have a lot of work to do.

There is a series of great articles on Pester on PowerShell Magazine.

Refactor your script

First, we need to extract the methods and code blocks we will test. Our script is not that hard to refactor into a bunch of methods, since it’s pretty well organized so far. A simple refactor will simply be moving a bunch of independent code blocks into methods, regrouped and externalized by feature.

Prefer using modules to do that. Create your methods into .psm1 files. It will allow you to just Import-Module mymodule.psm1 and use it the same way as if you were dot-sourcing your file (. .\myfile.ps1), but will allow you to do more awesome things later; for instance, you can get the list of available commands through Get-Command -Module mymodule, get the comment-based help of your module methods through Get-Help my-module-method, get tab-expansion on your methods, etc.

If you’re not sure how to refactor your script to extract units of work, I can’t really help you there, and you should go learn more about unit testing; there are a lot of places where you can do that.

Unit Test your extracted code

There is an awesome unit test and BDD tool called Pester. Download the latest nuget package into your scripts directory by running nuget install pester. PsGet also has a Pester package, but including the Pester files with your sources (or a way to get them like with Nuget) is very important if you intend to run your tests on a continuous integration platform, especially if you externalize them on a service like AppVeyor.

Create a _run_tests.ps1 file (or whatever your naming convention calls for), with the very simple following contents:

$here = Split-Path -Parent $MyInvocation.MyCommand.Path
Import-Module "$here\Pester\Pester.psm1"
Invoke-Pester

$here = Split-Path -Parent $MyInvocation.MyCommand.Path

Import-Module "$here\Pester\Pester.psm1"

Invoke-Pester

Invoke-Pester will run all the “*.Tests.ps1” files it finds in the current directory. Unfortunately, the Pester Nuget package comes with the Pester tests, and it’s pretty annoying to see the thousands of unit tests in the middle of yours. You can either not use Invoke-Pester and roll your own “look for *.Tests.ps1 file except in the Pester folder” method, or (like I did) forget about nuget update pester and remove the test files from the Pester folder.

To create unit test files, you can either write them manually, or use the New-Fixture module command, which will create both a file to contain your methods, and a test file to test your methods. If you’re working with modules, you will not really be able to use the power of this command, but if you’re creating a new script, it will provide you with a BDD workflow.

Your test file for your module will look like this:

$here = Split-Path -Parent $MyInvocation.MyCommand.Path
$root = "$here\..\.."
Import-Module "$root\modules\mail.psm1" -Force

Describe "mail" {
    Context "config file is provided" {
        It "gets sender from the config" {
            (Read-MailConfig -configFile "$here\mail.Tests.config").from | Should Be "test@example.com"
        }

        It "gets client from the config" {
            (Read-MailConfig -configFile "$here\mail.Tests.config").client.Host | Should Be "smtp.gmail.com"
        }

        It "gets login from the config" {
            (Read-MailConfig -configFile "$here\mail.Tests.config").client.Credentials.UserName | Should Be "testlogin"
        }
    }

    Context "config file is missing" {
        It "throws an error when no config is passed" {
            { Read-MailConfig } | Should Throw
        }
    }
}

$here = Split-Path -Parent $MyInvocation.MyCommand.Path

$root = "$here\..\.."

Import-Module "$root\modules\mail.psm1" -Force

Describe "mail" {

Context "config file is provided" {

It "gets sender from the config" {

(Read-MailConfig -configFile "$here\mail.Tests.config").from | Should Be "test@example.com"

}

It "gets client from the config" {

(Read-MailConfig -configFile "$here\mail.Tests.config").client.Host | Should Be "smtp.gmail.com"

}

It "gets login from the config" {

(Read-MailConfig -configFile "$here\mail.Tests.config").client.Credentials.UserName | Should Be "testlogin"

}

Context "config file is missing" {

It "throws an error when no config is passed" {

{ Read-MailConfig } | Should Throw

}

As you can see, for now I’m using a custom config files with the expected values filled. This is not the best way to unit test, so we’re going to use mocking.

Mock the system methods

Here is the true power of Pester and Powershell: the ability to mock system methods. Your method reads files from the disk? No problem. Just provide an alternative implementation and you don’t have to setup a bunch of test data and config files.

My Read-MailConfig looks like this:

function Read-MailConfig (
	[string]$configFile
)
{
	if (($configFile -eq $null) -or ($configFile -eq "")) {
		throw "Mandatory config file to read"
	}

	$config = @{}
	[xml]$MainConfigEmail = Get-Content $configFile
	$config.from = $MainConfigEmail.infos.email.from
	$config.client = New-Object Net.Mail.SmtpClient($MainConfigEmail.infos.email.server)
	# further configure the client

	return (New-Object PsObject -Property $config)
}

function Read-MailConfig (

[string]$configFile

)

{

if (($configFile -eq $null) -or ($configFile -eq "")) {

throw "Mandatory config file to read"

}

$config = @{}

[xml]$MainConfigEmail = Get-Content $configFile

$config.from = $MainConfigEmail.infos.email.from

$config.client = New-Object Net.Mail.SmtpClient($MainConfigEmail.infos.email.server)

# further configure the client

return (New-Object PsObject -Property $config)

}

So, there is a Get-Content method that I want to mock and control its return value. I can now modify my test so that the values used by my test are right next to them:

Describe "mail" {
    Context "config file is provided" {
        Mock&nbsp;-ModuleName mail Get-Content { "<infos><email><from>test@example.com</from><server>smtp.gmail.com</server><login>testlogin</login><pass>password</pass></email></infos>" }

        It "gets sender from the config" {
            (Read-MailConfig -configFile "whatever.config").from | Should Be "test@example.com"
        }
    }
}

Describe "mail" {

Context "config file is provided" {

Mock -ModuleName mail Get-Content { "<infos><email><from>test@example.com</from><server>smtp.gmail.com</server><login>testlogin</login><pass>password</pass></email></infos>" }

It "gets sender from the config" {

(Read-MailConfig -configFile "whatever.config").from | Should Be "test@example.com"

}

Note the usage of -ModuleName mail in the Mock call: modules have their own scope (which is not the case of plain dot-sourced script files), and so need a bit more work to inject mocks.

My mail module actually sends emails through the System.Net.Mail classes, but Pester can’t mock .Net objects (note that .Net mocking frameworks can’t mock most system classes either).

In order to bypass that, we’re going to extract the .Net object calls into separate methods doing only that, we’re going to mock this extraction, and not test the .Net method call:

# Sends an error email to the ones in the config file
function Send-ErrorMessage(
  [string]$Title,
  [string]$Body
)
{
  $config = Read-MailConfig -configFile ".\general.config"

  try {
    $message = New-Object System.Net.Mail.MailMessage($config.from, $config.from, $Title, $Body)
    # configure $message

    Send-Mail -client $config.client -message $message
  } catch {
    # handle exception
  }
}

# sends an email
function Send-Mail(
  [Net.Mail.SmtpClient]$client,
  [Net.Mail.MailMessage]$message
)
{
  $client.send($message)
}

# Sends an error email to the ones in the config file

function Send-ErrorMessage(

[string]$Title,

[string]$Body

)

{

$config = Read-MailConfig -configFile ".\general.config"

try {

$message = New-Object System.Net.Mail.MailMessage($config.from, $config.from, $Title, $Body)

# configure $message

Send-Mail -client $config.client -message $message

} catch {

# handle exception

}

# sends an email

function Send-Mail(

[Net.Mail.SmtpClient]$client,

[Net.Mail.MailMessage]$message

)

{

$client.send($message)

}

And the test:

Context "sends an email" {
    Mock -ModuleName mail Send-Mail { return "Success" }
    Mock -ModuleName mail Read-MailConfig {
        $config = @{}
        $config.client = $null
        $config.from = "test@example.com"
        return $config
    }

    It "sends an error message" {
        Send-ErrorMessage -Title "test" -Body "test" | Should Be "Success"
    }
}

Context "sends an email" {

Mock -ModuleName mail Send-Mail { return "Success" }

Mock -ModuleName mail Read-MailConfig {

$config = @{}

$config.client = $null

$config.from = "test@example.com"

return $config

}

It "sends an error message" {

Send-ErrorMessage -Title "test" -Body "test" | Should Be "Success"

}

Use TestDrive to test file system processes

If you need to test for complex file access, mocking system methods will quickly become too hard. For instance, I need to test two methods: one that removes files older than X days, and one that removes empty folders. Using TestDrive is much more straightforward, simple and compact than mocking the Get-ChildItem cmdlet:

Describe "cleanup module" {
    Context "folder has old and new files" {
        New-item "TestDrive:\old.txt" -Type file
        # change the file creation date
        $oldFile = Get-Item "TestDrive:\old.txt"
        $oldFile.CreationTime = (Get-Date).AddDays(-3)
        New-item "TestDrive:\new.txt" -Type file

        It "removes the old files" {
            Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)
            Test-Path "TestDrive:\old.txt" | Should Be $false
        }

        It "leaves the new files" {
            Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)
            Test-Path "TestDrive:\new.txt" | Should Be $true
        }
    }

    Context "folders doesn't have old files" {
        New-item "TestDrive:\new1.txt" -Type file
        New-item "TestDrive:\new2.txt" -Type file

        It "does nothing" {
            Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)
            Test-Path "TestDrive:\new1.txt" | Should Be $true
            Test-Path "TestDrive:\new2.txt" | Should Be $true
        }
    }

    Context "path has empty folders" {
        New-item "TestDrive:\empty" -Type directory

        It "removes the folder" {
            Remove-EmptyFolders -path "TestDrive:"
            Test-Path "TestDrive:\empty" | Should Be $false
        }
    }

    Context "path does not have empty folders" {
        New-item "TestDrive:\notempty" -Type directory
        New-item "TestDrive:\notempty\file.txt" -Type file

        It "does not removes the folder" {
            Remove-EmptyFolders -path "TestDrive:"
            Test-Path "TestDrive:\notempty" | Should Be $true
        }
    }
}

Describe "cleanup module" {

Context "folder has old and new files" {

New-item "TestDrive:\old.txt" -Type file

# change the file creation date

$oldFile = Get-Item "TestDrive:\old.txt"

$oldFile.CreationTime = (Get-Date).AddDays(-3)

New-item "TestDrive:\new.txt" -Type file

It "removes the old files" {

Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)

Test-Path "TestDrive:\old.txt" | Should Be $false

}

It "leaves the new files" {

Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)

Test-Path "TestDrive:\new.txt" | Should Be $true

}

Context "folders doesn't have old files" {

New-item "TestDrive:\new1.txt" -Type file

New-item "TestDrive:\new2.txt" -Type file

It "does nothing" {

Remove-OldFiles -path "TestDrive:" -olderThan (Get-Date).AddDays(-2)

Test-Path "TestDrive:\new1.txt" | Should Be $true

Test-Path "TestDrive:\new2.txt" | Should Be $true

}

Context "path has empty folders" {

New-item "TestDrive:\empty" -Type directory

It "removes the folder" {

Remove-EmptyFolders -path "TestDrive:"

Test-Path "TestDrive:\empty" | Should Be $false

}

Context "path does not have empty folders" {

New-item "TestDrive:\notempty" -Type directory

New-item "TestDrive:\notempty\file.txt" -Type file

It "does not removes the folder" {

Remove-EmptyFolders -path "TestDrive:"

Test-Path "TestDrive:\notempty" | Should Be $true

}