#
# scrapeSandbox1.ps1
#
# Invoke-WebRequest outputs: Microsoft.PowerShell.Commands.BasicHtmlWebResponseObject
#
# short version
function scrapeCustomerIds ($result) {
    #
    # Short version: collects customer ids and names from the links of a
    # web response.
    #
    # accepts the output of Invoke-WebRequest as its only parameter
    # (only the .Links property is read):
    #
    #   PS > scrapeCustomerIds $result
    #
    # returns a hashtable with the structure:
    #
    #   key         value
    #   ~~~         ~~~~~
    #   <cust id>   <hashtable>
    #   15554026    {id, name}
    #
    # ...where id is the numeric customer id captured from a link href that
    # matches '/customers/(\d+)' and name is that link's outerText.
    # When several links carry the same customer id, the last one wins.
    #
    $custs = @{}
    foreach ($link in $result.Links) {
        try {
            # -match populates the automatic $Matches variable on success,
            # so it does not need to be reset before each test.
            if ($link.href -match '/customers/(\d+)') {
                $companyId = $Matches[1]
                $custs[$companyId] = @{
                    id   = $companyId
                    # the link being inspected supplies the name
                    name = $link.outerText
                }
            }
        } catch {
            # Diagnostics go to the warning stream so that the function's
            # output stays a single hashtable.
            Write-Warning "FailedItem: $($_.Exception)"
            Write-Warning (($_.Exception | Format-List -Force | Out-String).Trim())
        }
    }
    return $custs
}
function scrapeCompanyIDs ($result) {
    #
    # Verbose, diagnostic walk over $result.Links.
    #
    # For every link whose href matches '/customers/(\d+)' the function
    # prints the link's fields with Write-Host and records
    # @{ id = <captured digits>; name = <outerText> } in a hashtable keyed
    # by the captured id. Afterwards it prints the collected entries sorted
    # by key.
    #
    # Pipeline output: for each matching link the values of $1, $2, $x and
    # $matches are emitted, in that order. There is no return statement;
    # the collected hashtable itself is only printed.
    #
    # NOTE(review): $1, $2 and $x are not assigned anywhere in this
    # function; they resolve through the caller's scopes (or to $null).
    # Presumably debugging leftovers - confirm before removing.
    #
    $custs = @{}
    foreach ($anchor in $result.Links) {
        $matches = $null
        if (-not ($anchor.href -match '/customers/(\d+)')) {
            continue
        }

        Write-Host ('-' * 40)
        Write-Host "1:"
        $1
        Write-Host "2:"
        $2
        Write-Host "x:"
        $x
        Write-Host "matches:"
        $matches

        Write-Host "Company:" $matches[1]
        $companyId = $matches[1]

        Write-Host "innerHTML:" $anchor.innerHTML
        Write-Host "innerText:" $anchor.innerText
        Write-Host "outerHTML:" $anchor.outerHTML
        Write-Host "outerText:" $anchor.outerText
        Write-Host "tagName: " $anchor.tagName
        Write-Host "class: " $anchor.class
        Write-Host "href: " $anchor.href

        # store (or overwrite) the record for this customer id
        $record = @{
            "id"   = $companyId
            "name" = $anchor.outerText
        }
        $custs[$companyId] = $record
    }

    Write-Host "--------------------custs has:" $custs.Count
    foreach ($entry in @($custs.GetEnumerator() | Sort-Object Name)) {
        Write-Host "In cust:" $entry.Key
        # each value is the single {id, name} hashtable stored above
        $info = $entry.Value
        Write-Host "id: " $info.'id'
        Write-Host "name:" $info.'name'
    }
}
function scrapeIt ($response) {
    ############################################################
    #
    # Collects the value of every <input> element whose id is 'asset_ids_'
    # from $response.ParsedHtml and emits those values to the pipeline.
    #
    # $response must expose .ParsedHtml.getElementsByTagName(); each
    # returned element is read through its .id, .outerHtml and .value
    # properties.
    #
    # Along the way each matching element's outerHtml is stripped of
    # '<', '>', 'INPUT' and '"', split on '?', ' ' and '&' into key=value
    # tokens, and the tokens are accumulated in a hashtable that is only
    # used for the Write-Host trace.
    #
    # Side effect: changes the current location to the script directory.
    #
    ############################################################
    #
    # setup
    #
    $scriptDir = "C:\ProgramData\Syncro\live\scripts\"
    Set-Location -Path $scriptDir
    #
    ############################################################
    #
    # put all asset ids in an array ($assets)
    #
    $inputKvps = @{}    # hash of attribute name -> last value seen
    $assets = @()       # array
    # need ';'?
    $outerPairSeparators = '?',' ','&'    # can be more, comma separated
    $splitOptions = [System.StringSplitOptions]::RemoveEmptyEntries

    $response.ParsedHtml.getElementsByTagName("input") | ForEach-Object {
        # keep a named reference: $_ is rebound by the nested pipeline below
        $inputElement = $_
        if ($inputElement.id -eq 'asset_ids_') {
            $outer = $inputElement.outerHtml
            Write-Host ('_-' * 40)
            Write-Host "outerHtml: " $outer
            Write-Host "inputKvps count:" $inputKvps.Count

            $outerClean = $outer.replace("<","")
            $outerClean = $outerClean.replace(">","")
            $outerClean = $outerClean.replace('INPUT',"")
            $outerClean = $outerClean.replace('"',"")
            Write-Host "outerClean: " $outerClean

            $outerPairs = $outerClean.split($outerPairSeparators,$splitOptions)
            foreach ($kvp in $outerPairs) {
                # Split on the first '=' only, so a value that itself
                # contains '=' stays one string. A token without '='
                # (e.g. a bare attribute) leaves $v as $null.
                $k, $v = $kvp -split '=', 2
                Write-Host "[before] k: $k and v: $v"
                if ($null -ne $v) {
                    $v = $v.replace('+',' ')    # inserted to make good HTML encoding
                    $v = $v.replace('%2C', ',')
                }
                Write-Host "[ after] k: $k and v: $v against: [" $inputKvps.$k "]"

                # to add or not to add?
                if (-Not $inputKvps.ContainsKey($k)) {
                    $inputKvps.Add($k, $v)
                    Write-Host ">>> Added key: $k and value: $v to inputKvps"
                } elseif ($inputKvps[$k] -ne $v) {
                    # key exists with a different value; update
                    $inputKvps[$k] = $v
                    Write-Host ">>> Updated key: $k to value: $v"
                }

                # to debug: dump the whole table after every token
                $inputKvps.GetEnumerator() | ForEach-Object {
                    Write-Host " Item k:" $_.Key "v:" $_.Value
                }
            } # end of foreach ($kvp...

            $assets += $inputElement.value
            Write-Host "Added to assets: " $inputElement.value
        }
    }
    Write-Host "Assets:"
    $assets
} # end function scrapeIt...