forked from DBremen/PowerShellScripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGet-FileEncoding.ps1
More file actions
71 lines (63 loc) · 2.54 KB
/
Get-FileEncoding.ps1
File metadata and controls
71 lines (63 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
function Get-FileEncoding {
    <#
    .SYNOPSIS
    Get the file encoding of a given file.
    .DESCRIPTION
    Classifies a file in two steps:
      1. The first five lines are read as text. If any of them contains a
         character from a fixed set of non-printable characters, the file is
         reported as binary.
      2. Otherwise the leading bytes of the file are compared against the
         preambles (byte order marks) of every encoding returned by
         [System.Text.Encoding]::GetEncodings(). The longest preamble is
         tried first. If no preamble matches, UTF-7 is reported as the
         fallback.
    .PARAMETER Path
    The path to the file to get the encoding for. Accepts a string path or a
    file object from the pipeline (e.g. the output of Get-ChildItem). The path
    is treated literally; wildcard characters are not expanded. When several
    paths are supplied, one result is produced per path.
    .EXAMPLE
    dir -file | Get-FileEncoding
    .OUTPUTS
    System.String
        The literal string "Binary" when the file looks binary.
    System.Management.Automation.PSCustomObject
        An object with the properties File (the path value that was passed in)
        and Encoding (a System.Text.Encoding instance) otherwise.
    .NOTES
    A path that does not resolve to an existing file produces a non-terminating
    error and no output object.
    #>
    [CmdletBinding()]
    param(
        [Parameter(ValueFromPipeline, ValueFromPipelineByPropertyName, Mandatory,
            Position = 0)]
        [Alias("FullName")]
        $Path
    )
    BEGIN {
        ## Everything in this block is independent of the input file, so it is
        ## computed once per invocation instead of once per pipeline item.

        ## Characters whose presence in the first lines marks a file as binary.
        $nonPrintable = [char[]] (0..8 + 10..31 + 127 + 129 + 141 + 143 + 144 + 157)

        ## Maps a preamble, written as dash-joined decimal bytes, to its
        ## encoding. For example, "255-254" = Unicode (UTF-16 LE).
        $encodings = @{ }
        foreach ($info in [System.Text.Encoding]::GetEncodings()) {
            $candidate = $info.GetEncoding()
            $preamble = $candidate.GetPreamble()
            if ($null -ne $preamble -and $preamble.Length -gt 0) {
                $encodings[$preamble -join '-'] = $candidate
            }
        }

        ## Distinct preamble lengths, longest first, so that a longer preamble
        ## (FF FE 00 00, UTF-32 LE) wins over a shorter one it starts with
        ## (FF FE, UTF-16 LE).
        $preambleLengths = @(
            $encodings.Keys |
                ForEach-Object { ($_ -split '-').Count } |
                Sort-Object -Descending -Unique
        )
        $maxPreambleLength = 0
        if ($preambleLengths.Count -gt 0) {
            $maxPreambleLength = $preambleLengths[0]
        }
    }
    PROCESS {
        foreach ($entry in @($Path)) {
            ## Resolve the input to a full, literal file system path. File
            ## objects are resolved through FullName because their string
            ## form may be a bare file name in Windows PowerShell, which
            ## would then be looked up relative to the current location.
            if ($entry -is [System.IO.FileSystemInfo]) {
                $literal = $entry.FullName
            }
            else {
                $literal = [string] $entry
            }
            try {
                $fullPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($literal)
            }
            catch {
                Write-Error -Message "Cannot resolve path '$literal': $($_.Exception.Message)" -TargetObject $entry
                continue
            }
            if (-not [System.IO.File]::Exists($fullPath)) {
                Write-Error -Message "Cannot find file '$literal'." -Category ObjectNotFound -TargetObject $entry
                continue
            }

            ## First, check if the file is binary. That is, if the first
            ## 5 lines contain any non-printable characters.
            $lines = Get-Content -LiteralPath $fullPath -TotalCount 5 -ErrorAction Ignore
            $isBinary = $false
            foreach ($line in @($lines)) {
                if ($null -ne $line -and $line.IndexOfAny($nonPrintable) -ge 0) {
                    $isBinary = $true
                    break
                }
            }
            if ($isBinary) {
                "Binary"
                continue
            }

            ## Next, read just enough leading bytes to cover the longest known
            ## preamble. A .NET stream is used because 'Get-Content -Encoding
            ## Byte' is not available in PowerShell 6 and later.
            $buffer = New-Object byte[] $maxPreambleLength
            $available = 0
            try {
                $stream = [System.IO.File]::Open(
                    $fullPath,
                    [System.IO.FileMode]::Open,
                    [System.IO.FileAccess]::Read,
                    [System.IO.FileShare]::ReadWrite)
                try {
                    ## Stream.Read may return fewer bytes than requested, so
                    ## keep reading until the buffer is full or the file ends.
                    while ($available -lt $buffer.Length) {
                        $chunk = $stream.Read($buffer, $available, $buffer.Length - $available)
                        if ($chunk -le 0) { break }
                        $available += $chunk
                    }
                }
                finally {
                    $stream.Dispose()
                }
            }
            catch {
                Write-Error -Message "Cannot read file '$literal': $($_.Exception.Message)" -TargetObject $entry
                continue
            }

            ## Assume the encoding is UTF7 by default
            $detected = [System.Text.Encoding]::UTF7

            ## Go through each of the possible preamble lengths, longest
            ## first, and see if the leading bytes match a known encoding.
            foreach ($length in $preambleLengths) {
                ## The file is too short to carry a preamble of this length.
                if ($length -gt $available) { continue }
                $match = $encodings[$buffer[0..($length - 1)] -join '-']
                if ($match) {
                    $detected = $match
                    break
                }
            }

            [PSCustomObject][ordered]@{File = $entry; Encoding = $detected }
        }
    }
}