-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathStringDistance.cs
More file actions
121 lines (110 loc) · 3.85 KB
/
StringDistance.cs
File metadata and controls
121 lines (110 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
namespace BytecodeApi.Text;
/// <summary>
/// Class that provides algorithms to measure the distance between strings.
/// </summary>
public static class StringDistance
{
/// <summary>
/// Measures the minimum number of single-character edits (i.e. insertions, deletions or substitutions) required to change one word into the other.
/// </summary>
/// <param name="strA">The first <see cref="string" /> to compare.</param>
/// <param name="strB">The second <see cref="string" /> to compare.</param>
/// <returns>
/// The minimum number of single-character edits (i.e. insertions, deletions or substitutions) required to change one word into the other. If both strings are equal, 0 is returned.
/// </returns>
public static int Levenshtein(string strA, string strB)
{
Check.ArgumentNull(strA);
Check.ArgumentNull(strB);
return ComputeLevenshtein(strA, strB, false);
}
/// <summary>
/// Measures the minimum number of single-character edits (i.e. insertions, deletions, substitutions or transpositions) required to change one word into the other.
/// </summary>
/// <param name="strA">The first <see cref="string" /> to compare.</param>
/// <param name="strB">The second <see cref="string" /> to compare.</param>
/// <returns>
/// The minimum number of single-character edits (i.e. insertions, deletions, substitutions or transpositions) required to change one word into the other. If both strings are equal, 0 is returned.
/// </returns>
public static int DamerauLevenshtein(string strA, string strB)
{
Check.ArgumentNull(strA);
Check.ArgumentNull(strB);
return ComputeLevenshtein(strA, strB, true);
}
/// <summary>
/// Measures the minimum number of substitutions required to change one <see cref="string" /> into another. The Hamming distance between two strings of equal length is the number of positions at which the corresponding characters are different.
/// </summary>
/// <param name="strA">The first <see cref="string" /> to compare.</param>
/// <param name="strB">The second <see cref="string" /> to compare.</param>
/// <returns>
/// The minimum number of substitutions required to change one <see cref="string" /> into another. If both strings are equal, 0 is returned.
/// </returns>
public static int Hamming(string strA, string strB)
{
Check.ArgumentNull(strA);
Check.ArgumentNull(strB);
Check.Argument(strA.Length == strB.Length, nameof(strB), "Strings must have equal length.");
if (strA == strB)
{
return 0;
}
else
{
return strA
.Zip(strB, (c1, c2) => new { c1, c2 })
.Count(m => m.c1 != m.c2);
}
}
private static int ComputeLevenshtein(string strA, string strB, bool damerau)
{
if (strA == strB)
{
return 0;
}
else if (strA == "")
{
return strB.Length;
}
else if (strB == "")
{
return strA.Length;
}
else
{
int[,] matrix = new int[strA.Length + 1, strB.Length + 1];
for (int x = 0; x <= strA.Length; x++)
{
matrix[x, 0] = x;
}
for (int y = 0; y <= strB.Length; y++)
{
matrix[0, y] = y;
}
for (int x = 1; x <= strA.Length; x++)
{
for (int y = 1; y <= strB.Length; y++)
{
int cost = (strA[x - 1] == strB[y - 1]) ? 0 : 1;
int insertion = matrix[x, y - 1] + 1;
int deletion = matrix[x - 1, y] + 1;
int substitution = matrix[x - 1, y - 1];
if (damerau)
{
int distance = Math.Min(insertion, Math.Min(deletion, substitution + cost));
if (x > 1 && y > 1 && strA[x - 1] == strB[y - 2] && strA[x - 2] == strB[y - 1])
{
distance = Math.Min(distance, matrix[x - 2, y - 2] + cost);
}
matrix[x, y] = distance;
}
else
{
matrix[x, y] = Math.Min(Math.Min(deletion, insertion), substitution + cost);
}
}
}
return matrix[strA.Length, strB.Length];
}
}
}