--[[
* Diff Match and Patch
* Copyright 2018 The diff-match-patch Authors.
* https://github.com/google/diff-match-patch
*
* Based on the JavaScript implementation by Neil Fraser.
* Ported to Lua by Duncan Cross.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
--]]
--[[
-- Lua 5.1 and earlier requires the external BitOp library.
-- This library is built-in from Lua 5.2 and later as 'bit32'.
require 'bit' --
local band, bor, lshift
= bit.band, bit.bor, bit.lshift
--]]
local band, bor, lshift
= bit32.band, bit32.bor, bit32.lshift
local type, setmetatable, ipairs, select
= type, setmetatable, ipairs, select
local unpack, tonumber, error
= unpack, tonumber, error
local strsub, strbyte, strchar, gmatch, gsub
= string.sub, string.byte, string.char, string.gmatch, string.gsub
local strmatch, strfind, strformat
= string.match, string.find, string.format
local tinsert, tremove, tconcat
= table.insert, table.remove, table.concat
local max, min, floor, ceil, abs
= math.max, math.min, math.floor, math.ceil, math.abs
local clock = os.clock
-- Utility functions.
local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]'
local function percentEncode_replace(v)
return strformat('%%%02X', strbyte(v))
end
local function indexOf(a, b, start)
if (#b == 0) then
return nil
end
return strfind(a, b, start, true)
end
local htmlEncode_pattern = '[&<>\n]'
local htmlEncode_replace = {
['&'] = '&', ['<'] = '<', ['>'] = '>', ['\n'] = '¶
'
}
-- Public API Functions
-- (Exported at the end of the script)
local diff_main,
diff_cleanupSemantic,
diff_cleanupEfficiency,
diff_levenshtein,
diff_prettyHtml
local match_main
local patch_make,
patch_toText,
patch_fromText,
patch_apply
--[[
* The data structure representing a diff is an array of tuples:
* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}}
* which means: delete 'Hello', add 'Goodbye' and keep ' world.'
--]]
local DIFF_DELETE = -1
local DIFF_INSERT = 1
local DIFF_EQUAL = 0
-- Number of seconds to map a diff before giving up (0 for infinity).
local Diff_Timeout = 1.0
-- Cost of an empty edit operation in terms of edit characters.
local Diff_EditCost = 4
-- At what point is no match declared (0.0 = perfection, 1.0 = very loose).
local Match_Threshold = 0.5
-- How far to search for a match (0 = exact location, 1000+ = broad match).
-- A match this many characters away from the expected location will add
-- 1.0 to the score (0.0 is a perfect match).
local Match_Distance = 1000
-- When deleting a large block of text (over ~64 characters), how close do
-- the contents have to be to match the expected contents. (0.0 = perfection,
-- 1.0 = very loose). Note that Match_Threshold controls how closely the
-- end points of a delete need to match.
local Patch_DeleteThreshold = 0.5
-- Chunk size for context length.
local Patch_Margin = 4
-- The number of bits in an int.
local Match_MaxBits = 32
function settings(new)
if new then
Diff_Timeout = new.Diff_Timeout or Diff_Timeout
Diff_EditCost = new.Diff_EditCost or Diff_EditCost
Match_Threshold = new.Match_Threshold or Match_Threshold
Match_Distance = new.Match_Distance or Match_Distance
Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold
Patch_Margin = new.Patch_Margin or Patch_Margin
Match_MaxBits = new.Match_MaxBits or Match_MaxBits
else
return {
Diff_Timeout = Diff_Timeout;
Diff_EditCost = Diff_EditCost;
Match_Threshold = Match_Threshold;
Match_Distance = Match_Distance;
Patch_DeleteThreshold = Patch_DeleteThreshold;
Patch_Margin = Patch_Margin;
Match_MaxBits = Match_MaxBits;
}
end
end
-- ---------------------------------------------------------------------------
-- DIFF API
-- ---------------------------------------------------------------------------
-- The private diff functions
local _diff_compute,
_diff_bisect,
_diff_halfMatchI,
_diff_halfMatch,
_diff_cleanupSemanticScore,
_diff_cleanupSemanticLossless,
_diff_cleanupMerge,
_diff_commonPrefix,
_diff_commonSuffix,
_diff_commonOverlap,
_diff_xIndex,
_diff_text1,
_diff_text2,
_diff_toDelta,
_diff_fromDelta
--[[
* Find the differences between two texts. Simplifies the problem by stripping
* any common prefix or suffix off the texts before diffing.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} opt_checklines Has no effect in Lua.
* @param {number} opt_deadline Optional time when the diff should be complete
* by. Used internally for recursive calls. Users should set DiffTimeout
* instead.
* @return {Array.>} Array of diff tuples.
--]]
function diff_main(text1, text2, opt_checklines, opt_deadline)
-- Set a deadline by which time the diff must be complete.
if opt_deadline == nil then
if Diff_Timeout <= 0 then
opt_deadline = 2 ^ 31
else
opt_deadline = clock() + Diff_Timeout
end
end
local deadline = opt_deadline
-- Check for null inputs.
if text1 == nil or text1 == nil then
error('Null inputs. (diff_main)')
end
-- Check for equality (speedup).
if text1 == text2 then
if #text1 > 0 then
return {{DIFF_EQUAL, text1}}
end
return {}
end
-- LUANOTE: Due to the lack of Unicode support, Lua is incapable of
-- implementing the line-mode speedup.
local checklines = false
-- Trim off common prefix (speedup).
local commonlength = _diff_commonPrefix(text1, text2)
local commonprefix
if commonlength > 0 then
commonprefix = strsub(text1, 1, commonlength)
text1 = strsub(text1, commonlength + 1)
text2 = strsub(text2, commonlength + 1)
end
-- Trim off common suffix (speedup).
commonlength = _diff_commonSuffix(text1, text2)
local commonsuffix
if commonlength > 0 then
commonsuffix = strsub(text1, -commonlength)
text1 = strsub(text1, 1, -commonlength - 1)
text2 = strsub(text2, 1, -commonlength - 1)
end
-- Compute the diff on the middle block.
local diffs = _diff_compute(text1, text2, checklines, deadline)
-- Restore the prefix and suffix.
if commonprefix then
tinsert(diffs, 1, {DIFF_EQUAL, commonprefix})
end
if commonsuffix then
diffs[#diffs + 1] = {DIFF_EQUAL, commonsuffix}
end
_diff_cleanupMerge(diffs)
return diffs
end
--[[
* Reduce the number of edits by eliminating semantically trivial equalities.
* @param {Array.>} diffs Array of diff tuples.
--]]
function diff_cleanupSemantic(diffs)
local changes = false
local equalities = {} -- Stack of indices where equalities are found.
local equalitiesLength = 0 -- Keeping our own length var is faster.
local lastEquality = nil
-- Always equal to diffs[equalities[equalitiesLength]][2]
local pointer = 1 -- Index of current position.
-- Number of characters that changed prior to the equality.
local length_insertions1 = 0
local length_deletions1 = 0
-- Number of characters that changed after the equality.
local length_insertions2 = 0
local length_deletions2 = 0
while diffs[pointer] do
if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
equalitiesLength = equalitiesLength + 1
equalities[equalitiesLength] = pointer
length_insertions1 = length_insertions2
length_deletions1 = length_deletions2
length_insertions2 = 0
length_deletions2 = 0
lastEquality = diffs[pointer][2]
else -- An insertion or deletion.
if diffs[pointer][1] == DIFF_INSERT then
length_insertions2 = length_insertions2 + #(diffs[pointer][2])
else
length_deletions2 = length_deletions2 + #(diffs[pointer][2])
end
-- Eliminate an equality that is smaller or equal to the edits on both
-- sides of it.
if lastEquality
and (#lastEquality <= max(length_insertions1, length_deletions1))
and (#lastEquality <= max(length_insertions2, length_deletions2)) then
-- Duplicate record.
tinsert(diffs, equalities[equalitiesLength],
{DIFF_DELETE, lastEquality})
-- Change second copy to insert.
diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
-- Throw away the equality we just deleted.
equalitiesLength = equalitiesLength - 1
-- Throw away the previous equality (it needs to be reevaluated).
equalitiesLength = equalitiesLength - 1
pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
length_insertions1, length_deletions1 = 0, 0 -- Reset the counters.
length_insertions2, length_deletions2 = 0, 0
lastEquality = nil
changes = true
end
end
pointer = pointer + 1
end
-- Normalize the diff.
if changes then
_diff_cleanupMerge(diffs)
end
_diff_cleanupSemanticLossless(diffs)
-- Find any overlaps between deletions and insertions.
-- e.g: abcxxxxxxdef
-- -> abcxxxdef
-- e.g: xxxabcdefxxx
-- -> defxxxabc
-- Only extract an overlap if it is as big as the edit ahead or behind it.
pointer = 2
while diffs[pointer] do
if (diffs[pointer - 1][1] == DIFF_DELETE and
diffs[pointer][1] == DIFF_INSERT) then
local deletion = diffs[pointer - 1][2]
local insertion = diffs[pointer][2]
local overlap_length1 = _diff_commonOverlap(deletion, insertion)
local overlap_length2 = _diff_commonOverlap(insertion, deletion)
if (overlap_length1 >= overlap_length2) then
if (overlap_length1 >= #deletion / 2 or
overlap_length1 >= #insertion / 2) then
-- Overlap found. Insert an equality and trim the surrounding edits.
tinsert(diffs, pointer,
{DIFF_EQUAL, strsub(insertion, 1, overlap_length1)})
diffs[pointer - 1][2] =
strsub(deletion, 1, #deletion - overlap_length1)
diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1)
pointer = pointer + 1
end
else
if (overlap_length2 >= #deletion / 2 or
overlap_length2 >= #insertion / 2) then
-- Reverse overlap found.
-- Insert an equality and swap and trim the surrounding edits.
tinsert(diffs, pointer,
{DIFF_EQUAL, strsub(deletion, 1, overlap_length2)})
diffs[pointer - 1] = {DIFF_INSERT,
strsub(insertion, 1, #insertion - overlap_length2)}
diffs[pointer + 1] = {DIFF_DELETE,
strsub(deletion, overlap_length2 + 1)}
pointer = pointer + 1
end
end
pointer = pointer + 1
end
pointer = pointer + 1
end
end
--[[
* Reduce the number of edits by eliminating operationally trivial equalities.
* @param {Array.>} diffs Array of diff tuples.
--]]
function diff_cleanupEfficiency(diffs)
local changes = false
-- Stack of indices where equalities are found.
local equalities = {}
-- Keeping our own length var is faster.
local equalitiesLength = 0
-- Always equal to diffs[equalities[equalitiesLength]][2]
local lastEquality = nil
-- Index of current position.
local pointer = 1
-- The following four are really booleans but are stored as numbers because
-- they are used at one point like this:
--
-- (pre_ins + pre_del + post_ins + post_del) == 3
--
-- ...i.e. checking that 3 of them are true and 1 of them is false.
-- Is there an insertion operation before the last equality.
local pre_ins = 0
-- Is there a deletion operation before the last equality.
local pre_del = 0
-- Is there an insertion operation after the last equality.
local post_ins = 0
-- Is there a deletion operation after the last equality.
local post_del = 0
while diffs[pointer] do
if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
local diffText = diffs[pointer][2]
if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
-- Candidate found.
equalitiesLength = equalitiesLength + 1
equalities[equalitiesLength] = pointer
pre_ins, pre_del = post_ins, post_del
lastEquality = diffText
else
-- Not a candidate, and can never become one.
equalitiesLength = 0
lastEquality = nil
end
post_ins, post_del = 0, 0
else -- An insertion or deletion.
if diffs[pointer][1] == DIFF_DELETE then
post_del = 1
else
post_ins = 1
end
--[[
* Five types to be split:
* ABXYCD
* AXCD
* ABXC
* AXCD
* ABXC
--]]
if lastEquality and (
(pre_ins+pre_del+post_ins+post_del == 4)
or
(
(#lastEquality < Diff_EditCost / 2)
and
(pre_ins+pre_del+post_ins+post_del == 3)
)) then
-- Duplicate record.
tinsert(diffs, equalities[equalitiesLength],
{DIFF_DELETE, lastEquality})
-- Change second copy to insert.
diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
-- Throw away the equality we just deleted.
equalitiesLength = equalitiesLength - 1
lastEquality = nil
if (pre_ins == 1) and (pre_del == 1) then
-- No changes made which could affect previous entry, keep going.
post_ins, post_del = 1, 1
equalitiesLength = 0
else
-- Throw away the previous equality.
equalitiesLength = equalitiesLength - 1
pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
post_ins, post_del = 0, 0
end
changes = true
end
end
pointer = pointer + 1
end
if changes then
_diff_cleanupMerge(diffs)
end
end
--[[
* Compute the Levenshtein distance; the number of inserted, deleted or
* substituted characters.
* @param {Array.>} diffs Array of diff tuples.
* @return {number} Number of changes.
--]]
function diff_levenshtein(diffs)
local levenshtein = 0
local insertions, deletions = 0, 0
for x, diff in ipairs(diffs) do
local op, data = diff[1], diff[2]
if (op == DIFF_INSERT) then
insertions = insertions + #data
elseif (op == DIFF_DELETE) then
deletions = deletions + #data
elseif (op == DIFF_EQUAL) then
-- A deletion and an insertion is one substitution.
levenshtein = levenshtein + max(insertions, deletions)
insertions = 0
deletions = 0
end
end
levenshtein = levenshtein + max(insertions, deletions)
return levenshtein
end
--[[
* Convert a diff array into a pretty HTML report.
* @param {Array.>} diffs Array of diff tuples.
* @return {string} HTML representation.
--]]
function diff_prettyHtml(diffs)
local html = {}
for x, diff in ipairs(diffs) do
local op = diff[1] -- Operation (insert, delete, equal)
local data = diff[2] -- Text of change.
local text = gsub(data, htmlEncode_pattern, htmlEncode_replace)
if op == DIFF_INSERT then
html[x] = '' .. text .. ''
elseif op == DIFF_DELETE then
html[x] = '' .. text .. ''
elseif op == DIFF_EQUAL then
html[x] = '' .. text .. ''
end
end
return tconcat(html)
end
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
-- ---------------------------------------------------------------------------
--[[
* Find the differences between two texts. Assumes that the texts do not
* have any common prefix or suffix.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} checklines Has no effect in Lua.
* @param {number} deadline Time when the diff should be complete by.
* @return {Array.>} Array of diff tuples.
* @private
--]]
function _diff_compute(text1, text2, checklines, deadline)
if #text1 == 0 then
-- Just add some text (speedup).
return {{DIFF_INSERT, text2}}
end
if #text2 == 0 then
-- Just delete some text (speedup).
return {{DIFF_DELETE, text1}}
end
local diffs
local longtext = (#text1 > #text2) and text1 or text2
local shorttext = (#text1 > #text2) and text2 or text1
local i = indexOf(longtext, shorttext)
if i ~= nil then
-- Shorter text is inside the longer text (speedup).
diffs = {
{DIFF_INSERT, strsub(longtext, 1, i - 1)},
{DIFF_EQUAL, shorttext},
{DIFF_INSERT, strsub(longtext, i + #shorttext)}
}
-- Swap insertions for deletions if diff is reversed.
if #text1 > #text2 then
diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE
end
return diffs
end
if #shorttext == 1 then
-- Single character string.
-- After the previous speedup, the character can't be an equality.
return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
end
-- Check to see if the problem can be split in two.
do
local
text1_a, text1_b,
text2_a, text2_b,
mid_common = _diff_halfMatch(text1, text2)
if text1_a then
-- A half-match was found, sort out the return data.
-- Send both pairs off for separate processing.
local diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
local diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
-- Merge the results.
local diffs_a_len = #diffs_a
diffs = diffs_a
diffs[diffs_a_len + 1] = {DIFF_EQUAL, mid_common}
for i, b_diff in ipairs(diffs_b) do
diffs[diffs_a_len + 1 + i] = b_diff
end
return diffs
end
end
return _diff_bisect(text1, text2, deadline)
end
--[[
* Find the 'middle snake' of a diff, split the problem in two
* and return the recursively constructed diff.
* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {number} deadline Time at which to bail if not yet complete.
* @return {Array.>} Array of diff tuples.
* @private
--]]
function _diff_bisect(text1, text2, deadline)
-- Cache the text lengths to prevent multiple calls.
local text1_length = #text1
local text2_length = #text2
local _sub, _element
local max_d = ceil((text1_length + text2_length) / 2)
local v_offset = max_d
local v_length = 2 * max_d
local v1 = {}
local v2 = {}
-- Setting all elements to -1 is faster in Lua than mixing integers and nil.
for x = 0, v_length - 1 do
v1[x] = -1
v2[x] = -1
end
v1[v_offset + 1] = 0
v2[v_offset + 1] = 0
local delta = text1_length - text2_length
-- If the total number of characters is odd, then
-- the front path will collide with the reverse path.
local front = (delta % 2 ~= 0)
-- Offsets for start and end of k loop.
-- Prevents mapping of space beyond the grid.
local k1start = 0
local k1end = 0
local k2start = 0
local k2end = 0
for d = 0, max_d - 1 do
-- Bail out if deadline is reached.
if clock() > deadline then
break
end
-- Walk the front path one step.
for k1 = -d + k1start, d - k1end, 2 do
local k1_offset = v_offset + k1
local x1
if (k1 == -d) or ((k1 ~= d) and
(v1[k1_offset - 1] < v1[k1_offset + 1])) then
x1 = v1[k1_offset + 1]
else
x1 = v1[k1_offset - 1] + 1
end
local y1 = x1 - k1
while (x1 <= text1_length) and (y1 <= text2_length)
and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do
x1 = x1 + 1
y1 = y1 + 1
end
v1[k1_offset] = x1
if x1 > text1_length + 1 then
-- Ran off the right of the graph.
k1end = k1end + 2
elseif y1 > text2_length + 1 then
-- Ran off the bottom of the graph.
k1start = k1start + 2
elseif front then
local k2_offset = v_offset + delta - k1
if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then
-- Mirror x2 onto top-left coordinate system.
local x2 = text1_length - v2[k2_offset] + 1
if x1 > x2 then
-- Overlap detected.
return _diff_bisectSplit(text1, text2, x1, y1, deadline)
end
end
end
end
-- Walk the reverse path one step.
for k2 = -d + k2start, d - k2end, 2 do
local k2_offset = v_offset + k2
local x2
if (k2 == -d) or ((k2 ~= d) and
(v2[k2_offset - 1] < v2[k2_offset + 1])) then
x2 = v2[k2_offset + 1]
else
x2 = v2[k2_offset - 1] + 1
end
local y2 = x2 - k2
while (x2 <= text1_length) and (y2 <= text2_length)
and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do
x2 = x2 + 1
y2 = y2 + 1
end
v2[k2_offset] = x2
if x2 > text1_length + 1 then
-- Ran off the left of the graph.
k2end = k2end + 2
elseif y2 > text2_length + 1 then
-- Ran off the top of the graph.
k2start = k2start + 2
elseif not front then
local k1_offset = v_offset + delta - k2
if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then
local x1 = v1[k1_offset]
local y1 = v_offset + x1 - k1_offset
-- Mirror x2 onto top-left coordinate system.
x2 = text1_length - x2 + 1
if x1 > x2 then
-- Overlap detected.
return _diff_bisectSplit(text1, text2, x1, y1, deadline)
end
end
end
end
end
-- Diff took too long and hit the deadline or
-- number of diffs equals number of characters, no commonality at all.
return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
end
--[[
* Given the location of the 'middle snake', split the diff in two parts
* and recurse.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {number} x Index of split point in text1.
* @param {number} y Index of split point in text2.
* @param {number} deadline Time at which to bail if not yet complete.
* @return {Array.>} Array of diff tuples.
* @private
--]]
function _diff_bisectSplit(text1, text2, x, y, deadline)
local text1a = strsub(text1, 1, x - 1)
local text2a = strsub(text2, 1, y - 1)
local text1b = strsub(text1, x)
local text2b = strsub(text2, y)
-- Compute both diffs serially.
local diffs = diff_main(text1a, text2a, false, deadline)
local diffsb = diff_main(text1b, text2b, false, deadline)
local diffs_len = #diffs
for i, v in ipairs(diffsb) do
diffs[diffs_len + i] = v
end
return diffs
end
--[[
* Determine the common prefix of two strings.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {number} The number of characters common to the start of each
* string.
--]]
function _diff_commonPrefix(text1, text2)
-- Quick check for common null cases.
if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1))
then
return 0
end
-- Binary search.
-- Performance analysis: https://neil.fraser.name/news/2007/10/09/
local pointermin = 1
local pointermax = min(#text1, #text2)
local pointermid = pointermax
local pointerstart = 1
while (pointermin < pointermid) do
if (strsub(text1, pointerstart, pointermid)
== strsub(text2, pointerstart, pointermid)) then
pointermin = pointermid
pointerstart = pointermin
else
pointermax = pointermid
end
pointermid = floor(pointermin + (pointermax - pointermin) / 2)
end
return pointermid
end
--[[
* Determine the common suffix of two strings.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {number} The number of characters common to the end of each string.
--]]
function _diff_commonSuffix(text1, text2)
-- Quick check for common null cases.
if (#text1 == 0) or (#text2 == 0)
or (strbyte(text1, -1) ~= strbyte(text2, -1)) then
return 0
end
-- Binary search.
-- Performance analysis: https://neil.fraser.name/news/2007/10/09/
local pointermin = 1
local pointermax = min(#text1, #text2)
local pointermid = pointermax
local pointerend = 1
while (pointermin < pointermid) do
if (strsub(text1, -pointermid, -pointerend)
== strsub(text2, -pointermid, -pointerend)) then
pointermin = pointermid
pointerend = pointermin
else
pointermax = pointermid
end
pointermid = floor(pointermin + (pointermax - pointermin) / 2)
end
return pointermid
end
--[[
* Determine if the suffix of one string is the prefix of another.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {number} The number of characters common to the end of the first
* string and the start of the second string.
* @private
--]]
function _diff_commonOverlap(text1, text2)
-- Cache the text lengths to prevent multiple calls.
local text1_length = #text1
local text2_length = #text2
-- Eliminate the null case.
if text1_length == 0 or text2_length == 0 then
return 0
end
-- Truncate the longer string.
if text1_length > text2_length then
text1 = strsub(text1, text1_length - text2_length + 1)
elseif text1_length < text2_length then
text2 = strsub(text2, 1, text1_length)
end
local text_length = min(text1_length, text2_length)
-- Quick check for the worst case.
if text1 == text2 then
return text_length
end
-- Start by looking for a single character match
-- and increase length until no match is found.
-- Performance analysis: https://neil.fraser.name/news/2010/11/04/
local best = 0
local length = 1
while true do
local pattern = strsub(text1, text_length - length + 1)
local found = strfind(text2, pattern, 1, true)
if found == nil then
return best
end
length = length + found - 1
if found == 1 or strsub(text1, text_length - length + 1) ==
strsub(text2, 1, length) then
best = length
length = length + 1
end
end
end
--[[
* Does a substring of shorttext exist within longtext such that the substring
* is at least half the length of longtext?
* This speedup can produce non-minimal diffs.
* Closure, but does not reference any external variables.
* @param {string} longtext Longer string.
* @param {string} shorttext Shorter string.
* @param {number} i Start index of quarter length substring within longtext.
* @return {?Array.} Five element Array, containing the prefix of
* longtext, the suffix of longtext, the prefix of shorttext, the suffix
* of shorttext and the common middle. Or nil if there was no match.
* @private
--]]
function _diff_halfMatchI(longtext, shorttext, i)
-- Start with a 1/4 length substring at position i as a seed.
local seed = strsub(longtext, i, i + floor(#longtext / 4))
local j = 0 -- LUANOTE: do not change to 1, was originally -1
local best_common = ''
local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b
while true do
j = indexOf(shorttext, seed, j + 1)
if (j == nil) then
break
end
local prefixLength = _diff_commonPrefix(strsub(longtext, i),
strsub(shorttext, j))
local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1),
strsub(shorttext, 1, j - 1))
if #best_common < suffixLength + prefixLength then
best_common = strsub(shorttext, j - suffixLength, j - 1)
.. strsub(shorttext, j, j + prefixLength - 1)
best_longtext_a = strsub(longtext, 1, i - suffixLength - 1)
best_longtext_b = strsub(longtext, i + prefixLength)
best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1)
best_shorttext_b = strsub(shorttext, j + prefixLength)
end
end
if #best_common * 2 >= #longtext then
return {best_longtext_a, best_longtext_b,
best_shorttext_a, best_shorttext_b, best_common}
else
return nil
end
end
--[[
* Do the two texts share a substring which is at least half the length of the
* longer text?
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {?Array.} Five element Array, containing the prefix of
* text1, the suffix of text1, the prefix of text2, the suffix of
* text2 and the common middle. Or nil if there was no match.
* @private
--]]
function _diff_halfMatch(text1, text2)
if Diff_Timeout <= 0 then
-- Don't risk returning a non-optimal diff if we have unlimited time.
return nil
end
local longtext = (#text1 > #text2) and text1 or text2
local shorttext = (#text1 > #text2) and text2 or text1
if (#longtext < 4) or (#shorttext * 2 < #longtext) then
return nil -- Pointless.
end
-- First check if the second quarter is the seed for a half-match.
local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4))
-- Check again based on the third quarter.
local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2))
local hm
if not hm1 and not hm2 then
return nil
elseif not hm2 then
hm = hm1
elseif not hm1 then
hm = hm2
else
-- Both matched. Select the longest.
hm = (#hm1[5] > #hm2[5]) and hm1 or hm2
end
-- A half-match was found, sort out the return data.
local text1_a, text1_b, text2_a, text2_b
if (#text1 > #text2) then
text1_a, text1_b = hm[1], hm[2]
text2_a, text2_b = hm[3], hm[4]
else
text2_a, text2_b = hm[1], hm[2]
text1_a, text1_b = hm[3], hm[4]
end
local mid_common = hm[5]
return text1_a, text1_b, text2_a, text2_b, mid_common
end
--[[
* Given two strings, compute a score representing whether the internal
* boundary falls on logical boundaries.
* Scores range from 6 (best) to 0 (worst).
* @param {string} one First string.
* @param {string} two Second string.
* @return {number} The score.
* @private
--]]
function _diff_cleanupSemanticScore(one, two)
if (#one == 0) or (#two == 0) then
-- Edges are the best.
return 6
end
-- Each port of this function behaves slightly differently due to
-- subtle differences in each language's definition of things like
-- 'whitespace'. Since this function's purpose is largely cosmetic,
-- the choice has been made to use each language's native features
-- rather than force total conformity.
local char1 = strsub(one, -1)
local char2 = strsub(two, 1, 1)
local nonAlphaNumeric1 = strmatch(char1, '%W')
local nonAlphaNumeric2 = strmatch(char2, '%W')
local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s')
local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s')
local lineBreak1 = whitespace1 and strmatch(char1, '%c')
local lineBreak2 = whitespace2 and strmatch(char2, '%c')
local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$')
local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n')
if blankLine1 or blankLine2 then
-- Five points for blank lines.
return 5
elseif lineBreak1 or lineBreak2 then
-- Four points for line breaks.
return 4
elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then
-- Three points for end of sentences.
return 3
elseif whitespace1 or whitespace2 then
-- Two points for whitespace.
return 2
elseif nonAlphaNumeric1 or nonAlphaNumeric2 then
-- One point for non-alphanumeric.
return 1
end
return 0
end
--[[
* Look for single edits surrounded on both sides by equalities
* which can be shifted sideways to align the edit to a word boundary.
* e.g: The cat came. -> The cat came.
* @param {Array.>} diffs Array of diff tuples.
--]]
function _diff_cleanupSemanticLossless(diffs)
local pointer = 2
-- Intentionally ignore the first and last element (don't need checking).
while diffs[pointer + 1] do
local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
-- This is a single edit surrounded by equalities.
local diff = diffs[pointer]
local equality1 = prevDiff[2]
local edit = diff[2]
local equality2 = nextDiff[2]
-- First, shift the edit as far left as possible.
local commonOffset = _diff_commonSuffix(equality1, edit)
if commonOffset > 0 then
local commonString = strsub(edit, -commonOffset)
equality1 = strsub(equality1, 1, -commonOffset - 1)
edit = commonString .. strsub(edit, 1, -commonOffset - 1)
equality2 = commonString .. equality2
end
-- Second, step character by character right, looking for the best fit.
local bestEquality1 = equality1
local bestEdit = edit
local bestEquality2 = equality2
local bestScore = _diff_cleanupSemanticScore(equality1, edit)
+ _diff_cleanupSemanticScore(edit, equality2)
while strbyte(edit, 1) == strbyte(equality2, 1) do
equality1 = equality1 .. strsub(edit, 1, 1)
edit = strsub(edit, 2) .. strsub(equality2, 1, 1)
equality2 = strsub(equality2, 2)
local score = _diff_cleanupSemanticScore(equality1, edit)
+ _diff_cleanupSemanticScore(edit, equality2)
-- The >= encourages trailing rather than leading whitespace on edits.
if score >= bestScore then
bestScore = score
bestEquality1 = equality1
bestEdit = edit
bestEquality2 = equality2
end
end
if prevDiff[2] ~= bestEquality1 then
-- We have an improvement, save it back to the diff.
if #bestEquality1 > 0 then
diffs[pointer - 1][2] = bestEquality1
else
tremove(diffs, pointer - 1)
pointer = pointer - 1
end
diffs[pointer][2] = bestEdit
if #bestEquality2 > 0 then
diffs[pointer + 1][2] = bestEquality2
else
tremove(diffs, pointer + 1, 1)
pointer = pointer - 1
end
end
end
pointer = pointer + 1
end
end
--[[
* Reorder and merge like edit sections. Merge equalities.
* Any edit section can move as long as it doesn't cross an equality.
* @param {Array.>} diffs Array of diff tuples.
--]]
function _diff_cleanupMerge(diffs)
diffs[#diffs + 1] = {DIFF_EQUAL, ''} -- Add a dummy entry at the end.
local pointer = 1
local count_delete, count_insert = 0, 0
local text_delete, text_insert = '', ''
local commonlength
while diffs[pointer] do
local diff_type = diffs[pointer][1]
if diff_type == DIFF_INSERT then
count_insert = count_insert + 1
text_insert = text_insert .. diffs[pointer][2]
pointer = pointer + 1
elseif diff_type == DIFF_DELETE then
count_delete = count_delete + 1
text_delete = text_delete .. diffs[pointer][2]
pointer = pointer + 1
elseif diff_type == DIFF_EQUAL then
-- Upon reaching an equality, check for prior redundancies.
if count_delete + count_insert > 1 then
if (count_delete > 0) and (count_insert > 0) then
-- Factor out any common prefixies.
commonlength = _diff_commonPrefix(text_insert, text_delete)
if commonlength > 0 then
local back_pointer = pointer - count_delete - count_insert
if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL)
then
diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2]
.. strsub(text_insert, 1, commonlength)
else
tinsert(diffs, 1,
{DIFF_EQUAL, strsub(text_insert, 1, commonlength)})
pointer = pointer + 1
end
text_insert = strsub(text_insert, commonlength + 1)
text_delete = strsub(text_delete, commonlength + 1)
end
-- Factor out any common suffixies.
commonlength = _diff_commonSuffix(text_insert, text_delete)
if commonlength ~= 0 then
diffs[pointer][2] =
strsub(text_insert, -commonlength) .. diffs[pointer][2]
text_insert = strsub(text_insert, 1, -commonlength - 1)
text_delete = strsub(text_delete, 1, -commonlength - 1)
end
end
-- Delete the offending records and add the merged ones.
pointer = pointer - count_delete - count_insert
for i = 1, count_delete + count_insert do
tremove(diffs, pointer)
end
if #text_delete > 0 then
tinsert(diffs, pointer, {DIFF_DELETE, text_delete})
pointer = pointer + 1
end
if #text_insert > 0 then
tinsert(diffs, pointer, {DIFF_INSERT, text_insert})
pointer = pointer + 1
end
pointer = pointer + 1
elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then
-- Merge this equality with the previous one.
diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2]
tremove(diffs, pointer)
else
pointer = pointer + 1
end
count_insert, count_delete = 0, 0
text_delete, text_insert = '', ''
end
end
if diffs[#diffs][2] == '' then
diffs[#diffs] = nil -- Remove the dummy entry at the end.
end
-- Second pass: look for single edits surrounded on both sides by equalities
-- which can be shifted sideways to eliminate an equality.
-- e.g: ABAC -> ABAC
local changes = false
pointer = 2
-- Intentionally ignore the first and last element (don't need checking).
while pointer < #diffs do
local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
-- This is a single edit surrounded by equalities.
local diff = diffs[pointer]
local currentText = diff[2]
local prevText = prevDiff[2]
local nextText = nextDiff[2]
if #prevText == 0 then
tremove(diffs, pointer - 1)
changes = true
elseif strsub(currentText, -#prevText) == prevText then
-- Shift the edit over the previous equality.
diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1)
nextDiff[2] = prevText .. nextDiff[2]
tremove(diffs, pointer - 1)
changes = true
elseif strsub(currentText, 1, #nextText) == nextText then
-- Shift the edit over the next equality.
prevDiff[2] = prevText .. nextText
diff[2] = strsub(currentText, #nextText + 1) .. nextText
tremove(diffs, pointer + 1)
changes = true
end
end
pointer = pointer + 1
end
-- If shifts were made, the diff needs reordering and another shift sweep.
if changes then
-- LUANOTE: no return value, but necessary to use 'return' to get
-- tail calls.
return _diff_cleanupMerge(diffs)
end
end
--[[
* loc is a location in text1, compute and return the equivalent location in
* text2.
* e.g. 'The cat' vs 'The big cat', 1->1, 5->8
* @param {Array.>} diffs Array of diff tuples.
* @param {number} loc Location within text1.
* @return {number} Location within text2.
--]]
function _diff_xIndex(diffs, loc)
local chars1 = 1
local chars2 = 1
local last_chars1 = 1
local last_chars2 = 1
local x
for _x, diff in ipairs(diffs) do
x = _x
if diff[1] ~= DIFF_INSERT then -- Equality or deletion.
chars1 = chars1 + #diff[2]
end
if diff[1] ~= DIFF_DELETE then -- Equality or insertion.
chars2 = chars2 + #diff[2]
end
if chars1 > loc then -- Overshot the location.
break
end
last_chars1 = chars1
last_chars2 = chars2
end
-- Was the location deleted?
if diffs[x + 1] and (diffs[x][1] == DIFF_DELETE) then
return last_chars2
end
-- Add the remaining character length.
return last_chars2 + (loc - last_chars1)
end
--[[
* Compute and return the source text (all equalities and deletions).
* @param {Array.>} diffs Array of diff tuples.
* @return {string} Source text.
--]]
function _diff_text1(diffs)
local text = {}
for x, diff in ipairs(diffs) do
if diff[1] ~= DIFF_INSERT then
text[#text + 1] = diff[2]
end
end
return tconcat(text)
end
--[[
* Compute and return the destination text (all equalities and insertions).
* @param {Array.>} diffs Array of diff tuples.
* @return {string} Destination text.
--]]
function _diff_text2(diffs)
local text = {}
for x, diff in ipairs(diffs) do
if diff[1] ~= DIFF_DELETE then
text[#text + 1] = diff[2]
end
end
return tconcat(text)
end
--[[
* Crush the diff into an encoded string which describes the operations
* required to transform text1 into text2.
* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Operations are tab-separated. Inserted text is escaped using %xx notation.
* @param {Array.>} diffs Array of diff tuples.
* @return {string} Delta text.
--]]
function _diff_toDelta(diffs)
local text = {}
for x, diff in ipairs(diffs) do
local op, data = diff[1], diff[2]
if op == DIFF_INSERT then
text[x] = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace)
elseif op == DIFF_DELETE then
text[x] = '-' .. #data
elseif op == DIFF_EQUAL then
text[x] = '=' .. #data
end
end
return tconcat(text, '\t')
end
--[[
* Given the original text1, and an encoded string which describes the
* operations required to transform text1 into text2, compute the full diff.
* @param {string} text1 Source string for the diff.
* @param {string} delta Delta text.
* @return {Array.>} Array of diff tuples.
* @throws {Errorend If invalid input.
--]]
function _diff_fromDelta(text1, delta)
local diffs = {}
local diffsLength = 0 -- Keeping our own length var is faster
local pointer = 1 -- Cursor in text1
for token in gmatch(delta, '[^\t]+') do
-- Each token begins with a one character parameter which specifies the
-- operation of this token (delete, insert, equality).
local tokenchar, param = strsub(token, 1, 1), strsub(token, 2)
if (tokenchar == '+') then
local invalidDecode = false
local decoded = gsub(param, '%%(.?.?)',
function(c)
local n = tonumber(c, 16)
if (#c ~= 2) or (n == nil) then
invalidDecode = true
return ''
end
return strchar(n)
end)
if invalidDecode then
-- Malformed URI sequence.
error('Illegal escape in _diff_fromDelta: ' .. param)
end
diffsLength = diffsLength + 1
diffs[diffsLength] = {DIFF_INSERT, decoded}
elseif (tokenchar == '-') or (tokenchar == '=') then
local n = tonumber(param)
if (n == nil) or (n < 0) then
error('Invalid number in _diff_fromDelta: ' .. param)
end
local text = strsub(text1, pointer, pointer + n - 1)
pointer = pointer + n
if (tokenchar == '=') then
diffsLength = diffsLength + 1
diffs[diffsLength] = {DIFF_EQUAL, text}
else
diffsLength = diffsLength + 1
diffs[diffsLength] = {DIFF_DELETE, text}
end
else
error('Invalid diff operation in _diff_fromDelta: ' .. token)
end
end
if (pointer ~= #text1 + 1) then
error('Delta length (' .. (pointer - 1)
.. ') does not equal source text length (' .. #text1 .. ').')
end
return diffs
end
-- ---------------------------------------------------------------------------
-- MATCH API
-- ---------------------------------------------------------------------------
local _match_bitap, _match_alphabet
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
--]]
function match_main(text, pattern, loc)
-- Check for null inputs.
if text == nil or pattern == nil or loc == nil then
error('Null inputs. (match_main)')
end
if text == pattern then
-- Shortcut (potentially not guaranteed by the algorithm)
return 1
elseif #text == 0 then
-- Nothing to match.
return -1
end
loc = max(1, min(loc, #text))
if strsub(text, loc, loc + #pattern - 1) == pattern then
-- Perfect match at the perfect spot! (Includes case of null pattern)
return loc
else
-- Do a fuzzy compare.
return _match_bitap(text, pattern, loc)
end
end
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
-- ---------------------------------------------------------------------------
--[[
* Initialise the alphabet for the Bitap algorithm.
* @param {string} pattern The text to encode.
* @return {Object} Hash of character locations.
* @private
--]]
function _match_alphabet(pattern)
local s = {}
local i = 0
for c in gmatch(pattern, '.') do
s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1))
i = i + 1
end
return s
end
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc' using the
* Bitap algorithm.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @private
--]]
function _match_bitap(text, pattern, loc)
if #pattern > Match_MaxBits then
error('Pattern too long.')
end
-- Initialise the alphabet.
local s = _match_alphabet(pattern)
--[[
* Compute and return the score for a match with e errors and x location.
* Accesses loc and pattern through being a closure.
* @param {number} e Number of errors in match.
* @param {number} x Location of match.
* @return {number} Overall score for match (0.0 = good, 1.0 = bad).
* @private
--]]
local function _match_bitapScore(e, x)
local accuracy = e / #pattern
local proximity = abs(loc - x)
if (Match_Distance == 0) then
-- Dodge divide by zero error.
return (proximity == 0) and 1 or accuracy
end
return accuracy + (proximity / Match_Distance)
end
-- Highest score beyond which we give up.
local score_threshold = Match_Threshold
-- Is there a nearby exact match? (speedup)
local best_loc = indexOf(text, pattern, loc)
if best_loc then
score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
-- LUANOTE: Ideally we'd also check from the other direction, but Lua
-- doesn't have an efficent lastIndexOf function.
end
-- Initialise the bit arrays.
local matchmask = lshift(1, #pattern - 1)
best_loc = -1
local bin_min, bin_mid
local bin_max = #pattern + #text
local last_rd
for d = 0, #pattern - 1, 1 do
-- Scan for the best match; each iteration allows for one more error.
-- Run a binary search to determine how far from 'loc' we can stray at this
-- error level.
bin_min = 0
bin_mid = bin_max
while (bin_min < bin_mid) do
if (_match_bitapScore(d, loc + bin_mid) <= score_threshold) then
bin_min = bin_mid
else
bin_max = bin_mid
end
bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
end
-- Use the result from this iteration as the maximum for the next.
bin_max = bin_mid
local start = max(1, loc - bin_mid + 1)
local finish = min(loc + bin_mid, #text) + #pattern
local rd = {}
for j = start, finish do
rd[j] = 0
end
rd[finish + 1] = lshift(1, d) - 1
for j = finish, start, -1 do
local charMatch = s[strsub(text, j - 1, j - 1)] or 0
if (d == 0) then -- First pass: exact match.
rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
else
-- Subsequent passes: fuzzy match.
-- Functions instead of operators make this hella messy.
rd[j] = bor(
band(
bor(
lshift(rd[j + 1], 1),
1
),
charMatch
),
bor(
bor(
lshift(bor(last_rd[j + 1], last_rd[j]), 1),
1
),
last_rd[j + 1]
)
)
end
if (band(rd[j], matchmask) ~= 0) then
local score = _match_bitapScore(d, j - 1)
-- This match will almost certainly be better than any existing match.
-- But check anyway.
if (score <= score_threshold) then
-- Told you so.
score_threshold = score
best_loc = j - 1
if (best_loc > loc) then
-- When passing loc, don't exceed our current distance from loc.
start = max(1, loc * 2 - best_loc)
else
-- Already passed loc, downhill from here on in.
break
end
end
end
end
-- No hope for a (better) match at greater error levels.
if (_match_bitapScore(d + 1, loc) > score_threshold) then
break
end
last_rd = rd
end
return best_loc
end
-- -----------------------------------------------------------------------------
-- PATCH API
-- -----------------------------------------------------------------------------
local _patch_addContext,
_patch_deepCopy,
_patch_addPadding,
_patch_splitMax,
_patch_appendText,
_new_patch_obj
--[[
* Compute a list of patches to turn text1 into text2.
* Use diffs if provided, otherwise compute it ourselves.
* There are four ways to call this function, depending on what data is
* available to the caller:
* Method 1:
* a = text1, b = text2
* Method 2:
* a = diffs
* Method 3 (optimal):
* a = text1, b = diffs
* Method 4 (deprecated, use method 3):
* a = text1, b = text2, c = diffs
*
* @param {string|Array.>} a text1 (methods 1,3,4) or
* Array of diff tuples for text1 to text2 (method 2).
* @param {string|Array.>} opt_b text2 (methods 1,4) or
* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
* @param {string|Array.>} opt_c Array of diff tuples for
* text1 to text2 (method 4) or undefined (methods 1,2,3).
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function patch_make(a, opt_b, opt_c)
local text1, diffs
local type_a, type_b, type_c = type(a), type(opt_b), type(opt_c)
if (type_a == 'string') and (type_b == 'string') and (type_c == 'nil') then
-- Method 1: text1, text2
-- Compute diffs from text1 and text2.
text1 = a
diffs = diff_main(text1, opt_b, true)
if (#diffs > 2) then
diff_cleanupSemantic(diffs)
diff_cleanupEfficiency(diffs)
end
elseif (type_a == 'table') and (type_b == 'nil') and (type_c == 'nil') then
-- Method 2: diffs
-- Compute text1 from diffs.
diffs = a
text1 = _diff_text1(diffs)
elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then
-- Method 3: text1, diffs
text1 = a
diffs = opt_b
elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table')
then
-- Method 4: text1, text2, diffs
-- text2 is not used.
text1 = a
diffs = opt_c
else
error('Unknown call format to patch_make.')
end
if (diffs[1] == nil) then
return {} -- Get rid of the null case.
end
local patches = {}
local patch = _new_patch_obj()
local patchDiffLength = 0 -- Keeping our own length var is faster.
local char_count1 = 0 -- Number of characters into the text1 string.
local char_count2 = 0 -- Number of characters into the text2 string.
-- Start with text1 (prepatch_text) and apply the diffs until we arrive at
-- text2 (postpatch_text). We recreate the patches one by one to determine
-- context info.
local prepatch_text, postpatch_text = text1, text1
for x, diff in ipairs(diffs) do
local diff_type, diff_text = diff[1], diff[2]
if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then
-- A new patch starts here.
patch.start1 = char_count1 + 1
patch.start2 = char_count2 + 1
end
if (diff_type == DIFF_INSERT) then
patchDiffLength = patchDiffLength + 1
patch.diffs[patchDiffLength] = diff
patch.length2 = patch.length2 + #diff_text
postpatch_text = strsub(postpatch_text, 1, char_count2)
.. diff_text .. strsub(postpatch_text, char_count2 + 1)
elseif (diff_type == DIFF_DELETE) then
patch.length1 = patch.length1 + #diff_text
patchDiffLength = patchDiffLength + 1
patch.diffs[patchDiffLength] = diff
postpatch_text = strsub(postpatch_text, 1, char_count2)
.. strsub(postpatch_text, char_count2 + #diff_text + 1)
elseif (diff_type == DIFF_EQUAL) then
if (#diff_text <= Patch_Margin * 2)
and (patchDiffLength ~= 0) and (#diffs ~= x) then
-- Small equality inside a patch.
patchDiffLength = patchDiffLength + 1
patch.diffs[patchDiffLength] = diff
patch.length1 = patch.length1 + #diff_text
patch.length2 = patch.length2 + #diff_text
elseif (#diff_text >= Patch_Margin * 2) then
-- Time for a new patch.
if (patchDiffLength ~= 0) then
_patch_addContext(patch, prepatch_text)
patches[#patches + 1] = patch
patch = _new_patch_obj()
patchDiffLength = 0
-- Unlike Unidiff, our patch lists have a rolling context.
-- https://github.com/google/diff-match-patch/wiki/Unidiff
-- Update prepatch text & pos to reflect the application of the
-- just completed patch.
prepatch_text = postpatch_text
char_count1 = char_count2
end
end
end
-- Update the current character count.
if (diff_type ~= DIFF_INSERT) then
char_count1 = char_count1 + #diff_text
end
if (diff_type ~= DIFF_DELETE) then
char_count2 = char_count2 + #diff_text
end
end
-- Pick up the leftover patch if not empty.
if (patchDiffLength > 0) then
_patch_addContext(patch, prepatch_text)
patches[#patches + 1] = patch
end
return patches
end
--[[
* Merge a set of patches onto the text. Return a patched text, as well
* as a list of true/false values indicating which patches were applied.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @param {string} text Old text.
* @return {Array.>} Two return values, the
* new text and an array of boolean values.
--]]
function patch_apply(patches, text)
if patches[1] == nil then
return text, {}
end
-- Deep copy the patches so that no changes are made to originals.
patches = _patch_deepCopy(patches)
local nullPadding = _patch_addPadding(patches)
text = nullPadding .. text .. nullPadding
_patch_splitMax(patches)
-- delta keeps track of the offset between the expected and actual location
-- of the previous patch. If there are patches expected at positions 10 and
-- 20, but the first patch was found at 12, delta is 2 and the second patch
-- has an effective expected position of 22.
local delta = 0
local results = {}
for x, patch in ipairs(patches) do
local expected_loc = patch.start2 + delta
local text1 = _diff_text1(patch.diffs)
local start_loc
local end_loc = -1
if #text1 > Match_MaxBits then
-- _patch_splitMax will only provide an oversized pattern in
-- the case of a monster delete.
start_loc = match_main(text,
strsub(text1, 1, Match_MaxBits), expected_loc)
if start_loc ~= -1 then
end_loc = match_main(text, strsub(text1, -Match_MaxBits),
expected_loc + #text1 - Match_MaxBits)
if end_loc == -1 or start_loc >= end_loc then
-- Can't find valid trailing context. Drop this patch.
start_loc = -1
end
end
else
start_loc = match_main(text, text1, expected_loc)
end
if start_loc == -1 then
-- No match found. :(
results[x] = false
-- Subtract the delta for this failed patch from subsequent patches.
delta = delta - patch.length2 - patch.length1
else
-- Found a match. :)
results[x] = true
delta = start_loc - expected_loc
local text2
if end_loc == -1 then
text2 = strsub(text, start_loc, start_loc + #text1 - 1)
else
text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
end
if text1 == text2 then
-- Perfect match, just shove the replacement text in.
text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs)
.. strsub(text, start_loc + #text1)
else
-- Imperfect match. Run a diff to get a framework of equivalent
-- indices.
local diffs = diff_main(text1, text2, false)
if (#text1 > Match_MaxBits)
and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
-- The end points match, but the content is unacceptably bad.
results[x] = false
else
_diff_cleanupSemanticLossless(diffs)
local index1 = 1
local index2
for y, mod in ipairs(patch.diffs) do
if mod[1] ~= DIFF_EQUAL then
index2 = _diff_xIndex(diffs, index1)
end
if mod[1] == DIFF_INSERT then
text = strsub(text, 1, start_loc + index2 - 2)
.. mod[2] .. strsub(text, start_loc + index2 - 1)
elseif mod[1] == DIFF_DELETE then
text = strsub(text, 1, start_loc + index2 - 2) .. strsub(text,
start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
end
if mod[1] ~= DIFF_DELETE then
index1 = index1 + #mod[2]
end
end
end
end
end
end
-- Strip the padding off.
text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
return text, results
end
--[[
* Take a list of patches and return a textual representation.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} Text representation of patches.
--]]
function patch_toText(patches)
local text = {}
for x, patch in ipairs(patches) do
_patch_appendText(patch, text)
end
return tconcat(text)
end
--[[
* Parse a textual representation of patches and return a list of patch objects.
* @param {string} textline Text representation of patches.
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If invalid input.
--]]
function patch_fromText(textline)
local patches = {}
if (#textline == 0) then
return patches
end
local text = {}
for line in gmatch(textline, '([^\n]*)') do
text[#text + 1] = line
end
local textPointer = 1
while (textPointer <= #text) do
local start1, length1, start2, length2
= strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$')
if (start1 == nil) then
error('Invalid patch string: "' .. text[textPointer] .. '"')
end
local patch = _new_patch_obj()
patches[#patches + 1] = patch
start1 = tonumber(start1)
length1 = tonumber(length1) or 1
if (length1 == 0) then
start1 = start1 + 1
end
patch.start1 = start1
patch.length1 = length1
start2 = tonumber(start2)
length2 = tonumber(length2) or 1
if (length2 == 0) then
start2 = start2 + 1
end
patch.start2 = start2
patch.length2 = length2
textPointer = textPointer + 1
while true do
local line = text[textPointer]
if (line == nil) then
break
end
local sign; sign, line = strsub(line, 1, 1), strsub(line, 2)
local invalidDecode = false
local decoded = gsub(line, '%%(.?.?)',
function(c)
local n = tonumber(c, 16)
if (#c ~= 2) or (n == nil) then
invalidDecode = true
return ''
end
return strchar(n)
end)
if invalidDecode then
-- Malformed URI sequence.
error('Illegal escape in patch_fromText: ' .. line)
end
line = decoded
if (sign == '-') then
-- Deletion.
patch.diffs[#patch.diffs + 1] = {DIFF_DELETE, line}
elseif (sign == '+') then
-- Insertion.
patch.diffs[#patch.diffs + 1] = {DIFF_INSERT, line}
elseif (sign == ' ') then
-- Minor equality.
patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, line}
elseif (sign == '@') then
-- Start of next patch.
break
elseif (sign == '') then
-- Blank line? Whatever.
else
-- WTF?
error('Invalid patch mode "' .. sign .. '" in: ' .. line)
end
textPointer = textPointer + 1
end
end
return patches
end
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
-- ---------------------------------------------------------------------------
local patch_meta = {
__tostring = function(patch)
local buf = {}
_patch_appendText(patch, buf)
return tconcat(buf)
end
}
--[[
* Class representing one patch operation.
* @constructor
--]]
function _new_patch_obj()
return setmetatable({
--[[ @type {Array.>} ]]
diffs = {};
--[[ @type {?number} ]]
start1 = 1; -- nil;
--[[ @type {?number} ]]
start2 = 1; -- nil;
--[[ @type {number} ]]
length1 = 0;
--[[ @type {number} ]]
length2 = 0;
}, patch_meta)
end
--[[
* Increase the context until it is unique,
* but don't let the pattern expand beyond Match_MaxBits.
* @param {_new_patch_obj} patch The patch to grow.
* @param {string} text Source text.
* @private
--]]
function _patch_addContext(patch, text)
if (#text == 0) then
return
end
local pattern = strsub(text, patch.start2, patch.start2 + patch.length1 - 1)
local padding = 0
-- LUANOTE: Lua's lack of a lastIndexOf function results in slightly
-- different logic here than in other language ports.
-- Look for the first two matches of pattern in text. If two are found,
-- increase the pattern length.
local firstMatch = indexOf(text, pattern)
local secondMatch = nil
if (firstMatch ~= nil) then
secondMatch = indexOf(text, pattern, firstMatch + 1)
end
while (#pattern == 0 or secondMatch ~= nil)
and (#pattern < Match_MaxBits - Patch_Margin - Patch_Margin) do
padding = padding + Patch_Margin
pattern = strsub(text, max(1, patch.start2 - padding),
patch.start2 + patch.length1 - 1 + padding)
firstMatch = indexOf(text, pattern)
if (firstMatch ~= nil) then
secondMatch = indexOf(text, pattern, firstMatch + 1)
else
secondMatch = nil
end
end
-- Add one chunk for good luck.
padding = padding + Patch_Margin
-- Add the prefix.
local prefix = strsub(text, max(1, patch.start2 - padding), patch.start2 - 1)
if (#prefix > 0) then
tinsert(patch.diffs, 1, {DIFF_EQUAL, prefix})
end
-- Add the suffix.
local suffix = strsub(text, patch.start2 + patch.length1,
patch.start2 + patch.length1 - 1 + padding)
if (#suffix > 0) then
patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, suffix}
end
-- Roll back the start points.
patch.start1 = patch.start1 - #prefix
patch.start2 = patch.start2 - #prefix
-- Extend the lengths.
patch.length1 = patch.length1 + #prefix + #suffix
patch.length2 = patch.length2 + #prefix + #suffix
end
--[[
* Given an array of patches, return another array that is identical.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function _patch_deepCopy(patches)
local patchesCopy = {}
for x, patch in ipairs(patches) do
local patchCopy = _new_patch_obj()
local diffsCopy = {}
for i, diff in ipairs(patch.diffs) do
diffsCopy[i] = {diff[1], diff[2]}
end
patchCopy.diffs = diffsCopy
patchCopy.start1 = patch.start1
patchCopy.start2 = patch.start2
patchCopy.length1 = patch.length1
patchCopy.length2 = patch.length2
patchesCopy[x] = patchCopy
end
return patchesCopy
end
--[[
* Add some padding on text start and end so that edges can match something.
* Intended to be called only from within patch_apply.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} The padding string added to each side.
--]]
function _patch_addPadding(patches)
local paddingLength = Patch_Margin
local nullPadding = ''
for x = 1, paddingLength do
nullPadding = nullPadding .. strchar(x)
end
-- Bump all the patches forward.
for x, patch in ipairs(patches) do
patch.start1 = patch.start1 + paddingLength
patch.start2 = patch.start2 + paddingLength
end
-- Add some padding on start of first diff.
local patch = patches[1]
local diffs = patch.diffs
local firstDiff = diffs[1]
if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then
-- Add nullPadding equality.
tinsert(diffs, 1, {DIFF_EQUAL, nullPadding})
patch.start1 = patch.start1 - paddingLength -- Should be 0.
patch.start2 = patch.start2 - paddingLength -- Should be 0.
patch.length1 = patch.length1 + paddingLength
patch.length2 = patch.length2 + paddingLength
elseif (paddingLength > #firstDiff[2]) then
-- Grow first equality.
local extraLength = paddingLength - #firstDiff[2]
firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2]
patch.start1 = patch.start1 - extraLength
patch.start2 = patch.start2 - extraLength
patch.length1 = patch.length1 + extraLength
patch.length2 = patch.length2 + extraLength
end
-- Add some padding on end of last diff.
patch = patches[#patches]
diffs = patch.diffs
local lastDiff = diffs[#diffs]
if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then
-- Add nullPadding equality.
diffs[#diffs + 1] = {DIFF_EQUAL, nullPadding}
patch.length1 = patch.length1 + paddingLength
patch.length2 = patch.length2 + paddingLength
elseif (paddingLength > #lastDiff[2]) then
-- Grow last equality.
local extraLength = paddingLength - #lastDiff[2]
lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength)
patch.length1 = patch.length1 + extraLength
patch.length2 = patch.length2 + extraLength
end
return nullPadding
end
--[[
* Look through the patches and break up any which are longer than the maximum
* limit of the match algorithm.
* Intended to be called only from within patch_apply.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
--]]
function _patch_splitMax(patches)
local patch_size = Match_MaxBits
local x = 1
while true do
local patch = patches[x]
if patch == nil then
return
end
if patch.length1 > patch_size then
local bigpatch = patch
-- Remove the big old patch.
tremove(patches, x)
x = x - 1
local start1 = bigpatch.start1
local start2 = bigpatch.start2
local precontext = ''
while bigpatch.diffs[1] do
-- Create one of several smaller patches.
local patch = _new_patch_obj()
local empty = true
patch.start1 = start1 - #precontext
patch.start2 = start2 - #precontext
if precontext ~= '' then
patch.length1, patch.length2 = #precontext, #precontext
patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, precontext}
end
while bigpatch.diffs[1] and (patch.length1 < patch_size-Patch_Margin) do
local diff_type = bigpatch.diffs[1][1]
local diff_text = bigpatch.diffs[1][2]
if (diff_type == DIFF_INSERT) then
-- Insertions are harmless.
patch.length2 = patch.length2 + #diff_text
start2 = start2 + #diff_text
patch.diffs[#(patch.diffs) + 1] = bigpatch.diffs[1]
tremove(bigpatch.diffs, 1)
empty = false
elseif (diff_type == DIFF_DELETE) and (#patch.diffs == 1)
and (patch.diffs[1][1] == DIFF_EQUAL)
and (#diff_text > 2 * patch_size) then
-- This is a large deletion. Let it pass in one chunk.
patch.length1 = patch.length1 + #diff_text
start1 = start1 + #diff_text
empty = false
patch.diffs[#patch.diffs + 1] = {diff_type, diff_text}
tremove(bigpatch.diffs, 1)
else
-- Deletion or equality.
-- Only take as much as we can stomach.
diff_text = strsub(diff_text, 1,
patch_size - patch.length1 - Patch_Margin)
patch.length1 = patch.length1 + #diff_text
start1 = start1 + #diff_text
if (diff_type == DIFF_EQUAL) then
patch.length2 = patch.length2 + #diff_text
start2 = start2 + #diff_text
else
empty = false
end
patch.diffs[#patch.diffs + 1] = {diff_type, diff_text}
if (diff_text == bigpatch.diffs[1][2]) then
tremove(bigpatch.diffs, 1)
else
bigpatch.diffs[1][2]
= strsub(bigpatch.diffs[1][2], #diff_text + 1)
end
end
end
-- Compute the head context for the next patch.
precontext = _diff_text2(patch.diffs)
precontext = strsub(precontext, -Patch_Margin)
-- Append the end context for this patch.
local postcontext = strsub(_diff_text1(bigpatch.diffs), 1, Patch_Margin)
if postcontext ~= '' then
patch.length1 = patch.length1 + #postcontext
patch.length2 = patch.length2 + #postcontext
if patch.diffs[1]
and (patch.diffs[#patch.diffs][1] == DIFF_EQUAL) then
patch.diffs[#patch.diffs][2] = patch.diffs[#patch.diffs][2]
.. postcontext
else
patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, postcontext}
end
end
if not empty then
x = x + 1
tinsert(patches, x, patch)
end
end
end
x = x + 1
end
end
--[[
* Emulate GNU diff's format.
* Header: @@ -382,8 +481,9 @@
* @return {string} The GNU diff string.
--]]
function _patch_appendText(patch, text)
local coords1, coords2
local length1, length2 = patch.length1, patch.length2
local start1, start2 = patch.start1, patch.start2
local diffs = patch.diffs
if length1 == 1 then
coords1 = start1
else
coords1 = ((length1 == 0) and (start1 - 1) or start1) .. ',' .. length1
end
if length2 == 1 then
coords2 = start2
else
coords2 = ((length2 == 0) and (start2 - 1) or start2) .. ',' .. length2
end
text[#text + 1] = '@@ -' .. coords1 .. ' +' .. coords2 .. ' @@\n'
local op
-- Escape the body of the patch with %xx notation.
for x, diff in ipairs(patch.diffs) do
local diff_type = diff[1]
if diff_type == DIFF_INSERT then
op = '+'
elseif diff_type == DIFF_DELETE then
op = '-'
elseif diff_type == DIFF_EQUAL then
op = ' '
end
text[#text + 1] = op
.. gsub(diffs[x][2], percentEncode_pattern, percentEncode_replace)
.. '\n'
end
return text
end
-- Expose the API
local _M = {}
_M.DIFF_DELETE = DIFF_DELETE
_M.DIFF_INSERT = DIFF_INSERT
_M.DIFF_EQUAL = DIFF_EQUAL
_M.diff_main = diff_main
_M.diff_cleanupSemantic = diff_cleanupSemantic
_M.diff_cleanupEfficiency = diff_cleanupEfficiency
_M.diff_levenshtein = diff_levenshtein
_M.diff_prettyHtml = diff_prettyHtml
_M.match_main = match_main
_M.patch_make = patch_make
_M.patch_toText = patch_toText
_M.patch_fromText = patch_fromText
_M.patch_apply = patch_apply
-- Expose some non-API functions as well, for testing purposes etc.
_M.diff_commonPrefix = _diff_commonPrefix
_M.diff_commonSuffix = _diff_commonSuffix
_M.diff_commonOverlap = _diff_commonOverlap
_M.diff_halfMatch = _diff_halfMatch
_M.diff_bisect = _diff_bisect
_M.diff_cleanupMerge = _diff_cleanupMerge
_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless
_M.diff_text1 = _diff_text1
_M.diff_text2 = _diff_text2
_M.diff_toDelta = _diff_toDelta
_M.diff_fromDelta = _diff_fromDelta
_M.diff_xIndex = _diff_xIndex
_M.match_alphabet = _match_alphabet
_M.match_bitap = _match_bitap
_M.new_patch_obj = _new_patch_obj
_M.patch_addContext = _patch_addContext
_M.patch_splitMax = _patch_splitMax
_M.patch_addPadding = _patch_addPadding
_M.settings = settings
return _M