Module:Diff
Code · Discussion · Edit · History · Links · Link count · Subpages: Documentation · Tests · Results · Sandbox · Live code · All modules
This module uses TemplateStyles: Module:Diff/styles.css
Provides functions for diffing text.
Usage
Ciaran Hope (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.
Difference in words
{{TextDiff|Ciaran Hope (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|Ciaran Hope (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
− | [[w:Ciaran Hope|Ciaran Hope]] | + | [[w:Ciaran Hope|Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Difference in characters
{{StringDiff|Ciaran Hope (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|Ciaran Hope (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
− | [[w:Ciaran Hope|Ciaran Hope]] ( | + | [[w:Ciaran Hope|Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Example with invoke
{{#invoke:Diff|main|[[w:Ciaran Hope|Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[w:Ciaran Hope|Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
− | [[w:Ciaran Hope|Ciaran Hope]] | + | [[w:Ciaran Hope|Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Code
-----------------------------------------------------------------------------
-- Provides functions for diffing text.
--
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
--
-- 2013-, Adapted to MediaWiki's Lua system originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------
-- Boolean flag constant. It is not referenced anywhere else in this file;
-- presumably meant as split()'s skip_separator argument (TODO confirm).
local SKIP_SEPARATOR = true
-- Status tags attached to every token of a diff:
--   IN   - the token is only present in the new text (an insertion)
--   OUT  - the token is only present in the old text (a deletion)
--   SAME - the token is common to both texts
local IN, OUT, SAME = "in", "out", "same"
-----------------------------------------------------------------------------
-- Split a string into tokens. (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.)
--
-- The text in front of every separator match becomes a token (this may be
-- an empty string when the text starts with a separator). Unless
-- skip_separator is set, each matched separator is emitted as a token too,
-- so concatenating all tokens gives back the original text.
--
-- A separator pattern that is able to match the empty string (for example
-- "" or "%s*") would never move forward through the text. For such
-- zero-length matches the character at the match position is consumed as
-- the end of the current token, so an empty pattern splits the text into
-- single characters instead of looping forever.
--
-- @param text A string to be split.
-- @param separator [optional] the separator pattern (defaults to any
-- white space - %s+).
-- @param skip_separator [optional] don't include the separator in the results.
-- @return A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
    separator = separator or "%s+"
    local find, sub = mw.ustring.find, mw.ustring.sub
    local parts = {}
    local start = 1
    local split_start, split_end = find(text, separator, start)
    while split_start do
        if split_end < split_start then
            -- Zero-length match: nothing was consumed by the pattern.
            if sub(text, split_start, split_start) == "" then
                -- The empty match sits at the very end of the text.
                break
            end
            -- Swallow one character so the scan always makes progress.
            parts[#parts + 1] = sub(text, start, split_start)
            start = split_start + 1
        else
            parts[#parts + 1] = sub(text, start, split_start - 1)
            if not skip_separator then
                parts[#parts + 1] = sub(text, split_start, split_end)
            end
            start = split_end + 1
        end
        split_start, split_end = find(text, separator, start)
    end
    -- Whatever follows the last separator is the final token.
    local rest = sub(text, start)
    if rest ~= "" then
        parts[#parts + 1] = rest
    end
    return parts
end
-----------------------------------------------------------------------------
-- Builds the longest-common-subsequence (LCS) length matrix of two token
-- lists. This is a faster implementation than one provided by stdlib.
-- Submitted by Hisham Muhammad. The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- C[i][j] is the length of the LCS of t1[1..i] and t2[1..j]. Rows and cells
-- are created on demand and default to 0, so the borders (C[0][j], C[i][0])
-- and any other index that was never written can be read without special
-- casing.
--
-- The matrix uses the same 1-based indices as the token lists because the
-- backtracking in diff() compares C[i][j-1] with C[i-1][j] for tokens
-- old[i] / new[j]. (Filling it from t1[i-1] / t2[j-1] into C[i+1][j+1]
-- shifts every entry by two cells, which makes the backtracking choose
-- needlessly long edit scripts.)
--
-- @param t1 the first list of tokens.
-- @param t2 the second list of tokens.
-- @return the LCS length matrix described above.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
    local m = #t1
    local n = #t2
    local setmetatable = setmetatable
    -- A missing cell reads as 0 and is cached in its row.
    local row_mt = {
        __index = function(row, k)
            row[k] = 0
            return 0
        end
    }
    -- A missing row is created empty and cached in the matrix.
    local C = setmetatable({}, {
        __index = function(matrix, k)
            local row = setmetatable({}, row_mt)
            matrix[k] = row
            return row
        end
    })
    local max = math.max
    for i = 1, m do
        local prev_row = C[i - 1]
        local row = C[i]
        local token = t1[i]
        for j = 1, n do
            if token == t2[j] then
                -- Matching tokens extend the LCS of both shorter prefixes.
                row[j] = prev_row[j - 1] + 1
            else
                -- Otherwise keep the better of dropping t2[j] or t1[i].
                row[j] = max(row[j - 1], prev_row[j])
            end
        end
    end
    return C
end
-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- Every token is passed through mw.text.nowiki before it is emitted.
-- Tokens with status "in" are wrapped in <ins>, tokens with status "out" in
-- <del>; tokens with any other status are emitted without a wrapper.
--
-- @param tokens a table of {token, status} pairs.
-- @return an HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
    -- Pieces are collected and joined once at the end; appending to one
    -- string inside the loop would copy the whole buffer for every token.
    local pieces = {}
    for _, token_record in ipairs(tokens) do
        local text = mw.text.nowiki(token_record[1])
        local status = token_record[2]
        if status == "in" then
            text = '<ins>'..text..'</ins>'
        elseif status == "out" then
            text = '<del>'..text..'</del>'
        end
        pieces[#pieces + 1] = text
    end
    return table.concat(pieces)
end
-----------------------------------------------------------------------------
-- Computes a token-level diff of two strings.
--
-- Both strings are tokenized with split(). The result is a list of
-- {token, status} pairs in reading order, where status is SAME, IN (the
-- token is only in the new text) or OUT (the token is only in the old
-- text). The tokens shared at the very beginning and at the very end of
-- both texts are each merged into a single SAME entry. The returned list
-- also carries a to_html field that holds format_as_html.
--
-- @param old The "old" text string
-- @param new The "new" text string
-- @param separator [optional] the separator pattern (defaults to any
-- white space).
-- @return A list of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
    assert(old); assert(new)
    new = split(new, separator); old = split(old, separator)

    -- Strip the tokens both texts share at the start and at the end first.
    -- Usually only a few tokens in the middle differ, which keeps the LCS
    -- computation below small.
    local head = {}
    while old[1] and old[1] == new[1] do
        local shared = table.remove(old, 1)
        table.remove(new, 1)
        head[#head + 1] = shared
    end
    local prefix = table.concat(head)

    local tail = {}
    while old[#old] and old[#old] == new[#new] do
        local shared = table.remove(old)
        table.remove(new)
        table.insert(tail, 1, shared)
    end
    local suffix = table.concat(tail)

    -- The matrix is walked from its far corner back to the origin, so the
    -- entries are collected back to front and flipped at the end.
    local backwards = {}
    local function record(token, status)
        backwards[#backwards + 1] = { token, status }
    end

    -- The common suffix is the last entry of the diff, hence recorded first.
    record(suffix, SAME)

    local matrix = quick_LCS(old, new)
    local i, j = #old + 1, #new + 1
    while true do
        local old_token = old[i]
        local new_token = new[j]
        if i >= 1 and j >= 1 and old_token == new_token then
            -- On the first step both tokens are nil (one past the end of
            -- each list); nothing is recorded for that position.
            if old_token then
                record(old_token, SAME)
            end
            i, j = i - 1, j - 1
        else
            local without_new = matrix[i][j - 1]
            local without_old = matrix[i - 1][j]
            if j >= 1 and (i == 0 or without_new >= without_old) then
                record(new_token, IN)
                j = j - 1
            elseif i >= 1 and (j == 0 or without_new < without_old) then
                record(old_token, OUT)
                i = i - 1
            else
                -- Both lists are exhausted.
                break
            end
        end
    end

    -- The common prefix is the first entry of the diff, hence recorded last.
    record(prefix, SAME)

    -- Flip the collected entries into reading order.
    local result = {}
    for k = #backwards, 1, -1 do
        result[#result + 1] = backwards[k]
    end
    result.to_html = format_as_html
    return result
end
-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line.
--
-- Renders the diff of two strings as a one-row HTML table with class
-- "diff": a marker cell and the old text (deleted tokens wrapped in <del>),
-- followed by a marker cell and the new text (inserted tokens wrapped in
-- <ins>). The output starts with a templatestyles tag for
-- Module:Diff/styles.css. Every token is passed through mw.text.nowiki
-- before it is added.
--
-- @param old The "old" text string
-- @param new The "new" text string
-- @param separator [optional] the separator pattern handed to diff().
-- @param opts [optional] table; if opts.oldTitle or opts.newTitle is set, a
-- header row with both titles is put in front of the diff row.
-- @return the rendered HTML as a string.
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator, opts)
    opts = opts or {}
    local tokens = diff(old, new, separator)
    local root = mw.html.create('')
    root:wikitext(mw.getCurrentFrame():extensionTag('templatestyles', '', {src = 'Module:Diff/styles.css'}))
    -- Override default border-width for browsers that support them.
    -- Needed for RTL support; forbidden in TemplateStyles.
    local tdSharedStyle = 'border-inline-end-width: 1px; border-inline-start-width: 4px;'

    -- Escape every token once (both columns reuse the result) and find out
    -- whether anything changed at all.
    local escaped = {}
    local is_different = false
    for idx, token_record in ipairs(tokens) do
        escaped[idx] = mw.text.nowiki(token_record[1])
        if token_record[2] ~= SAME then
            is_different = true
        end
    end

    local tbl = root:tag('table'):attr('lang', ''):addClass('diff')
    if opts.oldTitle or opts.newTitle then
        local header = tbl:tag('tr')
        header:tag('th')
            :attr('scope', 'col')
            :attr('colspan', '2')
            :wikitext(opts.oldTitle)
        header:tag('th')
            :attr('scope', 'col')
            :attr('colspan', '2')
            :wikitext(opts.newTitle)
    end

    local tr = tbl:tag('tr')

    -- Adds the marker cell and the text cell of one side of the diff.
    -- Tokens whose status equals change_status are wrapped in change_tag,
    -- unchanged tokens are added as they are, and tokens belonging to the
    -- other side are left out.
    local function add_side(marker, changed_class, change_tag, change_status)
        -- The filler of an unchanged row is written as an explicit entity:
        -- a literal blank in the source cannot be told apart from an
        -- ordinary space, which would leave the marker cell empty.
        tr:tag('td')
            :addClass('diff-marker')
            :wikitext(is_different and marker or '&nbsp;')
        local cell = tr
            :tag('td')
            :cssText(tdSharedStyle)
            :addClass(is_different and changed_class or 'diff-context')
            :tag('div')
        for idx, token_record in ipairs(tokens) do
            local status = token_record[2]
            if status == change_status then
                cell
                    :tag(change_tag)
                    :addClass('diffchange')
                    :addClass('diffchange-inline')
                    :wikitext(escaped[idx])
            elseif status == SAME then
                cell:wikitext(escaped[idx])
            end
        end
    end

    add_side('−', 'diff-deletedline', 'del', OUT)
    add_side('+', 'diff-addedline', 'ins', IN)
    return tostring(root)
end
-- Normalizes an argument value: the empty string becomes nil, every other
-- value (including nil and non-string values) is returned unchanged.
local function tidyVal(val)
    if val ~= '' then
        return val
    end
    return nil
end
-- Entry point for {{#invoke:Diff|main|old text|new text|separator}}.
--
-- Reads the old text, the new text and an optional separator pattern from
-- the first three arguments of the invoking frame, decodes HTML entities
-- and expands strip markers in both texts, and returns the result of
-- wikiDiff() for them.
--
-- The column titles are taken from the arguments "1title" and "2title" of
-- the invoking frame or, if those are missing or empty, of its parent frame.
--
-- @param frame the frame object of the #invoke call.
-- @return the rendered diff as returned by wikiDiff().
local function main(frame)
    local args = frame.args
    local parent = frame:getParent()
    local pargs = parent and parent.args or {}

    -- A text argument that was not passed at all is treated as an empty
    -- text; mw.text.decode raises an error when it is handed nil.
    local function text_arg(raw)
        return mw.text.unstrip(mw.text.decode(raw or ''))
    end

    return wikiDiff(
        text_arg(args[1]),
        text_arg(args[2]),
        -- An empty separator argument counts as "not given", the same way
        -- empty titles do, so the default pattern is used instead of an
        -- empty pattern.
        tidyVal(args[3]) or '[%s%.:-]+',
        {
            oldTitle = tidyVal(args['1title']) or tidyVal(pargs['1title']),
            newTitle = tidyVal(args['2title']) or tidyVal(pargs['2title']),
        }
    )
end
-- Public interface of the module: the raw token diff, the HTML table
-- renderer, and the #invoke entry point.
local exports = {}
exports.diff = diff
exports.wikiDiff = wikiDiff
exports.main = main
return exports