--- unified.rb (revision 620) +++ unified.rb (revision 644) @@ -1,298 +1,390 @@ module Diff module Display module Unified - - LINE_RE = /@@ [+-]([0-9]+),([0-9]+) [+-]([0-9]+),([0-9]+) @@/ - TABWIDTH = 4 - SPACE = ' ' #' ' - # By defaul don't wrap inline diffs in anything - INLINE_REM_OPEN = "\e[4;33m" - INLINE_REM_CLOSE = "\e[m" - INLINE_ADD_OPEN = "\e[4;35m" - INLINE_ADD_CLOSE = "\e[m" - ESCAPE_HTML = false - class Line < String - attr_reader :add_lineno, :rem_lineno - def initialize(line, type, add_lineno, rem_lineno = add_lineno) + def initialize(line, line_number) super(line) - @type = type - @add_lineno = add_lineno - @rem_lineno = rem_lineno + @line_number = line_number + self end + def contains_inline_change? + @inline + end + def number - add_lineno ? add_lineno : rem_lineno + @line_number end - def type - @type + def decorate(&block) + yield self end - class << self - def add(line, add_lineno) - AddLine.new(line, add_lineno) + def inline_add_open; '' end + def inline_add_close; '' end + def inline_rem_open; '' end + def inline_rem_close; '' end + + protected + + def escape + self end - def rem(line, rem_lineno) - RemLine.new(line, rem_lineno) + def expand + escape.gsub("\t", ' ' * tabwidth).gsub(/ ( +)|^ /) do |match| + (space + ' ') * (match.size / 2) + + space * (match.size % 2) + end end - def unmod(line, lineno) - UnModLine.new(line, lineno) + def tabwidth + 4 end - def mod(line, lineno) - ModLine.new(line, lineno) + + def space + ' ' end + + class << self + def add(line, line_number, inline = false) + AddLine.new(line, line_number, inline) + end + + def rem(line, line_number, inline = false) + RemLine.new(line, line_number, inline) + end + + def unmod(line, line_number) + UnModLine.new(line, line_number) + end end end class AddLine < Line - def initialize(line, add_lineno) - super(line, 'add', add_lineno, nil) + def initialize(line, line_number, inline = false) + line = inline ? line % [inline_add_open, inline_add_close] : line + super(line, line_number) + @inline = inline + self end end class RemLine < Line - def initialize(line, rem_lineno) - super(line, 'rem', nil, rem_lineno) + def initialize(line, line_number, inline = false) + line = inline ? line % [inline_rem_open, inline_rem_close] : line + super(line, line_number) + @inline = inline + self end end class UnModLine < Line - def initialize(line, lineno) - super(line, 'unmod', lineno) + def initialize(line, line_number) + super(line, line_number) end end - class ModLine < Line - def initialize(line, lineno) - super(line, 'mod', lineno) + class SepLine < Line + def initialize(line = '...') + super(line, nil) end end + # This class is an array which contains Line objects. Just like Line + # classes, several Block classes inherit from Block. If all the lines + # in the block are added lines then it is an AddBlock. If all lines + # in the block are removed lines then it is a RemBlock. If the lines + # in the block are all unmodified then it is an UnMod block. If the + # lines in the block are a mixture of added and removed lines then + # it is a ModBlock. There are no blocks that contain a mixture of + # modified and unmodified lines. class Block < Array - def initialize(type) - super(0) - @type = type + def initialize + super + @line_types = [] end def <<(line_object) super(line_object) - (@line_types ||= []).push(line_object.type) - @line_types.uniq! + line_class = line_object.class.name[/\w+$/] + @line_types.push(line_class) unless @line_types.include?(line_class) self end + def decorate(&block) + yield self + end + def line_types @line_types end - def type - @type + class << self + def add; AddBlock.new end + def rem; RemBlock.new end + def mod; ModBlock.new end + def unmod; UnModBlock.new end end end - class Generator < Array + class AddBlock < Block; end + class RemBlock < Block; end + class ModBlock < Block; end + class UnModBlock < Block; end + class SepBlock < Block; end + # This data object contains the generated diff data structure. It is an + # array of Block objects which are themselves arrays of Line objects. The + # Generator class returns a Data instance object after it is done + # processing the diff. + class Data < Array + def initialize + super + end + + def debug + demodularize = Proc.new {|obj| obj.class.name[/\w+$/]} + each do |diff_block| + print "*" * 40, ' ', demodularize.call(diff_block) + puts + puts diff_block.map {|line| + "%5d" % line.number + + " [#{demodularize.call(line)}]" + + line + }.join("\n") + puts "*" * 40, ' ' + end + end + + end + + # Processes the diff and generates a Data object which contains the + # resulting data structure. + class Generator + + # Extracts the line number info for a given diff section + LINE_NUM_RE = /@@ [+-]([0-9]+),([0-9]+) [+-]([0-9]+),([0-9]+) @@/ + LINE_TYPES = {'+' => :add, '-' => :rem, ' ' => :unmod} + class << self - def run(udiff, options = {}) - generator = new(options) - udiff.split("\n").each {|line| generator.build(line) } - generator.close - generator + + # Runs the generator on a diff and returns a Data object without + # instantiating a Generator object + def run(udiff) + raise ArgumentError, "Object must be enumerable" unless udiff.respond_to?(:each) + generator = new + udiff.each {|line| generator.process(line.chomp)} + generator.render end end - def initialize(options = {}) - super(0) - default_options = {:inline_add_open => INLINE_ADD_OPEN, - :inline_add_close => INLINE_ADD_CLOSE, - :inline_rem_open => INLINE_REM_OPEN, - :inline_rem_close => INLINE_REM_CLOSE, - :escape_html => ESCAPE_HTML, - :tabwidth => TABWIDTH, - :space => SPACE} - - @options = default_options.merge(options) - @block = [] - @ttype = nil - @p_block = [] - @p_type = nil - @changeno = -1 - @blockno = 0 + def initialize + @buffer = [] + @prev_buffer = [] + @line_type = nil + @prev_line_type = nil @offset_base = 0 @offset_changed = 0 + @data = Diff::Display::Unified::Data.new + self end - def current_block - last + # Operates on a single line from the diff and passes along the + # collected data to the appropriate method for further processing. The + # cycle of processing is in general: + # + # process --> identify_block --> process_block --> process_line + # + def process(line) + return if ['++', '--'].include?(line[0,2]) + + if match = LINE_NUM_RE.match(line) + identify_block + push SepBlock.new and current_block << SepLine.new unless @offset_changed.zero? + @line_type = nil + @offset_base = match[1].to_i - 1 + @offset_changed = match[3].to_i - 1 + return + end + + new_line_type, line = LINE_TYPES[car(line)], cdr(line) + + # Add line to the buffer if it's the same diff line type + # as the previous line + # + # e.g. + # + # + This is a new line + # + As is this one + # + And yet another one... + # + if new_line_type.eql?(@line_type) + @buffer.push(line) + else + # Side by side inline diff + # + # e.g. + # + # - This line just had to go + # + This line is on the way in + # + if new_line_type.eql?(LINE_TYPES['+']) and @line_type.eql?(LINE_TYPES['-']) + @prev_buffer = @buffer + @prev_line_type = @line_type + else + identify_block + end + @buffer = [line] + @line_type = new_line_type + end end + # Finishes up with the generation and returns the Data object (could + # probably use a better name...maybe just #data?) def render close - self + @data end - - def escape(text) - return '' unless text - return text unless @options[:escape_html] - text.gsub('&', '&'). - gsub('<', '<' ). - gsub('>', '>' ). - gsub('"', '"') - end - def expand(text) - escape(text).gsub(/ ( +)|^ /) do |match| - (@options[:space] + ' ') * (match.size / 2) + - @options[:space] * (match.size % 2) - end - end + protected - def inline_diff(line, start, ending, change) - expand(line[0, start]) + - change + - expand(line[ending, ending.abs]) - end + def identify_block + if @prev_line_type.eql?(LINE_TYPES['-']) and @line_type.eql?(LINE_TYPES['+']) + process_block(:mod, {:old => @prev_buffer, :new => @buffer}) + else + if LINE_TYPES.values.include?(@line_type) + process_block(@line_type, {:new => @buffer}) + end + end - def write_line(oldline, newline) - start, ending = get_change_extent(oldline, newline) - change = '' - if oldline.size > start - ending - change = @options[:inline_rem_open] + - expand(oldline[start...ending]) + - @options[:inline_rem_close] + @prev_line_type = nil end - line = inline_diff(oldline, start, ending, change) - current_block << Line.rem(line, @offset_base) + def process_block(diff_line_type, blocks = {:old => nil, :new => nil}) + push Block.send(diff_line_type) + old, new = blocks[:old], blocks[:new] - change = '' - if newline.size > start - ending - change = @options[:inline_add_open] + - expand(newline[start...ending]) + - @options[:inline_add_close] + # Mod block + if diff_line_type.eql?(:mod) and old.size & new.size == 1 + process_line(old.first, new.first) + return + end + + if old and not old.empty? + old.each do |line| + @offset_base += 1 + current_block << Line.send(@prev_line_type, line, @offset_base) + end + end + + if new and not new.empty? + new.each do |line| + @offset_changed += 1 + current_block << Line.send(@line_type, line, @offset_changed) + end + end end - line = inline_diff(newline, start, ending, change) - current_block << Line.add(line, @offset_changed) - end + # TODO Needs a better name...it does process a line (two in fact) but + # its primary function is to add a Rem and an Add pair which + # potentially have inline changes + def process_line(oldline, newline) + start, ending = get_change_extent(oldline, newline) - def write_block(dtype, old = nil, new = nil) - push Block.new(dtype) + # - + line = inline_diff(oldline, start, ending) + current_block << Line.rem(line, @offset_base += 1, true) - if dtype == 'mod' and old.size == 1 and new.size == 1 - write_line(old.first, new.first) - return + # + + line = inline_diff(newline, start, ending) + current_block << Line.add(line, @offset_changed += 1, true) end - if old and not old.empty? - old.each do |e| - current_block << Line.send(dtype, expand(e), @offset_base) - @offset_base += 1 - end + # Inserts string formating characters around the section of a string + # that differs internally from another line so that the Line class + # can insert the desired formating + def inline_diff(line, start, ending) + line[0, start] + + '%s' + extract_change(line, start, ending) + '%s' + + line[ending, ending.abs] end - if new and not new.empty? - new.each do |e| - current_block << Line.send(dtype, expand(e), @offset_changed) - @offset_changed += 1 - end + def extract_change(line, start, ending) + line.size > (start - ending) ? line[start...ending] : '' end - end - def print_block - if @p_type.eql?('-') and @ttype.eql?('+') - write_block('mod', @p_block, @block) - else - case @ttype - when '+' - write_block('add', @block) - when '-' - write_block('rem', @block) - when ' ' - write_block('unmod', @block) - end + def car(line) + line[0,1] end - @block = @p_block = [] - @p_type = ' ' - @blockno += 1 - end + def cdr(line) + line[1..-1] + end - def build(text) - # TODO Names of the files and their versions go here perhaps + # Returns the current Block object + def current_block + @data.last + end - return if ['++', '--'].include?(text[0,2]) + # Adds a Line object onto the current Block object + def push(line) + @data.push line + end - if match = LINE_RE.match(text) - print_block - @changeno += 1 - @blockno = 0 - @offset_base = match[1].to_i - 1 - @offset_changed = match[3].to_i - 1 - return + # This method is called once the generator is done with the unified + # diff. It is a finalizer of sorts. By the time it is called all data + # has been collected and processed. + def close + # certain things could be set now that processing is done + identify_block end - # Set ttype to first character of line - ttype = text[0, 1] - text = text[1..-1] - text = text.gsub("\t", ' ' * @options[:tabwidth]) if text - # If it's the same type of mod as the last line push this line onto the - # block stack - if ttype.eql?(@ttype) - @block.push(text) - else - # If we have a side by side subtraction/addition - if ttype == '+' and @ttype == '-' - @p_block = @block - @p_type = @ttype - else - print_block + # Determines the extent of differences between two string. Returns + # an array containing the offset at which changes start, and then + # negative offset at which the chnages end. If the two strings have + # neither a common prefix nor a common suffic, [0, 0] is returned. + def get_change_extent(str1, str2) + start = 0 + limit = [str1.size, str2.size].sort.first + while start < limit and str1[start, 1] == str2[start, 1] + start += 1 end - @block = [text] - @ttype = ttype + ending = -1 + limit -= start + while -ending <= limit and str1[ending, 1] == str2[ending, 1] + ending -= 1 + end + + return [start, ending + 1] end - end + end - def debug - each do |diff_block| - print "*" * (40 - diff_block.type.size / 2), ' ', diff_block.type - puts - puts diff_block.map {|line| "#{line.number}" << line << " [#{line.type}]"}.join("\n") - print "Line types:" - puts diff_block.line_types.join(", ") - puts - end + # Mostly a convinience class at this point that just overwrites various + # customization methods + class HTMLGenerator < Generator + + # This and the space method now don't work/make sense now that those + # methods are part of the Line class and there certainly won't be an + # HTMLLine class + def escape(text) + text.gsub('&', '&'). + gsub('<', '<' ). + gsub('>', '>' ). + gsub('"', '"') end - def close - # certain things could be set now that processing is done - print_block + def space + ' ' end - # Determines the extent of differences between two string. Returns - # an array containing the offset at which changes start, and then - # negative offset at which the chnages end. If the two strings have - # neither a common prefix nor a common suffic, [0, 0] is returned. - def get_change_extent(str1, str2) - start = 0 - limit = [str1.size, str2.size].sort.first - while start < limit and str1[start, 1] == str2[start, 1] - start += 1 - end - ending = -1 - limit -= start - while -ending <= limit and str1[ending, 1] == str2[ending, 1] - ending -= 1 - end + end - return [start, ending + 1] - end + # See doc string for HTMLGenerator + class ASCIIGenerator < Generator end + end end end -